You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

common.h 21 kB

13 years ago
10 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
13 years ago
13 years ago
7 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef COMMON_H
  39. #define COMMON_H
  40. #ifdef __cplusplus
  41. extern "C" {
  42. /* Assume C declarations for C++ */
  43. #endif /* __cplusplus */
  44. #ifndef _GNU_SOURCE
  45. #define _GNU_SOURCE
  46. #endif
  47. #ifndef __USE_XOPEN
  48. #define __USE_XOPEN
  49. #endif
  50. #ifndef __USE_SVID
  51. #define __USE_SVID
  52. #endif
  53. #ifdef BUILD_KERNEL
  54. #include "config_kernel.h"
  55. #else
  56. #include "config.h"
  57. #endif
  58. #undef ENABLE_SSE_EXCEPTION
  59. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  60. #define SMP
  61. #endif
  62. #if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
  63. #define WINDOWS_ABI
  64. #define OS_WINDOWS
  65. #ifdef DOUBLE
  66. #define DOUBLE_DEFINED DOUBLE
  67. #undef DOUBLE
  68. #endif
  69. #endif
  70. #if !defined(NOINCLUDE) && !defined(ASSEMBLER)
  71. #include <stdio.h>
  72. #include <stdlib.h>
  73. #include <string.h>
  74. #if !defined(_MSC_VER)
  75. #include <unistd.h>
  76. #elif _MSC_VER < 1900
  77. #define snprintf _snprintf
  78. #endif
  79. #include <time.h>
  80. #ifdef OS_LINUX
  81. #include <malloc.h>
  82. #include <sched.h>
  83. #endif
  84. #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_ANDROID)
  85. #include <sched.h>
  86. #endif
  87. #ifdef OS_ANDROID
  88. #define NO_SYSV_IPC
  89. //Android NDK only supports complex.h since Android 5.0
  90. #if __ANDROID_API__ < 21
  91. #define FORCE_OPENBLAS_COMPLEX_STRUCT
  92. #endif
  93. #endif
  94. #ifdef OS_HAIKU
  95. #define NO_SYSV_IPC
  96. #endif
  97. #ifdef OS_WINDOWS
  98. #ifdef ATOM
  99. #define GOTO_ATOM ATOM
  100. #undef ATOM
  101. #endif
  102. #include <windows.h>
  103. #include <math.h>
  104. #ifdef GOTO_ATOM
  105. #define ATOM GOTO_ATOM
  106. #undef GOTO_ATOM
  107. #endif
  108. #elif !defined(OS_EMBEDDED)
  109. #include <sys/mman.h>
  110. #ifndef NO_SYSV_IPC
  111. #include <sys/shm.h>
  112. #endif
  113. #include <sys/time.h>
  114. #include <time.h>
  115. #include <unistd.h>
  116. #include <math.h>
  117. #if defined(SMP) || defined(USE_LOCKING)
  118. #include <pthread.h>
  119. #endif
  120. #else
  121. #include <time.h>
  122. #include <math.h>
  123. #endif
  124. #if defined(OS_SUNOS)
  125. #include <thread.h>
  126. #endif
  127. #ifdef __DECC
  128. #include <c_asm.h>
  129. #include <machine/builtins.h>
  130. #endif
  131. #if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
  132. #include <fenv.h>
  133. #endif
  134. #endif
  135. #if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
  136. #define DOUBLE DOUBLE_DEFINED
  137. #undef DOUBLE_DEFINED
  138. #endif
  139. #undef DEBUG_INFO
  140. #define SMP_DEBUG
  141. #undef MALLOC_DEBUG
  142. #undef SMP_ALLOC_DEBUG
  143. #ifndef ZERO
  144. #ifdef XDOUBLE
  145. #define ZERO 0.e0L
  146. #elif defined DOUBLE
  147. #define ZERO 0.e0
  148. #else
  149. #define ZERO 0.e0f
  150. #endif
  151. #endif
  152. #ifndef ONE
  153. #ifdef XDOUBLE
  154. #define ONE 1.e0L
  155. #elif defined DOUBLE
  156. #define ONE 1.e0
  157. #else
  158. #define ONE 1.e0f
  159. #endif
  160. #endif
  161. #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
  162. #define ALLOCA_ALIGN 63UL
  163. #define NUM_BUFFERS MAX(50,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER))
  164. #ifdef NEEDBUNDERSCORE
  165. #define BLASFUNC(FUNC) FUNC##_
  166. #else
  167. #define BLASFUNC(FUNC) FUNC
  168. #endif
  169. #undef USE_PTHREAD_LOCK
  170. #undef USE_PTHREAD_SPINLOCK
  171. #if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
  172. #error "You can't specify both LOCK operation!"
  173. #endif
  174. #if defined(SMP) || defined(USE_LOCKING)
  175. #define USE_PTHREAD_LOCK
  176. #undef USE_PTHREAD_SPINLOCK
  177. #endif
  178. #ifdef OS_WINDOWS
  179. #undef USE_PTHREAD_LOCK
  180. #undef USE_PTHREAD_SPINLOCK
  181. #endif
  182. #if defined(USE_PTHREAD_LOCK)
  183. #define LOCK_COMMAND(x) pthread_mutex_lock(x)
  184. #define UNLOCK_COMMAND(x) pthread_mutex_unlock(x)
  185. #elif defined(USE_PTHREAD_SPINLOCK)
  186. #ifndef ASSEMBLER
  187. typedef volatile int pthread_spinlock_t;
  188. int pthread_spin_lock (pthread_spinlock_t *__lock);
  189. int pthread_spin_unlock (pthread_spinlock_t *__lock);
  190. #endif
  191. #define LOCK_COMMAND(x) pthread_spin_lock(x)
  192. #define UNLOCK_COMMAND(x) pthread_spin_unlock(x)
  193. #else
  194. #define LOCK_COMMAND(x) blas_lock(x)
  195. #define UNLOCK_COMMAND(x) blas_unlock(x)
  196. #endif
  197. #define GOTO_SHMID 0x510510
  198. #if 0
  199. #ifndef __CUDACC__
  200. #define __global__
  201. #define __device__
  202. #define __host__
  203. #define __shared__
  204. #endif
  205. #endif
  206. #ifndef ASSEMBLER
  207. #ifdef QUAD_PRECISION
  208. typedef struct {
  209. unsigned long x[2];
  210. } xdouble;
  211. #elif defined EXPRECISION
  212. #define xdouble long double
  213. #else
  214. #define xdouble double
  215. #endif
  216. #if defined(OS_WINDOWS) && defined(__64BIT__)
  217. typedef long long BLASLONG;
  218. typedef unsigned long long BLASULONG;
  219. #else
  220. typedef long BLASLONG;
  221. typedef unsigned long BLASULONG;
  222. #endif
  223. #ifndef bfloat16
  224. #include <stdint.h>
  225. typedef uint16_t bfloat16;
  226. #define BFLOAT16CONVERSION 1
  227. #endif
  228. #ifdef USE64BITINT
  229. typedef BLASLONG blasint;
  230. #if defined(OS_WINDOWS) && defined(__64BIT__)
  231. #define blasabs(x) llabs(x)
  232. #else
  233. #define blasabs(x) labs(x)
  234. #endif
  235. #else
  236. typedef int blasint;
  237. #define blasabs(x) abs(x)
  238. #endif
  239. #else
  240. #ifdef USE64BITINT
  241. #define INTSHIFT 3
  242. #define INTSIZE 8
  243. #else
  244. #define INTSHIFT 2
  245. #define INTSIZE 4
  246. #endif
  247. #endif
  248. #ifdef XDOUBLE
  249. #define FLOAT xdouble
  250. #ifdef QUAD_PRECISION
  251. #define XFLOAT xidouble
  252. #endif
  253. #ifdef QUAD_PRECISION
  254. #define SIZE 32
  255. #define BASE_SHIFT 5
  256. #define ZBASE_SHIFT 6
  257. #else
  258. #define SIZE 16
  259. #define BASE_SHIFT 4
  260. #define ZBASE_SHIFT 5
  261. #endif
  262. #elif defined(DOUBLE)
  263. #define FLOAT double
  264. #define SIZE 8
  265. #define BASE_SHIFT 3
  266. #define ZBASE_SHIFT 4
  267. #elif defined(BFLOAT16)
  268. #define IFLOAT bfloat16
  269. #define XFLOAT IFLOAT
  270. #define FLOAT float
  271. #define SIZE 2
  272. #define BASE_SHIFT 1
  273. #define ZBASE_SHIFT 2
  274. #else
  275. #define FLOAT float
  276. #define SIZE 4
  277. #define BASE_SHIFT 2
  278. #define ZBASE_SHIFT 3
  279. #endif
  280. #ifndef XFLOAT
  281. #define XFLOAT FLOAT
  282. #endif
  283. #ifndef IFLOAT
  284. #define IFLOAT FLOAT
  285. #endif
  286. #ifndef COMPLEX
  287. #define COMPSIZE 1
  288. #else
  289. #define COMPSIZE 2
  290. #endif
  291. #define Address_H(x) (((x)+(1<<15))>>16)
  292. #define Address_L(x) ((x)-((Address_H(x))<<16))
  293. #ifndef MAX_CPU_NUMBER
  294. #define MAX_CPU_NUMBER 2
  295. #endif
  296. #if defined(OS_SUNOS)
  297. #define YIELDING thr_yield()
  298. #endif
  299. #if defined(OS_WINDOWS)
  300. #if defined(_MSC_VER) && !defined(__clang__)
  301. #define YIELDING YieldProcessor()
  302. #else
  303. #define YIELDING SwitchToThread()
  304. #endif
  305. #endif
  306. #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
  307. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
  308. #endif
  309. #ifdef BULLDOZER
  310. #ifndef YIELDING
  311. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  312. #endif
  313. #endif
  314. #if defined(POWER8) || defined(POWER9) || defined(POWER10)
  315. #ifndef YIELDING
  316. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  317. #endif
  318. #endif
  319. /*
  320. #ifdef PILEDRIVER
  321. #ifndef YIELDING
  322. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  323. #endif
  324. #endif
  325. */
  326. /*
  327. #ifdef STEAMROLLER
  328. #ifndef YIELDING
  329. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  330. #endif
  331. #endif
  332. */
  333. #ifndef YIELDING
  334. #define YIELDING sched_yield()
  335. #endif
  336. /***
  337. To allocate job_t on the heap or the stack.
  338. Please see https://github.com/xianyi/OpenBLAS/issues/246
  339. ***/
  340. #if defined(OS_WINDOWS)
  341. #define GETRF_MEM_ALLOC_THRESHOLD 32
  342. #define BLAS3_MEM_ALLOC_THRESHOLD 32
  343. #endif
  344. #ifndef GETRF_MEM_ALLOC_THRESHOLD
  345. #define GETRF_MEM_ALLOC_THRESHOLD 80
  346. #endif
  347. #ifndef BLAS3_MEM_ALLOC_THRESHOLD
  348. #define BLAS3_MEM_ALLOC_THRESHOLD 32
  349. #endif
  350. #ifdef QUAD_PRECISION
  351. #include "common_quad.h"
  352. #endif
  353. #ifdef ARCH_ALPHA
  354. #include "common_alpha.h"
  355. #endif
  356. #if (defined(ARCH_X86) || defined(ARCH_X86_64)) && defined(__CET__) && defined(__has_include)
  357. #if __has_include(<cet.h>)
  358. #include <cet.h>
  359. #endif
  360. #endif
  361. #ifndef _CET_ENDBR
  362. #define _CET_ENDBR
  363. #endif
  364. #ifdef ARCH_X86
  365. #include "common_x86.h"
  366. #endif
  367. #ifdef ARCH_X86_64
  368. #include "common_x86_64.h"
  369. #endif
  370. #ifdef ARCH_IA64
  371. #include "common_ia64.h"
  372. #endif
  373. #ifdef ARCH_POWER
  374. #include "common_power.h"
  375. #endif
  376. #ifdef sparc
  377. #include "common_sparc.h"
  378. #endif
  379. #ifdef ARCH_MIPS
  380. #include "common_mips.h"
  381. #endif
  382. #ifdef ARCH_RISCV64
  383. #include "common_riscv64.h"
  384. #endif
  385. #ifdef ARCH_MIPS64
  386. #include "common_mips64.h"
  387. #endif
  388. #ifdef ARCH_ARM
  389. #include "common_arm.h"
  390. #endif
  391. #ifdef ARCH_ARM64
  392. #include "common_arm64.h"
  393. #endif
  394. #ifdef ARCH_ZARCH
  395. #include "common_zarch.h"
  396. #endif
  397. #ifdef ARCH_LOONGARCH64
  398. #include "common_loongarch64.h"
  399. #endif
  400. #ifdef ARCH_E2K
  401. #include "common_e2k.h"
  402. #endif
  403. #ifndef ASSEMBLER
  404. #ifdef OS_WINDOWSSTORE
  405. typedef char env_var_t[MAX_PATH];
  406. #define readenv(p, n) 0
  407. #else
  408. #if defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)
  409. typedef char env_var_t[MAX_PATH];
  410. #define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
  411. #else
  412. typedef char* env_var_t;
  413. #define readenv(p, n) ((p)=getenv(n))
  414. #endif
  415. #endif
  416. #if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
  417. #ifdef _POSIX_MONOTONIC_CLOCK
  418. #if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17)
  419. #if __GLIBC_PREREQ(2, 17) // don't require -lrt
  420. #define USE_MONOTONIC
  421. #endif
  422. #elif defined(OS_ANDROID)
  423. #define USE_MONOTONIC
  424. #endif
  425. #endif
  426. /* use similar scale as x86 rdtsc for timeouts to work correctly */
  /*
   * rpcc - fallback timestamp reader used when no platform-specific
   * rpcc() has been provided (RPCC_DEFINED unset) on non-Windows builds.
   *
   * Returns the current time expressed in nanoseconds:
   *   - USE_MONOTONIC:   seconds/nanoseconds from clock_gettime(CLOCK_MONOTONIC)
   *   - !OS_EMBEDDED:    seconds/microseconds from gettimeofday(), with the
   *                      microsecond part scaled by 1000
   *   - OS_EMBEDDED:     always 0 (no clock is queried)
   *
   * The return codes of clock_gettime()/gettimeofday() are not inspected.
   */
  static inline unsigned long long rpcc(void){
  #ifdef USE_MONOTONIC
    struct timespec now;
    unsigned long long whole_seconds;

    clock_gettime(CLOCK_MONOTONIC, &now);
    whole_seconds = (unsigned long long)now.tv_sec;
    return whole_seconds * 1000000000ull + now.tv_nsec;
  #elif !defined(OS_EMBEDDED)
    struct timeval now;
    unsigned long long whole_seconds;

    gettimeofday(&now, NULL);
    whole_seconds = (unsigned long long)now.tv_sec;
    /* tv_usec is microseconds; multiply by 1000 to reach nanoseconds. */
    return whole_seconds * 1000000000ull + now.tv_usec * 1000;
  #else
    return 0;
  #endif
  }
  440. #define RPCC_DEFINED
  441. #define RPCC64BIT
  442. #endif // !RPCC_DEFINED
  443. #if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
  444. static void __inline blas_lock(volatile BLASULONG *address){
  445. do {
  446. while (*address) {YIELDING;};
  447. } while (!__sync_bool_compare_and_swap(address, 0, 1));
  448. }
  449. #define BLAS_LOCK_DEFINED
  450. #endif
  451. #ifndef RPCC_DEFINED
  452. #error "rpcc() implementation is missing for your platform"
  453. #endif
  454. #ifndef BLAS_LOCK_DEFINED
  455. #error "blas_lock() implementation is missing for your platform"
  456. #endif
  457. #endif // !ASSEMBLER
  458. #ifdef OS_LINUX
  459. #include "common_linux.h"
  460. #endif
  461. #ifdef OS_EMBEDDED
  462. #define DTB_DEFAULT_ENTRIES 64
  463. #endif
  464. #define MMAP_ACCESS (PROT_READ | PROT_WRITE)
  465. #ifdef __NetBSD__
  466. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
  467. #else
  468. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
  469. #endif
  470. #ifndef ASSEMBLER
  471. /* C99 supports complex floating numbers natively, which GCC also offers as an
  472. extension since version 3.0. If neither are available, use a compatible
  473. structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
  474. #if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
  475. (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER)
  476. #define OPENBLAS_COMPLEX_C99
  477. #ifndef __cplusplus
  478. #include <complex.h>
  479. #endif
  480. typedef float _Complex openblas_complex_float;
  481. typedef double _Complex openblas_complex_double;
  482. typedef xdouble _Complex openblas_complex_xdouble;
  483. #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
  484. #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
  485. #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
  486. #else
  487. #define OPENBLAS_COMPLEX_STRUCT
  488. typedef struct { float real, imag; } openblas_complex_float;
  489. typedef struct { double real, imag; } openblas_complex_double;
  490. typedef struct { xdouble real, imag; } openblas_complex_xdouble;
  491. #define openblas_make_complex_float(real, imag) {(real), (imag)}
  492. #define openblas_make_complex_double(real, imag) {(real), (imag)}
  493. #define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
  494. #endif
  495. #endif
  496. #include "param.h"
  497. #include "common_param.h"
  498. #ifndef STDERR
  499. #define STDERR stderr
  500. #endif
  501. #ifndef MASK
  502. #define MASK(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
  503. #endif
  504. #if defined(XDOUBLE) || defined(DOUBLE)
  505. #define FLOATRET FLOAT
  506. #else
  507. #ifdef NEED_F2CCONV
  508. #define FLOATRET double
  509. #else
  510. #define FLOATRET float
  511. #endif
  512. #endif
  513. #ifndef ASSEMBLER
  514. #ifndef NOINCLUDE
  515. /* Inclusion of a standard header file is needed for definition of __STDC_*
  516. predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
  517. as a side effect of including either <features.h> or <stdc-predef.h>. */
  518. #include <stdio.h>
  519. #endif // NOINCLUDE
  520. #ifdef XDOUBLE
  521. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
  522. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
  523. #elif defined(DOUBLE)
  524. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
  525. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
  526. #else
  527. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
  528. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
  529. #endif
  530. #if defined(C_PGI) || defined(C_SUN)
  531. #if defined(__STDC_IEC_559_COMPLEX__)
  532. #define CREAL(X) creal(X)
  533. #define CIMAG(X) cimag(X)
  534. #else
  535. #define CREAL(X) (*((FLOAT *)&X + 0))
  536. #define CIMAG(X) (*((FLOAT *)&X + 1))
  537. #endif
  538. #else
  539. #ifdef OPENBLAS_COMPLEX_STRUCT
  540. #define CREAL(Z) ((Z).real)
  541. #define CIMAG(Z) ((Z).imag)
  542. #else
  543. #define CREAL __real__
  544. #define CIMAG __imag__
  545. #endif
  546. #endif
  547. #endif // ASSEMBLER
  548. #ifndef IFLUSH
  549. #define IFLUSH
  550. #endif
  551. #ifndef IFLUSH_HALF
  552. #define IFLUSH_HALF
  553. #endif
  554. #if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
  555. #ifdef USE_OPENMP
  556. #undef USE_OPENMP
  557. #endif
  558. #endif
  559. #if defined(C_MSVC)
  560. #define inline __inline
  561. #endif
  562. #ifndef ASSEMBLER
  /*
   * MIN / MAX - smaller / larger of two values.
   *
   * Every argument and the whole expansion are parenthesized so that
   * operands containing lower-precedence operators (e.g. MIN(x | 1, y) or
   * MAX(c ? a : b, d)) are compared as complete expressions.
   *
   * NOTE: each argument is still evaluated twice; do not pass expressions
   * with side effects (i++, function calls that mutate state).
   *
   * Both definitions are skipped when the name is already defined.
   */
  #ifndef MIN
  #define MIN(a,b) (((a) > (b)) ? (b) : (a))
  #endif
  #ifndef MAX
  #define MAX(a,b) (((a) < (b)) ? (b) : (a))
  #endif
  569. #define TOUPPER(a) {if ((a) > 0x60) (a) -= 0x20;}
  570. #if defined(__FreeBSD__) || defined(__APPLE__)
  571. #define MAP_ANONYMOUS MAP_ANON
  572. #endif
  573. /* Common Memory Management Routine */
  574. void blas_set_parameter(void);
  575. int blas_get_cpu_number(void);
  576. void *blas_memory_alloc (int);
  577. void blas_memory_free (void *);
  578. void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
  579. void blas_memory_free_nolock (void *);
  580. int get_num_procs (void);
  581. #if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
  582. int get_num_nodes (void);
  583. int get_num_proc (int);
  584. int get_node_equal (void);
  585. #endif
  586. void goto_set_num_threads(int);
  587. void gotoblas_affinity_init(void);
  588. void gotoblas_affinity_quit(void);
  589. void gotoblas_dynamic_init(void);
  590. void gotoblas_dynamic_quit(void);
  591. void gotoblas_profile_init(void);
  592. void gotoblas_profile_quit(void);
  593. int support_avx512(void);
  594. #ifdef USE_OPENMP
  595. #ifndef C_MSVC
  596. int omp_in_parallel(void);
  597. int omp_get_num_procs(void);
  598. #else
  599. __declspec(dllimport) int __cdecl omp_in_parallel(void);
  600. __declspec(dllimport) int __cdecl omp_get_num_procs(void);
  601. #endif
  602. #ifdef HAVE_C11
  603. #if defined(C_GCC) && ( __GNUC__ < 7)
  604. // workaround for GCC bug 65467
  605. #ifndef _Atomic
  606. #define _Atomic volatile
  607. #endif
  608. #endif
  609. #include <stdatomic.h>
  610. #else
  611. #ifndef _Atomic
  612. #define _Atomic volatile
  613. #endif
  614. #endif
  615. #else
  616. #ifdef __ELF__
  617. int omp_in_parallel (void) __attribute__ ((weak));
  618. int omp_get_num_procs(void) __attribute__ ((weak));
  619. #endif
  620. #endif
  621. static __inline void blas_unlock(volatile BLASULONG *address){
  622. MB;
  623. *address = 0;
  624. }
  /*
   * readenv_atoi - read an environment variable and parse it with atoi().
   *
   * env: name of the environment variable.
   *
   * Returns atoi() of the variable's value, or 0 when the variable cannot be
   * read. Three platform variants:
   *   - OS_WINDOWSSTORE: the environment is never consulted; always 0.
   *   - OS_WINDOWS:      uses the readenv() macro, whose result is non-zero
   *                      on success (character count from
   *                      GetEnvironmentVariable, or the getenv() pointer on
   *                      Cygwin builds).
   *   - otherwise:       uses getenv() directly.
   */
  #ifdef OS_WINDOWSSTORE
  static __inline int readenv_atoi(char *env) {
    (void)env;
    return 0;
  }
  #elif defined(OS_WINDOWS)
  static __inline int readenv_atoi(char *env) {
    env_var_t value;
    /* readenv() is truthy on success. Parse only in that case; on failure
       `value` is either uninitialized or NULL and must not reach atoi(). */
    return readenv(value, env) ? atoi(value) : 0;
  }
  #else
  static __inline int readenv_atoi(char *env) {
    const char *value = getenv(env);
    return value ? atoi(value) : 0;
  }
  #endif
  645. #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
  646. static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
  647. #ifndef UNIT
  648. FLOAT ratio, den;
  649. if (
  650. #ifdef XDOUBLE
  651. (fabsl(ar)) >= (fabsl(ai))
  652. #elif defined DOUBLE
  653. (fabs (ar)) >= (fabs (ai))
  654. #else
  655. (fabsf(ar)) >= (fabsf(ai))
  656. #endif
  657. ) {
  658. ratio = ai / ar;
  659. den = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
  660. ar = den;
  661. ai = -ratio * den;
  662. } else {
  663. ratio = ar / ai;
  664. den = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
  665. ar = ratio * den;
  666. ai = -den;
  667. }
  668. b[0] = ar;
  669. b[1] = ai;
  670. #else
  671. b[0] = ONE;
  672. b[1] = ZERO;
  673. #endif
  674. }
  675. #endif
  676. #ifdef MALLOC_DEBUG
  677. void *blas_debug_alloc(int);
  678. void *blas_debug_free(void *);
  679. #undef malloc
  680. #undef free
  681. #define malloc(a) blas_debug_alloc(a)
  682. #define free(a) blas_debug_free (a)
  683. #endif
  684. #ifndef COPYOVERHEAD
  685. #define GEMMRETTYPE int
  686. #else
  687. typedef struct {
  688. double outercopy;
  689. double innercopy;
  690. double kernel;
  691. double mflops;
  692. } copyoverhead_t;
  693. #define GEMMRETTYPE copyoverhead_t
  694. #endif
  695. #endif
  696. #ifndef BUILD_KERNEL
  697. #define KNAME(A, B) A
  698. #else
  699. #define KNAME(A, B) A##B
  700. #endif
  701. #include "common_interface.h"
  702. #ifdef SANITY_CHECK
  703. #include "common_reference.h"
  704. #endif
  705. #include "common_macro.h"
  706. #include "common_level1.h"
  707. #include "common_level2.h"
  708. #include "common_level3.h"
  709. #include "common_lapack.h"
  710. #ifdef CBLAS
  711. # define OPENBLAS_CONST /* see comment in cblas.h */
  712. # include "cblas.h"
  713. #endif
  714. #ifndef ASSEMBLER
  715. #include "common_stackalloc.h"
  716. #if 0
  717. #include "symcopy.h"
  718. #endif
  719. #if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
  720. #error Both SMP_SERVER and SMP_ONDEMAND are specified.
  721. #endif
  722. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  723. #include "common_thread.h"
  724. #endif
  725. #endif
  726. #define INFO_NUM 99
  727. #ifndef DEFAULT_CPU_NUMBER
  728. #define DEFAULT_CPU_NUMBER 4
  729. #endif
  730. #ifndef IDEBUG_START
  731. #define IDEBUG_START
  732. #endif
  733. #ifndef IDEBUG_END
  734. #define IDEBUG_END
  735. #endif
  736. #if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
  737. typedef struct {
  738. int func;
  739. unsigned long long calls, fops, area, cycles, tcycles;
  740. } func_profile_t;
  741. extern func_profile_t function_profile_table[];
  742. extern int gotoblas_profile;
  743. #ifdef XDOUBLE
  744. #define NUMOPT QNUMOPT
  745. #elif defined DOUBLE
  746. #define NUMOPT DNUMOPT
  747. #else
  748. #define NUMOPT SNUMOPT
  749. #endif
  750. #define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
  751. #ifdef SMP
  752. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  753. if (gotoblas_profile) { \
  754. profile_end = rpcc(); \
  755. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  756. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  757. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  758. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  759. function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
  760. } \
  761. }
  762. #else
  763. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  764. if (gotoblas_profile) { \
  765. profile_end = rpcc(); \
  766. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  767. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  768. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  769. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  770. function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
  771. } \
  772. }
  773. #endif
  774. #else
  775. #define FUNCTION_PROFILE_START()
  776. #define FUNCTION_PROFILE_END(COMP, AREA, OPS)
  777. #endif
  778. #if 1
  779. #define PRINT_DEBUG_CNAME
  780. #define PRINT_DEBUG_NAME
  781. #else
  782. #define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
  783. #define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
  784. #endif
  785. #ifdef __cplusplus
  786. }
  787. #endif /* __cplusplus */
  788. #endif