You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

common.h 18 kB

13 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
13 years ago
13 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef COMMON_H
  39. #define COMMON_H
  40. #ifdef __cplusplus
  41. extern "C" {
  42. /* Assume C declarations for C++ */
  43. #endif /* __cplusplus */
  44. #ifndef _GNU_SOURCE
  45. #define _GNU_SOURCE
  46. #endif
  47. #ifndef __USE_XOPEN
  48. #define __USE_XOPEN
  49. #endif
  50. #ifndef __USE_SVID
  51. #define __USE_SVID
  52. #endif
  53. #ifdef BUILD_KERNEL
  54. #include "config_kernel.h"
  55. #else
  56. #include "config.h"
  57. #endif
  58. #undef ENABLE_SSE_EXCEPTION
  59. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  60. #define SMP
  61. #endif
  62. #if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
  63. #define WINDOWS_ABI
  64. #define OS_WINDOWS
  65. #ifdef DOUBLE
  66. #define DOUBLE_DEFINED DOUBLE
  67. #undef DOUBLE
  68. #endif
  69. #endif
  70. #if !defined(NOINCLUDE) && !defined(ASSEMBLER)
  71. #include <stdio.h>
  72. #include <stdlib.h>
  73. #include <string.h>
  74. #if !defined(_MSC_VER)
  75. #include <unistd.h>
  76. #endif
  77. #ifdef OS_LINUX
  78. #include <malloc.h>
  79. #include <sched.h>
  80. #endif
  81. #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
  82. #include <sched.h>
  83. #endif
  84. #ifdef OS_ANDROID
  85. #define NO_SYSV_IPC
  86. #endif
  87. #ifdef OS_WINDOWS
  88. #ifdef ATOM
  89. #define GOTO_ATOM ATOM
  90. #undef ATOM
  91. #endif
  92. #include <windows.h>
  93. #include <math.h>
  94. #ifdef GOTO_ATOM
  95. #define ATOM GOTO_ATOM
  96. #undef GOTO_ATOM
  97. #endif
  98. #else
  99. #include <sys/mman.h>
  100. #ifndef NO_SYSV_IPC
  101. #include <sys/shm.h>
  102. #endif
  103. #include <sys/time.h>
  104. #include <unistd.h>
  105. #include <math.h>
  106. #ifdef SMP
  107. #include <pthread.h>
  108. #endif
  109. #endif
  110. #if defined(OS_SUNOS)
  111. #include <thread.h>
  112. #endif
  113. #ifdef __DECC
  114. #include <c_asm.h>
  115. #include <machine/builtins.h>
  116. #endif
  117. #if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
  118. #include <fenv.h>
  119. #endif
  120. #endif
  121. #if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
  122. #define DOUBLE DOUBLE_DEFINED
  123. #undef DOUBLE_DEFINED
  124. #endif
  125. #undef DEBUG_INFO
  126. #define SMP_DEBUG
  127. #undef MALLOC_DEBUG
  128. #undef SMP_ALLOC_DEBUG
  129. #ifndef ZERO
  130. #ifdef XDOUBLE
  131. #define ZERO 0.e0L
  132. #elif defined DOUBLE
  133. #define ZERO 0.e0
  134. #else
  135. #define ZERO 0.e0f
  136. #endif
  137. #endif
  138. #ifndef ONE
  139. #ifdef XDOUBLE
  140. #define ONE 1.e0L
  141. #elif defined DOUBLE
  142. #define ONE 1.e0
  143. #else
  144. #define ONE 1.e0f
  145. #endif
  146. #endif
  147. #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
  148. #define ALLOCA_ALIGN 63UL
  149. #define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
  150. #ifdef NEEDBUNDERSCORE
  151. #define BLASFUNC(FUNC) FUNC##_
  152. #else
  153. #define BLASFUNC(FUNC) FUNC
  154. #endif
  155. #undef USE_PTHREAD_LOCK
  156. #undef USE_PTHREAD_SPINLOCK
  157. #if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
  158. #error "You can't specify both LOCK operation!"
  159. #endif
  160. #ifdef SMP
  161. #define USE_PTHREAD_LOCK
  162. #undef USE_PTHREAD_SPINLOCK
  163. #endif
  164. #ifdef OS_WINDOWS
  165. #undef USE_PTHREAD_LOCK
  166. #undef USE_PTHREAD_SPINLOCK
  167. #endif
  168. #if defined(USE_PTHREAD_LOCK)
  169. #define LOCK_COMMAND(x) pthread_mutex_lock(x)
  170. #define UNLOCK_COMMAND(x) pthread_mutex_unlock(x)
  171. #elif defined(USE_PTHREAD_SPINLOCK)
  172. #ifndef ASSEMBLER
  173. typedef volatile int pthread_spinlock_t;
  174. int pthread_spin_lock (pthread_spinlock_t *__lock);
  175. int pthread_spin_unlock (pthread_spinlock_t *__lock);
  176. #endif
  177. #define LOCK_COMMAND(x) pthread_spin_lock(x)
  178. #define UNLOCK_COMMAND(x) pthread_spin_unlock(x)
  179. #else
  180. #define LOCK_COMMAND(x) blas_lock(x)
  181. #define UNLOCK_COMMAND(x) blas_unlock(x)
  182. #endif
  183. #define GOTO_SHMID 0x510510
  184. #if 0
  185. #ifndef __CUDACC__
  186. #define __global__
  187. #define __device__
  188. #define __host__
  189. #define __shared__
  190. #endif
  191. #endif
  192. #ifndef ASSEMBLER
  193. #ifdef QUAD_PRECISION
  194. typedef struct {
  195. unsigned long x[2];
  196. } xdouble;
  197. #elif defined EXPRECISION
  198. #define xdouble long double
  199. #else
  200. #define xdouble double
  201. #endif
  202. #if defined(OS_WINDOWS) && defined(__64BIT__)
  203. typedef long long BLASLONG;
  204. typedef unsigned long long BLASULONG;
  205. #else
  206. typedef long BLASLONG;
  207. typedef unsigned long BLASULONG;
  208. #endif
  209. #ifdef USE64BITINT
  210. typedef BLASLONG blasint;
  211. #else
  212. typedef int blasint;
  213. #endif
  214. #else
  215. #ifdef USE64BITINT
  216. #define INTSHIFT 3
  217. #define INTSIZE 8
  218. #else
  219. #define INTSHIFT 2
  220. #define INTSIZE 4
  221. #endif
  222. #endif
  223. #ifdef XDOUBLE
  224. #define FLOAT xdouble
  225. #ifdef QUAD_PRECISION
  226. #define XFLOAT xidouble
  227. #endif
  228. #ifdef QUAD_PRECISION
  229. #define SIZE 32
  230. #define BASE_SHIFT 5
  231. #define ZBASE_SHIFT 6
  232. #else
  233. #define SIZE 16
  234. #define BASE_SHIFT 4
  235. #define ZBASE_SHIFT 5
  236. #endif
  237. #elif defined(DOUBLE)
  238. #define FLOAT double
  239. #define SIZE 8
  240. #define BASE_SHIFT 3
  241. #define ZBASE_SHIFT 4
  242. #else
  243. #define FLOAT float
  244. #define SIZE 4
  245. #define BASE_SHIFT 2
  246. #define ZBASE_SHIFT 3
  247. #endif
  248. #ifndef XFLOAT
  249. #define XFLOAT FLOAT
  250. #endif
  251. #ifndef COMPLEX
  252. #define COMPSIZE 1
  253. #else
  254. #define COMPSIZE 2
  255. #endif
  256. #define Address_H(x) (((x)+(1<<15))>>16)
  257. #define Address_L(x) ((x)-((Address_H(x))<<16))
  258. #ifndef MAX_CPU_NUMBER
  259. #define MAX_CPU_NUMBER 2
  260. #endif
  /*
   * YIELDING expands to the platform's yield / back-off primitive.
   *
   * Selection, in the order written below:
   *   - OS_SUNOS                      -> thr_yield()
   *   - OS_WINDOWS (MSVC, not clang)  -> YieldProcessor()
   *   - OS_WINDOWS (other compilers)  -> SwitchToThread()
   *   - ARMV5/ARMV6/ARMV7/ARMV8       -> eight inline "nop" instructions
   *   - BULLDOZER / PILEDRIVER        -> eight inline "nop" instructions,
   *                                      only if nothing above defined it
   *   - otherwise                     -> sched_yield()
   *
   * No variant carries a trailing ';' -- the caller writes `YIELDING;`.
   * Keeping every variant semicolon-free lets the macro be used as the
   * body of an un-braced if/else on all targets, not only on those that
   * map to a function call.  The inline-asm variants use the
   * __asm__/__volatile__ spellings so they also compile in strict ISO
   * modes where the plain `asm` keyword is not available.
   */
  #if defined(OS_SUNOS)
  #define YIELDING thr_yield()
  #endif
  #if defined(OS_WINDOWS)
  #if defined(_MSC_VER) && !defined(__clang__)
  #define YIELDING YieldProcessor()
  #else
  #define YIELDING SwitchToThread()
  #endif
  #endif
  #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop; \n")
  #endif
  #ifdef BULLDOZER
  #ifndef YIELDING
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
  #endif
  #endif
  #ifdef PILEDRIVER
  #ifndef YIELDING
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
  #endif
  #endif
  /*
  #ifdef STEAMROLLER
  #ifndef YIELDING
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
  #endif
  #endif
  */
  /* Fallback for every platform not handled above. */
  #ifndef YIELDING
  #define YIELDING sched_yield()
  #endif
  294. /***
  295. Whether to allocate job_t on the heap or on the stack.
  296. Please see https://github.com/xianyi/OpenBLAS/issues/246
  297. ***/
  298. #if defined(OS_WINDOWS)
  299. #define GETRF_MEM_ALLOC_THRESHOLD 32
  300. #define BLAS3_MEM_ALLOC_THRESHOLD 32
  301. #endif
  302. #ifndef GETRF_MEM_ALLOC_THRESHOLD
  303. #define GETRF_MEM_ALLOC_THRESHOLD 80
  304. #endif
  305. #ifndef BLAS3_MEM_ALLOC_THRESHOLD
  306. #define BLAS3_MEM_ALLOC_THRESHOLD 160
  307. #endif
  308. #ifdef QUAD_PRECISION
  309. #include "common_quad.h"
  310. #endif
  311. #ifdef ARCH_ALPHA
  312. #include "common_alpha.h"
  313. #endif
  314. #ifdef ARCH_X86
  315. #include "common_x86.h"
  316. #endif
  317. #ifdef ARCH_X86_64
  318. #include "common_x86_64.h"
  319. #endif
  320. #ifdef ARCH_IA64
  321. #include "common_ia64.h"
  322. #endif
  323. #ifdef ARCH_POWER
  324. #include "common_power.h"
  325. #endif
  326. #ifdef sparc
  327. #include "common_sparc.h"
  328. #endif
  329. #ifdef ARCH_MIPS64
  330. #include "common_mips64.h"
  331. #endif
  332. #ifdef ARCH_ARM
  333. #include "common_arm.h"
  334. #endif
  335. #ifdef ARCH_ARM64
  336. #include "common_arm64.h"
  337. #endif
  338. #ifndef ASSEMBLER
  339. #ifdef OS_WINDOWS
  340. typedef char env_var_t[MAX_PATH];
  341. #define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
  342. #else
  343. typedef char* env_var_t;
  344. #define readenv(p, n) ((p)=getenv(n))
  345. #endif
  346. #endif
  347. #ifdef OS_LINUX
  348. #include "common_linux.h"
  349. #endif
  350. #define MMAP_ACCESS (PROT_READ | PROT_WRITE)
  351. #ifdef __NetBSD__
  352. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
  353. #else
  354. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
  355. #endif
  356. #include "param.h"
  357. #include "common_param.h"
  358. #ifndef STDERR
  359. #define STDERR stderr
  360. #endif
  361. #ifndef MASK
  362. #define MASK(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
  363. #endif
  364. #if defined(XDOUBLE) || defined(DOUBLE)
  365. #define FLOATRET FLOAT
  366. #else
  367. #ifdef NEED_F2CCONV
  368. #define FLOATRET double
  369. #else
  370. #define FLOATRET float
  371. #endif
  372. #endif
  373. #ifndef ASSEMBLER
  374. #ifndef NOINCLUDE
  375. /* Inclusion of a standard header file is needed for definition of __STDC_*
  376. predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
  377. as a side effect of including either <features.h> or <stdc-predef.h>. */
  378. #include <stdio.h>
  379. #endif // NOINCLUDE
  380. /* C99 supports complex floating numbers natively, which GCC also offers as an
  381. extension since version 3.0. If neither are available, use a compatible
  382. structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
  383. #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
  384. (__GNUC__ >= 3 && !defined(__cplusplus)) || \
  385. _MSC_VER >= 1800) // Visual Studio 2013 supports complex
  386. #define OPENBLAS_COMPLEX_C99
  387. #ifndef __cplusplus
  388. #include <complex.h>
  389. #endif
  390. typedef float _Complex openblas_complex_float;
  391. typedef double _Complex openblas_complex_double;
  392. typedef xdouble _Complex openblas_complex_xdouble;
  393. #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
  394. #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
  395. #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
  396. #else
  397. #define OPENBLAS_COMPLEX_STRUCT
  398. typedef struct { float real, imag; } openblas_complex_float;
  399. typedef struct { double real, imag; } openblas_complex_double;
  400. typedef struct { xdouble real, imag; } openblas_complex_xdouble;
  401. #define openblas_make_complex_float(real, imag) {(real), (imag)}
  402. #define openblas_make_complex_double(real, imag) {(real), (imag)}
  403. #define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
  404. #endif
  405. #ifdef XDOUBLE
  406. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
  407. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
  408. #elif defined(DOUBLE)
  409. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_double
  410. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i)
  411. #else
  412. #define OPENBLAS_COMPLEX_FLOAT openblas_complex_float
  413. #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i)
  414. #endif
  /*
   * CREAL / CIMAG: access the real and imaginary part of an OpenBLAS
   * complex value.  Three variants are selected at compile time.
   */
  #if defined(C_PGI) || defined(C_SUN)
  /* PGI / Sun compilers: view the object as two consecutive FLOATs.
     The argument is parenthesized so that the address-of operator applies
     to the whole operand passed by the caller. */
  #define CREAL(X) (*((FLOAT *)&(X) + 0))
  #define CIMAG(X) (*((FLOAT *)&(X) + 1))
  #else
  #ifdef OPENBLAS_COMPLEX_STRUCT
  /* Struct fallback: plain member access. */
  #define CREAL(Z) ((Z).real)
  #define CIMAG(Z) ((Z).imag)
  #else
  /* Native complex types: GNU __real__ / __imag__ operators. */
  #define CREAL __real__
  #define CIMAG __imag__
  #endif
  #endif
  427. #endif // ASSEMBLER
  428. #ifndef IFLUSH
  429. #define IFLUSH
  430. #endif
  431. #ifndef IFLUSH_HALF
  432. #define IFLUSH_HALF
  433. #endif
  434. #if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
  435. #ifdef USE_OPENMP
  436. #undef USE_OPENMP
  437. #endif
  438. #endif
  439. #if defined(C_MSVC)
  440. #define inline __inline
  441. #endif
  442. #ifndef ASSEMBLER
  /*
   * MIN / MAX: two-argument minimum and maximum.
   *
   * Both arguments and the comparison are fully parenthesized so that
   * operands containing operators of lower precedence than '>' / '<'
   * (for example `x & y`) are compared as a whole.  Each argument can still
   * be evaluated twice, so do not pass expressions with side effects.
   * When the operands compare equal, `a` is the result (unchanged from the
   * previous definition).
   */
  #ifndef MIN
  #define MIN(a,b) (((a) > (b)) ? (b) : (a))
  #endif
  #ifndef MAX
  #define MAX(a,b) (((a) < (b)) ? (b) : (a))
  #endif
  /*
   * TOUPPER: modifies (a) in place, subtracting 0x20 whenever (a) > 0x60.
   * There is no upper-bound check.  Expands to a braced block, not an
   * expression.
   */
  #define TOUPPER(a) {if ((a) > 0x60) (a) -= 0x20;}
  450. #if defined(__FreeBSD__) || defined(__APPLE__)
  451. #define MAP_ANONYMOUS MAP_ANON
  452. #endif
  453. /* Common Memory Management Routine */
  454. void blas_set_parameter(void);
  455. int blas_get_cpu_number(void);
  456. void *blas_memory_alloc (int);
  457. void blas_memory_free (void *);
  458. void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
  459. void blas_memory_free_nolock (void *);
  460. int get_num_procs (void);
  461. #if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
  462. int get_num_nodes (void);
  463. int get_num_proc (int);
  464. int get_node_equal (void);
  465. #endif
  466. void goto_set_num_threads(int);
  467. void gotoblas_affinity_init(void);
  468. void gotoblas_affinity_quit(void);
  469. void gotoblas_dynamic_init(void);
  470. void gotoblas_dynamic_quit(void);
  471. void gotoblas_profile_init(void);
  472. void gotoblas_profile_quit(void);
  473. #ifdef USE_OPENMP
  474. int omp_in_parallel(void);
  475. int omp_get_num_procs(void);
  476. #else
  477. #ifdef __ELF__
  478. int omp_in_parallel (void) __attribute__ ((weak));
  479. int omp_get_num_procs(void) __attribute__ ((weak));
  480. #endif
  481. #endif
  482. static __inline void blas_unlock(volatile BLASULONG *address){
  483. MB;
  484. *address = 0;
  485. }
  /*
   * readenv_atoi: look up environment variable `env` and return its value
   * converted with atoi().
   *
   * Returns 0 when the variable is not set (and, on Windows, when its value
   * does not fit into the env_var_t buffer).
   */
  #ifdef OS_WINDOWS
  static __inline int readenv_atoi(char *env) {
    env_var_t p;
    /* readenv() expands to GetEnvironmentVariable(): 0 means the variable
       could not be read, a value >= the buffer size means the buffer was too
       small and was not filled.  Only parse the buffer when it holds a
       complete, NUL-terminated value. */
    unsigned long len = readenv(p, env);
    return (len > 0 && len < sizeof(p)) ? atoi(p) : 0;
  }
  #else
  static __inline int readenv_atoi(char *env) {
    char *value = getenv(env);
    return value ? atoi(value) : 0;
  }
  #endif
  500. #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
  501. static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
  502. #ifndef UNIT
  503. FLOAT ratio, den;
  504. if (
  505. #ifdef XDOUBLE
  506. (fabsl(ar)) >= (fabsl(ai))
  507. #elif defined DOUBLE
  508. (fabs (ar)) >= (fabs (ai))
  509. #else
  510. (fabsf(ar)) >= (fabsf(ai))
  511. #endif
  512. ) {
  513. ratio = ai / ar;
  514. den = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
  515. ar = den;
  516. ai = -ratio * den;
  517. } else {
  518. ratio = ar / ai;
  519. den = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
  520. ar = ratio * den;
  521. ai = -den;
  522. }
  523. b[0] = ar;
  524. b[1] = ai;
  525. #else
  526. b[0] = ONE;
  527. b[1] = ZERO;
  528. #endif
  529. }
  530. #endif
  531. #ifdef MALLOC_DEBUG
  532. void *blas_debug_alloc(int);
  533. void *blas_debug_free(void *);
  534. #undef malloc
  535. #undef free
  536. #define malloc(a) blas_debug_alloc(a)
  537. #define free(a) blas_debug_free (a)
  538. #endif
  539. #ifndef COPYOVERHEAD
  540. #define GEMMRETTYPE int
  541. #else
  542. typedef struct {
  543. double outercopy;
  544. double innercopy;
  545. double kernel;
  546. double mflops;
  547. } copyoverhead_t;
  548. #define GEMMRETTYPE copyoverhead_t
  549. #endif
  550. #endif
  551. #ifndef BUILD_KERNEL
  552. #define KNAME(A, B) A
  553. #else
  554. #define KNAME(A, B) A##B
  555. #endif
  556. #include "common_interface.h"
  557. #ifdef SANITY_CHECK
  558. #include "common_reference.h"
  559. #endif
  560. #include "common_macro.h"
  561. #include "common_level1.h"
  562. #include "common_level2.h"
  563. #include "common_level3.h"
  564. #include "common_lapack.h"
  565. #ifdef CBLAS
  566. # define OPENBLAS_CONST /* see comment in cblas.h */
  567. # include "cblas.h"
  568. #endif
  569. #ifndef ASSEMBLER
  570. #if 0
  571. #include "symcopy.h"
  572. #endif
  573. #if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
  574. #error Both SMP_SERVER and SMP_ONDEMAND are specified.
  575. #endif
  576. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  577. #include "common_thread.h"
  578. #endif
  579. #endif
  580. #define INFO_NUM 99
  581. #ifndef DEFAULT_CPU_NUMBER
  582. #define DEFAULT_CPU_NUMBER 4
  583. #endif
  584. #ifndef IDEBUG_START
  585. #define IDEBUG_START
  586. #endif
  587. #ifndef IDEBUG_END
  588. #define IDEBUG_END
  589. #endif
  590. #if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
  591. typedef struct {
  592. int func;
  593. unsigned long long calls, fops, area, cycles, tcycles;
  594. } func_profile_t;
  595. extern func_profile_t function_profile_table[];
  596. extern int gotoblas_profile;
  597. #ifdef XDOUBLE
  598. #define NUMOPT QNUMOPT
  599. #elif defined DOUBLE
  600. #define NUMOPT DNUMOPT
  601. #else
  602. #define NUMOPT SNUMOPT
  603. #endif
  604. #define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
  605. #ifdef SMP
  606. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  607. if (gotoblas_profile) { \
  608. profile_end = rpcc(); \
  609. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  610. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  611. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  612. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  613. function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
  614. } \
  615. }
  616. #else
  617. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  618. if (gotoblas_profile) { \
  619. profile_end = rpcc(); \
  620. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  621. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  622. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  623. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  624. function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
  625. } \
  626. }
  627. #endif
  628. #else
  629. #define FUNCTION_PROFILE_START()
  630. #define FUNCTION_PROFILE_END(COMP, AREA, OPS)
  631. #endif
  632. #if 1
  633. #define PRINT_DEBUG_CNAME
  634. #define PRINT_DEBUG_NAME
  635. #else
  636. #define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
  637. #define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
  638. #endif
  639. #ifdef __cplusplus
  640. }
  641. #endif /* __cplusplus */
  642. #endif