You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

common.h 16 kB

13 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
13 years ago
13 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef COMMON_H
  39. #define COMMON_H
  40. #ifdef __cplusplus
  41. extern "C" {
  42. /* Assume C declarations for C++ */
  43. #endif /* __cplusplus */
  44. #ifndef _GNU_SOURCE
  45. #define _GNU_SOURCE
  46. #endif
  47. #ifndef __USE_XOPEN
  48. #define __USE_XOPEN
  49. #endif
  50. #ifndef __USE_SVID
  51. #define __USE_SVID
  52. #endif
  53. #ifdef BUILD_KERNEL
  54. #include "config_kernel.h"
  55. #else
  56. #include "config.h"
  57. #endif
  58. #undef ENABLE_SSE_EXCEPTION
  59. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  60. #define SMP
  61. #endif
  62. #if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
  63. #define WINDOWS_ABI
  64. #define OS_WINDOWS
  65. #ifdef DOUBLE
  66. #define DOUBLE_DEFINED DOUBLE
  67. #undef DOUBLE
  68. #endif
  69. #endif
  70. #if !defined(NOINCLUDE) && !defined(ASSEMBLER)
  71. #include <stdio.h>
  72. #include <stdlib.h>
  73. #include <string.h>
  74. #if !defined(_MSC_VER)
  75. #include <unistd.h>
  76. #endif
  77. #ifdef OS_LINUX
  78. #include <malloc.h>
  79. #include <sched.h>
  80. #endif
  81. #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
  82. #include <sched.h>
  83. #endif
  84. #ifdef OS_WINDOWS
  85. #ifdef ATOM
  86. #define GOTO_ATOM ATOM
  87. #undef ATOM
  88. #endif
  89. #include <windows.h>
  90. #include <math.h>
  91. #ifdef GOTO_ATOM
  92. #define ATOM GOTO_ATOM
  93. #undef GOTO_ATOM
  94. #endif
  95. #else
  96. #include <sys/mman.h>
  97. #include <sys/shm.h>
  98. #include <sys/time.h>
  99. #include <unistd.h>
  100. #include <math.h>
  101. #ifdef SMP
  102. #include <pthread.h>
  103. #endif
  104. #endif
  105. #if defined(OS_SUNOS)
  106. #include <thread.h>
  107. #endif
  108. #ifdef __DECC
  109. #include <c_asm.h>
  110. #include <machine/builtins.h>
  111. #endif
  112. #if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
  113. #include <fenv.h>
  114. #endif
  115. #endif
  116. #if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
  117. #define DOUBLE DOUBLE_DEFINED
  118. #undef DOUBLE_DEFINED
  119. #endif
  120. #undef DEBUG_INFO
  121. #define SMP_DEBUG
  122. #undef MALLOC_DEBUG
  123. #undef SMP_ALLOC_DEBUG
  124. #ifndef ZERO
  125. #ifdef XDOUBLE
  126. #define ZERO 0.e0L
  127. #elif defined DOUBLE
  128. #define ZERO 0.e0
  129. #else
  130. #define ZERO 0.e0f
  131. #endif
  132. #endif
  133. #ifndef ONE
  134. #ifdef XDOUBLE
  135. #define ONE 1.e0L
  136. #elif defined DOUBLE
  137. #define ONE 1.e0
  138. #else
  139. #define ONE 1.e0f
  140. #endif
  141. #endif
  142. #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
  143. #define ALLOCA_ALIGN 63UL
  144. #define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
  145. #ifdef NEEDBUNDERSCORE
  146. #define BLASFUNC(FUNC) FUNC##_
  147. #else
  148. #define BLASFUNC(FUNC) FUNC
  149. #endif
  150. #undef USE_PTHREAD_LOCK
  151. #undef USE_PTHREAD_SPINLOCK
  152. #if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
  153. #error "You can't specify both LOCK operation!"
  154. #endif
  155. #ifdef SMP
  156. #define USE_PTHREAD_LOCK
  157. #undef USE_PTHREAD_SPINLOCK
  158. #endif
  159. #ifdef OS_WINDOWS
  160. #undef USE_PTHREAD_LOCK
  161. #undef USE_PTHREAD_SPINLOCK
  162. #endif
  163. #if defined(USE_PTHREAD_LOCK)
  164. #define LOCK_COMMAND(x) pthread_mutex_lock(x)
  165. #define UNLOCK_COMMAND(x) pthread_mutex_unlock(x)
  166. #elif defined(USE_PTHREAD_SPINLOCK)
  167. #ifndef ASSEMBLER
  168. typedef volatile int pthread_spinlock_t;
  169. int pthread_spin_lock (pthread_spinlock_t *__lock);
  170. int pthread_spin_unlock (pthread_spinlock_t *__lock);
  171. #endif
  172. #define LOCK_COMMAND(x) pthread_spin_lock(x)
  173. #define UNLOCK_COMMAND(x) pthread_spin_unlock(x)
  174. #else
  175. #define LOCK_COMMAND(x) blas_lock(x)
  176. #define UNLOCK_COMMAND(x) blas_unlock(x)
  177. #endif
  178. #define GOTO_SHMID 0x510510
  179. #if 0
  180. #ifndef __CUDACC__
  181. #define __global__
  182. #define __device__
  183. #define __host__
  184. #define __shared__
  185. #endif
  186. #endif
  187. #ifndef ASSEMBLER
  188. #ifdef QUAD_PRECISION
  189. typedef struct {
  190. unsigned long x[2];
  191. } xdouble;
  192. #elif defined EXPRECISION
  193. #define xdouble long double
  194. #else
  195. #define xdouble double
  196. #endif
  197. #if defined(OS_WINDOWS) && defined(__64BIT__)
  198. typedef long long BLASLONG;
  199. typedef unsigned long long BLASULONG;
  200. #else
  201. typedef long BLASLONG;
  202. typedef unsigned long BLASULONG;
  203. #endif
  204. #ifdef USE64BITINT
  205. typedef BLASLONG blasint;
  206. #else
  207. typedef int blasint;
  208. #endif
  209. #else
  210. #ifdef USE64BITINT
  211. #define INTSHIFT 3
  212. #define INTSIZE 8
  213. #else
  214. #define INTSHIFT 2
  215. #define INTSIZE 4
  216. #endif
  217. #endif
  218. #ifdef XDOUBLE
  219. #define FLOAT xdouble
  220. #ifdef QUAD_PRECISION
  221. #define XFLOAT xidouble
  222. #endif
  223. #ifdef QUAD_PRECISION
  224. #define SIZE 32
  225. #define BASE_SHIFT 5
  226. #define ZBASE_SHIFT 6
  227. #else
  228. #define SIZE 16
  229. #define BASE_SHIFT 4
  230. #define ZBASE_SHIFT 5
  231. #endif
  232. #elif defined(DOUBLE)
  233. #define FLOAT double
  234. #define SIZE 8
  235. #define BASE_SHIFT 3
  236. #define ZBASE_SHIFT 4
  237. #else
  238. #define FLOAT float
  239. #define SIZE 4
  240. #define BASE_SHIFT 2
  241. #define ZBASE_SHIFT 3
  242. #endif
  243. #ifndef XFLOAT
  244. #define XFLOAT FLOAT
  245. #endif
  246. #ifndef COMPLEX
  247. #define COMPSIZE 1
  248. #else
  249. #define COMPSIZE 2
  250. #endif
  251. #if defined(C_PGI) || defined(C_SUN)
  252. #define CREAL(X) (*((FLOAT *)&X + 0))
  253. #define CIMAG(X) (*((FLOAT *)&X + 1))
  254. #else
  255. #define CREAL __real__
  256. #define CIMAG __imag__
  257. #endif
  258. #define Address_H(x) (((x)+(1<<15))>>16)
  259. #define Address_L(x) ((x)-((Address_H(x))<<16))
  260. #ifndef MAX_CPU_NUMBER
  261. #define MAX_CPU_NUMBER 2
  262. #endif
  263. #if defined(OS_SUNOS)
  264. #define YIELDING thr_yield()
  265. #endif
  266. #if defined(OS_WINDOWS)
  267. #ifdef _MSC_VER
  268. #define YIELDING YieldProcessor()
  269. #else
  270. #define YIELDING SwitchToThread()
  271. #endif
  272. #endif
  /*
   * Architecture-specific YIELDING: a short burst of nops executed while
   * spinning, used on these targets in place of a scheduler yield.
   *
   * The expansion deliberately carries NO trailing semicolon so that
   * YIELDING behaves like a single expression statement, exactly like the
   * sched_yield() / SwitchToThread() variants defined elsewhere in this file.
   * With a semicolon baked into the macro, `if (c) YIELDING; else ...` would
   * expand to two statements and fail to compile.
   *
   * __asm__ __volatile__ is used for every variant because the plain `asm`
   * keyword is not recognised in strict ISO C modes.
   */
  #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop; \n")
  #endif
  #ifdef BULLDOZER
  #ifndef YIELDING
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
  #endif
  #endif
  #ifdef PILEDRIVER
  #ifndef YIELDING
  #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n")
  #endif
  #endif
  286. /*
  287. #ifdef STEAMROLLER
  288. #ifndef YIELDING
  289. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  290. #endif
  291. #endif
  292. */
  293. #ifndef YIELDING
  294. #define YIELDING sched_yield()
  295. #endif
  /***
  Whether to allocate job_t on the heap or on the stack.
  Please see https://github.com/xianyi/OpenBLAS/issues/246
  ***/
  300. #if defined(OS_WINDOWS)
  301. #define GETRF_MEM_ALLOC_THRESHOLD 32
  302. #define BLAS3_MEM_ALLOC_THRESHOLD 32
  303. #endif
  304. #ifndef GETRF_MEM_ALLOC_THRESHOLD
  305. #define GETRF_MEM_ALLOC_THRESHOLD 80
  306. #endif
  307. #ifndef BLAS3_MEM_ALLOC_THRESHOLD
  308. #define BLAS3_MEM_ALLOC_THRESHOLD 160
  309. #endif
  310. #ifdef QUAD_PRECISION
  311. #include "common_quad.h"
  312. #endif
  313. #ifdef ARCH_ALPHA
  314. #include "common_alpha.h"
  315. #endif
  316. #ifdef ARCH_X86
  317. #include "common_x86.h"
  318. #endif
  319. #ifdef ARCH_X86_64
  320. #include "common_x86_64.h"
  321. #endif
  322. #ifdef ARCH_IA64
  323. #include "common_ia64.h"
  324. #endif
  325. #ifdef ARCH_POWER
  326. #include "common_power.h"
  327. #endif
  328. #ifdef sparc
  329. #include "common_sparc.h"
  330. #endif
  331. #ifdef ARCH_MIPS64
  332. #include "common_mips64.h"
  333. #endif
  334. #ifdef ARCH_ARM
  335. #include "common_arm.h"
  336. #endif
  337. #ifdef ARCH_ARM64
  338. #include "common_arm64.h"
  339. #endif
  340. #ifndef ASSEMBLER
  341. #ifdef OS_WINDOWS
  342. typedef char env_var_t[MAX_PATH];
  343. #define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
  344. #else
  345. typedef char* env_var_t;
  346. #define readenv(p, n) ((p)=getenv(n))
  347. #endif
  348. #endif
  349. #ifdef OS_LINUX
  350. #include "common_linux.h"
  351. #endif
  352. #define MMAP_ACCESS (PROT_READ | PROT_WRITE)
  353. #ifdef __NetBSD__
  354. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
  355. #else
  356. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
  357. #endif
  358. #include "param.h"
  359. #include "common_param.h"
  360. #ifndef STDERR
  361. #define STDERR stderr
  362. #endif
  363. #ifndef MASK
  364. #define MASK(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
  365. #endif
  366. #if defined(XDOUBLE) || defined(DOUBLE)
  367. #define FLOATRET FLOAT
  368. #else
  369. #ifdef NEED_F2CCONV
  370. #define FLOATRET double
  371. #else
  372. #define FLOATRET float
  373. #endif
  374. #endif
  375. #ifndef ASSEMBLER
  376. #ifndef NOINCLUDE
  377. /* Inclusion of a standard header file is needed for definition of __STDC_*
  378. predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
  379. as a side effect of including either <features.h> or <stdc-predef.h>. */
  380. #include <stdio.h>
  381. #endif // NOINCLUDE
  382. /* C99 supports complex floating numbers natively, which GCC also offers as an
  383. extension since version 3.0. If neither are available, use a compatible
  384. structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
  385. #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
  386. (__GNUC__ >= 3 && !defined(__cplusplus)))
  387. #define OPENBLAS_COMPLEX_C99
  388. typedef float _Complex openblas_complex_float;
  389. typedef double _Complex openblas_complex_double;
  390. typedef xdouble _Complex openblas_complex_xdouble;
  391. #else
  392. #define OPENBLAS_COMPLEX_STRUCT
  393. typedef struct { float real, imag; } openblas_complex_float;
  394. typedef struct { double real, imag; } openblas_complex_double;
  395. typedef struct { xdouble real, imag; } openblas_complex_xdouble;
  396. #endif
  397. #endif // ASSEMBLER
  398. #ifndef IFLUSH
  399. #define IFLUSH
  400. #endif
  401. #ifndef IFLUSH_HALF
  402. #define IFLUSH_HALF
  403. #endif
  404. #if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
  405. #ifdef USE_OPENMP
  406. #undef USE_OPENMP
  407. #endif
  408. #endif
  409. #ifndef ASSEMBLER
  /*
   * MIN(a, b) / MAX(a, b): smaller / larger of two values.
   *
   * Every argument and the whole expansion are parenthesized so that
   * operands containing lower-precedence operators (e.g. `x | 1`, `c ? p : q`)
   * are compared as a unit. When the operands compare equal, `a` is returned,
   * as before.
   *
   * NOTE: each argument may still be evaluated twice; do not pass expressions
   * with side effects (e.g. `i++`).
   */
  #ifndef MIN
  #define MIN(a,b) (((a) > (b)) ? (b) : (a))
  #endif
  #ifndef MAX
  #define MAX(a,b) (((a) < (b)) ? (b) : (a))
  #endif
  416. #define TOUPPER(a) {if ((a) > 0x60) (a) -= 0x20;}
  417. #if defined(__FreeBSD__) || defined(__APPLE__)
  418. #define MAP_ANONYMOUS MAP_ANON
  419. #endif
  420. /* Common Memory Management Routine */
  421. void blas_set_parameter(void);
  422. int blas_get_cpu_number(void);
  423. void *blas_memory_alloc (int);
  424. void blas_memory_free (void *);
  425. int get_num_procs (void);
  426. #if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
  427. int get_num_nodes (void);
  428. int get_num_proc (int);
  429. int get_node_equal (void);
  430. #endif
  431. void goto_set_num_threads(int);
  432. void gotoblas_affinity_init(void);
  433. void gotoblas_affinity_quit(void);
  434. void gotoblas_dynamic_init(void);
  435. void gotoblas_dynamic_quit(void);
  436. void gotoblas_profile_init(void);
  437. void gotoblas_profile_quit(void);
  438. #ifdef USE_OPENMP
  439. int omp_in_parallel(void);
  440. int omp_get_num_procs(void);
  441. #else
  442. #ifdef __ELF__
  443. int omp_in_parallel (void) __attribute__ ((weak));
  444. int omp_get_num_procs(void) __attribute__ ((weak));
  445. #endif
  446. #endif
  447. static __inline void blas_unlock(volatile BLASULONG *address){
  448. MB;
  449. *address = 0;
  450. }
  451. #ifdef OS_WINDOWS
  452. static __inline int readenv_atoi(char *env) {
  453. env_var_t p;
  454. return readenv(p,env) ? 0 : atoi(p);
  455. }
  456. #else
  /*
   * Look up environment variable `env` with getenv() and return its value
   * converted with atoi(); returns 0 when the variable is not set.
   */
  static __inline int readenv_atoi(char *env) {
    const char *value = getenv(env);

    return value ? atoi(value) : 0;
  }
  464. #endif
  465. #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
  466. static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
  467. #ifndef UNIT
  468. FLOAT ratio, den;
  469. if (
  470. #ifdef XDOUBLE
  471. (fabsl(ar)) >= (fabsl(ai))
  472. #elif defined DOUBLE
  473. (fabs (ar)) >= (fabs (ai))
  474. #else
  475. (fabsf(ar)) >= (fabsf(ai))
  476. #endif
  477. ) {
  478. ratio = ai / ar;
  479. den = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
  480. ar = den;
  481. ai = -ratio * den;
  482. } else {
  483. ratio = ar / ai;
  484. den = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
  485. ar = ratio * den;
  486. ai = -den;
  487. }
  488. b[0] = ar;
  489. b[1] = ai;
  490. #else
  491. b[0] = ONE;
  492. b[1] = ZERO;
  493. #endif
  494. }
  495. #endif
  496. #ifdef MALLOC_DEBUG
  497. void *blas_debug_alloc(int);
  498. void *blas_debug_free(void *);
  499. #undef malloc
  500. #undef free
  501. #define malloc(a) blas_debug_alloc(a)
  502. #define free(a) blas_debug_free (a)
  503. #endif
  504. #ifndef COPYOVERHEAD
  505. #define GEMMRETTYPE int
  506. #else
  507. typedef struct {
  508. double outercopy;
  509. double innercopy;
  510. double kernel;
  511. double mflops;
  512. } copyoverhead_t;
  513. #define GEMMRETTYPE copyoverhead_t
  514. #endif
  515. #endif
  516. #ifndef BUILD_KERNEL
  517. #define KNAME(A, B) A
  518. #else
  519. #define KNAME(A, B) A##B
  520. #endif
  521. #include "common_interface.h"
  522. #ifdef SANITY_CHECK
  523. #include "common_reference.h"
  524. #endif
  525. #include "common_macro.h"
  526. #include "common_level1.h"
  527. #include "common_level2.h"
  528. #include "common_level3.h"
  529. #include "common_lapack.h"
  530. #ifdef CBLAS
  531. # define OPENBLAS_CONST /* see comment in cblas.h */
  532. # include "cblas.h"
  533. #endif
  534. #ifndef ASSEMBLER
  535. #if 0
  536. #include "symcopy.h"
  537. #endif
  538. #if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
  539. #error Both SMP_SERVER and SMP_ONDEMAND are specified.
  540. #endif
  541. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  542. #include "common_thread.h"
  543. #endif
  544. #endif
  545. #define INFO_NUM 99
  546. #ifndef DEFAULT_CPU_NUMBER
  547. #define DEFAULT_CPU_NUMBER 4
  548. #endif
  549. #ifndef IDEBUG_START
  550. #define IDEBUG_START
  551. #endif
  552. #ifndef IDEBUG_END
  553. #define IDEBUG_END
  554. #endif
  555. #if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
  556. typedef struct {
  557. int func;
  558. unsigned long long calls, fops, area, cycles, tcycles;
  559. } func_profile_t;
  560. extern func_profile_t function_profile_table[];
  561. extern int gotoblas_profile;
  562. #ifdef XDOUBLE
  563. #define NUMOPT QNUMOPT
  564. #elif defined DOUBLE
  565. #define NUMOPT DNUMOPT
  566. #else
  567. #define NUMOPT SNUMOPT
  568. #endif
  569. #define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
  570. #ifdef SMP
  571. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  572. if (gotoblas_profile) { \
  573. profile_end = rpcc(); \
  574. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  575. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  576. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  577. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  578. function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
  579. } \
  580. }
  581. #else
  582. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  583. if (gotoblas_profile) { \
  584. profile_end = rpcc(); \
  585. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  586. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  587. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  588. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  589. function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
  590. } \
  591. }
  592. #endif
  593. #else
  594. #define FUNCTION_PROFILE_START()
  595. #define FUNCTION_PROFILE_END(COMP, AREA, OPS)
  596. #endif
  597. #if 1
  598. #define PRINT_DEBUG_CNAME
  599. #define PRINT_DEBUG_NAME
  600. #else
  601. #define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
  602. #define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
  603. #endif
  604. #ifdef __cplusplus
  605. }
  606. #endif /* __cplusplus */
  607. #endif