You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

common.h 16 kB

13 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
13 years ago
13 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #ifndef COMMON_H
  39. #define COMMON_H
  40. #ifdef __cplusplus
  41. extern "C" {
  42. /* Assume C declarations for C++ */
  43. #endif /* __cplusplus */
  44. #ifndef _GNU_SOURCE
  45. #define _GNU_SOURCE
  46. #endif
  47. #ifndef __USE_XOPEN
  48. #define __USE_XOPEN
  49. #endif
  50. #ifndef __USE_SVID
  51. #define __USE_SVID
  52. #endif
  53. #ifdef BUILD_KERNEL
  54. #include "config_kernel.h"
  55. #else
  56. #include "config.h"
  57. #endif
  58. #undef ENABLE_SSE_EXCEPTION
  59. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  60. #define SMP
  61. #endif
  62. #if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX)
  63. #define WINDOWS_ABI
  64. #define OS_WINDOWS
  65. #ifdef DOUBLE
  66. #define DOUBLE_DEFINED DOUBLE
  67. #undef DOUBLE
  68. #endif
  69. #endif
  70. #if !defined(NOINCLUDE) && !defined(ASSEMBLER)
  71. #include <stdio.h>
  72. #include <stdlib.h>
  73. #include <string.h>
  74. #include <unistd.h>
  75. #ifdef OS_LINUX
  76. #include <malloc.h>
  77. #include <sched.h>
  78. #endif
  79. #if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
  80. #include <sched.h>
  81. #endif
  82. #ifdef OS_WINDOWS
  83. #ifdef ATOM
  84. #define GOTO_ATOM ATOM
  85. #undef ATOM
  86. #endif
  87. #include <windows.h>
  88. #include <math.h>
  89. #ifdef GOTO_ATOM
  90. #define ATOM GOTO_ATOM
  91. #undef GOTO_ATOM
  92. #endif
  93. #else
  94. #include <sys/mman.h>
  95. #include <sys/shm.h>
  96. #include <sys/time.h>
  97. #include <unistd.h>
  98. #include <math.h>
  99. #ifdef SMP
  100. #include <pthread.h>
  101. #endif
  102. #endif
  103. #if defined(OS_SUNOS)
  104. #include <thread.h>
  105. #endif
  106. #ifdef __DECC
  107. #include <c_asm.h>
  108. #include <machine/builtins.h>
  109. #endif
  110. #if defined(ARCH_IA64) && defined(ENABLE_SSE_EXCEPTION)
  111. #include <fenv.h>
  112. #endif
  113. #endif
  114. #if defined(OS_WINDOWS) && defined(DOUBLE_DEFINED)
  115. #define DOUBLE DOUBLE_DEFINED
  116. #undef DOUBLE_DEFINED
  117. #endif
  118. #undef DEBUG_INFO
  119. #define SMP_DEBUG
  120. #undef MALLOC_DEBUG
  121. #undef SMP_ALLOC_DEBUG
  122. #ifndef ZERO
  123. #ifdef XDOUBLE
  124. #define ZERO 0.e0L
  125. #elif defined DOUBLE
  126. #define ZERO 0.e0
  127. #else
  128. #define ZERO 0.e0f
  129. #endif
  130. #endif
  131. #ifndef ONE
  132. #ifdef XDOUBLE
  133. #define ONE 1.e0L
  134. #elif defined DOUBLE
  135. #define ONE 1.e0
  136. #else
  137. #define ONE 1.e0f
  138. #endif
  139. #endif
  140. #define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
  141. #define ALLOCA_ALIGN 63UL
  142. #define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
  143. #ifdef NEEDBUNDERSCORE
  144. #define BLASFUNC(FUNC) FUNC##_
  145. #else
  146. #define BLASFUNC(FUNC) FUNC
  147. #endif
  148. #undef USE_PTHREAD_LOCK
  149. #undef USE_PTHREAD_SPINLOCK
  150. #if defined(USE_PTHREAD_LOCK) && defined(USE_PTHREAD_SPINLOCK)
  151. #error "You can't specify both LOCK operation!"
  152. #endif
  153. #ifdef SMP
  154. #define USE_PTHREAD_LOCK
  155. #undef USE_PTHREAD_SPINLOCK
  156. #endif
  157. #ifdef OS_WINDOWS
  158. #undef USE_PTHREAD_LOCK
  159. #undef USE_PTHREAD_SPINLOCK
  160. #endif
  161. #if defined(USE_PTHREAD_LOCK)
  162. #define LOCK_COMMAND(x) pthread_mutex_lock(x)
  163. #define UNLOCK_COMMAND(x) pthread_mutex_unlock(x)
  164. #elif defined(USE_PTHREAD_SPINLOCK)
  165. #ifndef ASSEMBLER
  166. typedef volatile int pthread_spinlock_t;
  167. int pthread_spin_lock (pthread_spinlock_t *__lock);
  168. int pthread_spin_unlock (pthread_spinlock_t *__lock);
  169. #endif
  170. #define LOCK_COMMAND(x) pthread_spin_lock(x)
  171. #define UNLOCK_COMMAND(x) pthread_spin_unlock(x)
  172. #else
  173. #define LOCK_COMMAND(x) blas_lock(x)
  174. #define UNLOCK_COMMAND(x) blas_unlock(x)
  175. #endif
  176. #define GOTO_SHMID 0x510510
  177. #if 0
  178. #ifndef __CUDACC__
  179. #define __global__
  180. #define __device__
  181. #define __host__
  182. #define __shared__
  183. #endif
  184. #endif
  185. #ifndef ASSEMBLER
  186. #ifdef QUAD_PRECISION
  187. typedef struct {
  188. unsigned long x[2];
  189. } xdouble;
  190. #elif defined EXPRECISION
  191. #define xdouble long double
  192. #else
  193. #define xdouble double
  194. #endif
  195. #if defined(OS_WINDOWS) && defined(__64BIT__)
  196. typedef long long BLASLONG;
  197. typedef unsigned long long BLASULONG;
  198. #else
  199. typedef long BLASLONG;
  200. typedef unsigned long BLASULONG;
  201. #endif
  202. #ifdef USE64BITINT
  203. typedef BLASLONG blasint;
  204. #else
  205. typedef int blasint;
  206. #endif
  207. #else
  208. #ifdef USE64BITINT
  209. #define INTSHIFT 3
  210. #define INTSIZE 8
  211. #else
  212. #define INTSHIFT 2
  213. #define INTSIZE 4
  214. #endif
  215. #endif
  216. #ifdef XDOUBLE
  217. #define FLOAT xdouble
  218. #ifdef QUAD_PRECISION
  219. #define XFLOAT xidouble
  220. #endif
  221. #ifdef QUAD_PRECISION
  222. #define SIZE 32
  223. #define BASE_SHIFT 5
  224. #define ZBASE_SHIFT 6
  225. #else
  226. #define SIZE 16
  227. #define BASE_SHIFT 4
  228. #define ZBASE_SHIFT 5
  229. #endif
  230. #elif defined(DOUBLE)
  231. #define FLOAT double
  232. #define SIZE 8
  233. #define BASE_SHIFT 3
  234. #define ZBASE_SHIFT 4
  235. #else
  236. #define FLOAT float
  237. #define SIZE 4
  238. #define BASE_SHIFT 2
  239. #define ZBASE_SHIFT 3
  240. #endif
  241. #ifndef XFLOAT
  242. #define XFLOAT FLOAT
  243. #endif
  244. #ifndef COMPLEX
  245. #define COMPSIZE 1
  246. #else
  247. #define COMPSIZE 2
  248. #endif
  249. #if defined(C_PGI) || defined(C_SUN)
  250. #define CREAL(X) (*((FLOAT *)&X + 0))
  251. #define CIMAG(X) (*((FLOAT *)&X + 1))
  252. #else
  253. #define CREAL __real__
  254. #define CIMAG __imag__
  255. #endif
  256. #define Address_H(x) (((x)+(1<<15))>>16)
  257. #define Address_L(x) ((x)-((Address_H(x))<<16))
  258. #ifndef MAX_CPU_NUMBER
  259. #define MAX_CPU_NUMBER 2
  260. #endif
  261. #if defined(OS_SUNOS)
  262. #define YIELDING thr_yield()
  263. #endif
  264. #if defined(OS_WINDOWS)
  265. #define YIELDING SwitchToThread()
  266. #endif
  267. #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
  268. #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
  269. #endif
  270. #ifdef BULLDOZER
  271. #ifndef YIELDING
  272. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  273. #endif
  274. #endif
  275. #ifdef PILEDRIVER
  276. #ifndef YIELDING
  277. #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
  278. #endif
  279. #endif
  280. #ifndef YIELDING
  281. #define YIELDING sched_yield()
  282. #endif
  283. /***
  284. To alloc job_t on heap or stack.
  285. Please see https://github.com/xianyi/OpenBLAS/issues/246
  286. ***/
  287. #if defined(OS_WINDOWS)
  288. #define GETRF_MEM_ALLOC_THRESHOLD 32
  289. #define BLAS3_MEM_ALLOC_THRESHOLD 32
  290. #endif
  291. #ifndef GETRF_MEM_ALLOC_THRESHOLD
  292. #define GETRF_MEM_ALLOC_THRESHOLD 80
  293. #endif
  294. #ifndef BLAS3_MEM_ALLOC_THRESHOLD
  295. #define BLAS3_MEM_ALLOC_THRESHOLD 160
  296. #endif
  297. #ifdef QUAD_PRECISION
  298. #include "common_quad.h"
  299. #endif
  300. #ifdef ARCH_ALPHA
  301. #include "common_alpha.h"
  302. #endif
  303. #ifdef ARCH_X86
  304. #include "common_x86.h"
  305. #endif
  306. #ifdef ARCH_X86_64
  307. #include "common_x86_64.h"
  308. #endif
  309. #ifdef ARCH_IA64
  310. #include "common_ia64.h"
  311. #endif
  312. #ifdef ARCH_POWER
  313. #include "common_power.h"
  314. #endif
  315. #ifdef sparc
  316. #include "common_sparc.h"
  317. #endif
  318. #ifdef ARCH_MIPS64
  319. #include "common_mips64.h"
  320. #endif
  321. #ifdef ARCH_ARM
  322. #include "common_arm.h"
  323. #endif
  324. #ifdef ARCH_ARM64
  325. #include "common_arm64.h"
  326. #endif
  327. #ifndef ASSEMBLER
  328. #ifdef OS_WINDOWS
  329. typedef char env_var_t[MAX_PATH];
  330. #define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
  331. #else
  332. typedef char* env_var_t;
  333. #define readenv(p, n) ((p)=getenv(n))
  334. #endif
  335. #endif
  336. #ifdef OS_LINUX
  337. #include "common_linux.h"
  338. #endif
  339. #define MMAP_ACCESS (PROT_READ | PROT_WRITE)
  340. #ifdef __NetBSD__
  341. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
  342. #else
  343. #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
  344. #endif
  345. #include "param.h"
  346. #include "common_param.h"
  347. #ifndef STDERR
  348. #define STDERR stderr
  349. #endif
  350. #ifndef MASK
  351. #define MASK(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
  352. #endif
  353. #if defined(XDOUBLE) || defined(DOUBLE)
  354. #define FLOATRET FLOAT
  355. #else
  356. #ifdef NEED_F2CCONV
  357. #define FLOATRET double
  358. #else
  359. #define FLOATRET float
  360. #endif
  361. #endif
  362. #ifndef ASSEMBLER
  363. #ifndef NOINCLUDE
  364. /* Inclusion of a standard header file is needed for definition of __STDC_*
  365. predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
  366. as a side effect of including either <features.h> or <stdc-predef.h>. */
  367. #include <stdio.h>
  368. #endif // NOINCLUDE
  369. /* C99 supports complex floating numbers natively, which GCC also offers as an
  370. extension since version 3.0. If neither are available, use a compatible
  371. structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
  372. #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
  373. (__GNUC__ >= 3 && !defined(__cplusplus)))
  374. #define OPENBLAS_COMPLEX_C99
  375. typedef float _Complex openblas_complex_float;
  376. typedef double _Complex openblas_complex_double;
  377. typedef xdouble _Complex openblas_complex_xdouble;
  378. #else
  379. #define OPENBLAS_COMPLEX_STRUCT
  380. typedef struct { float real, imag; } openblas_complex_float;
  381. typedef struct { double real, imag; } openblas_complex_double;
  382. typedef struct { xdouble real, imag; } openblas_complex_xdouble;
  383. #endif
  384. #endif // ASSEMBLER
  385. #ifndef IFLUSH
  386. #define IFLUSH
  387. #endif
  388. #ifndef IFLUSH_HALF
  389. #define IFLUSH_HALF
  390. #endif
  391. #if defined(C_GCC) && (( __GNUC__ <= 3) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))
  392. #ifdef USE_OPENMP
  393. #undef USE_OPENMP
  394. #endif
  395. #endif
  396. #ifndef ASSEMBLER
  397. #ifndef MIN
  398. #define MIN(a,b) (a>b? b:a)
  399. #endif
  400. #ifndef MAX
  401. #define MAX(a,b) (a<b? b:a)
  402. #endif
  403. #define TOUPPER(a) {if ((a) > 0x60) (a) -= 0x20;}
  404. #if defined(__FreeBSD__) || defined(__APPLE__)
  405. #define MAP_ANONYMOUS MAP_ANON
  406. #endif
  407. /* Common Memory Management Routine */
  408. void blas_set_parameter(void);
  409. int blas_get_cpu_number(void);
  410. void *blas_memory_alloc (int);
  411. void blas_memory_free (void *);
  412. int get_num_procs (void);
  413. #if defined(OS_LINUX) && defined(SMP) && !defined(NO_AFFINITY)
  414. int get_num_nodes (void);
  415. int get_num_proc (int);
  416. int get_node_equal (void);
  417. #endif
  418. void goto_set_num_threads(int);
  419. void gotoblas_affinity_init(void);
  420. void gotoblas_affinity_quit(void);
  421. void gotoblas_dynamic_init(void);
  422. void gotoblas_dynamic_quit(void);
  423. void gotoblas_profile_init(void);
  424. void gotoblas_profile_quit(void);
  425. #ifdef USE_OPENMP
  426. int omp_in_parallel(void);
  427. int omp_get_num_procs(void);
  428. #else
  429. #ifdef __ELF__
  430. int omp_in_parallel (void) __attribute__ ((weak));
  431. int omp_get_num_procs(void) __attribute__ ((weak));
  432. #endif
  433. #endif
  434. static __inline void blas_unlock(volatile BLASULONG *address){
  435. MB;
  436. *address = 0;
  437. }
  438. static __inline int readenv_atoi(char *env) {
  439. env_var_t p;
  440. return readenv(p,env) ? 0 : atoi(p);
  441. }
  442. #if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
  443. static __inline void compinv(FLOAT *b, FLOAT ar, FLOAT ai){
  444. #ifndef UNIT
  445. FLOAT ratio, den;
  446. if (
  447. #ifdef XDOUBLE
  448. (fabsl(ar)) >= (fabsl(ai))
  449. #elif defined DOUBLE
  450. (fabs (ar)) >= (fabs (ai))
  451. #else
  452. (fabsf(ar)) >= (fabsf(ai))
  453. #endif
  454. ) {
  455. ratio = ai / ar;
  456. den = (FLOAT)(ONE / (ar * (ONE + ratio * ratio)));
  457. ar = den;
  458. ai = -ratio * den;
  459. } else {
  460. ratio = ar / ai;
  461. den = (FLOAT)(ONE /(ai * (ONE + ratio * ratio)));
  462. ar = ratio * den;
  463. ai = -den;
  464. }
  465. b[0] = ar;
  466. b[1] = ai;
  467. #else
  468. b[0] = ONE;
  469. b[1] = ZERO;
  470. #endif
  471. }
  472. #endif
  473. #ifdef MALLOC_DEBUG
  474. void *blas_debug_alloc(int);
  475. void *blas_debug_free(void *);
  476. #undef malloc
  477. #undef free
  478. #define malloc(a) blas_debug_alloc(a)
  479. #define free(a) blas_debug_free (a)
  480. #endif
  481. #ifndef COPYOVERHEAD
  482. #define GEMMRETTYPE int
  483. #else
  484. typedef struct {
  485. double outercopy;
  486. double innercopy;
  487. double kernel;
  488. double mflops;
  489. } copyoverhead_t;
  490. #define GEMMRETTYPE copyoverhead_t
  491. #endif
  492. #endif
  493. #ifndef BUILD_KERNEL
  494. #define KNAME(A, B) A
  495. #else
  496. #define KNAME(A, B) A##B
  497. #endif
  498. #include "common_interface.h"
  499. #ifdef SANITY_CHECK
  500. #include "common_reference.h"
  501. #endif
  502. #include "common_macro.h"
  503. #include "common_level1.h"
  504. #include "common_level2.h"
  505. #include "common_level3.h"
  506. #include "common_lapack.h"
  507. #ifdef CBLAS
  508. # define OPENBLAS_CONST /* see comment in cblas.h */
  509. # include "cblas.h"
  510. #endif
  511. #ifndef ASSEMBLER
  512. #if 0
  513. #include "symcopy.h"
  514. #endif
  515. #if defined(SMP_SERVER) && defined(SMP_ONDEMAND)
  516. #error Both SMP_SERVER and SMP_ONDEMAND are specified.
  517. #endif
  518. #if defined(SMP_SERVER) || defined(SMP_ONDEMAND)
  519. #include "common_thread.h"
  520. #endif
  521. #endif
  522. #define INFO_NUM 99
  523. #ifndef DEFAULT_CPU_NUMBER
  524. #define DEFAULT_CPU_NUMBER 4
  525. #endif
  526. #ifndef IDEBUG_START
  527. #define IDEBUG_START
  528. #endif
  529. #ifndef IDEBUG_END
  530. #define IDEBUG_END
  531. #endif
  532. #if !defined(ASSEMBLER) && defined(FUNCTION_PROFILE)
  533. typedef struct {
  534. int func;
  535. unsigned long long calls, fops, area, cycles, tcycles;
  536. } func_profile_t;
  537. extern func_profile_t function_profile_table[];
  538. extern int gotoblas_profile;
  539. #ifdef XDOUBLE
  540. #define NUMOPT QNUMOPT
  541. #elif defined DOUBLE
  542. #define NUMOPT DNUMOPT
  543. #else
  544. #define NUMOPT SNUMOPT
  545. #endif
  546. #define FUNCTION_PROFILE_START() { unsigned long long profile_start = rpcc(), profile_end;
  547. #ifdef SMP
  548. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  549. if (gotoblas_profile) { \
  550. profile_end = rpcc(); \
  551. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  552. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  553. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  554. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  555. function_profile_table[PROFILE_FUNC_NAME].tcycles += blas_cpu_number * (profile_end - profile_start); \
  556. } \
  557. }
  558. #else
  559. #define FUNCTION_PROFILE_END(COMP, AREA, OPS) \
  560. if (gotoblas_profile) { \
  561. profile_end = rpcc(); \
  562. function_profile_table[PROFILE_FUNC_NAME].calls ++; \
  563. function_profile_table[PROFILE_FUNC_NAME].area += SIZE * COMPSIZE * (AREA); \
  564. function_profile_table[PROFILE_FUNC_NAME].fops += (COMP) * (OPS) / NUMOPT; \
  565. function_profile_table[PROFILE_FUNC_NAME].cycles += (profile_end - profile_start); \
  566. function_profile_table[PROFILE_FUNC_NAME].tcycles += (profile_end - profile_start); \
  567. } \
  568. }
  569. #endif
  570. #else
  571. #define FUNCTION_PROFILE_START()
  572. #define FUNCTION_PROFILE_END(COMP, AREA, OPS)
  573. #endif
  574. #if 1
  575. #define PRINT_DEBUG_CNAME
  576. #define PRINT_DEBUG_NAME
  577. #else
  578. #define PRINT_DEBUG_CNAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_CNAME)
  579. #define PRINT_DEBUG_NAME if (readenv_atoi("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME)
  580. #endif
  581. #ifdef __cplusplus
  582. }
  583. #endif /* __cplusplus */
  584. #endif