
Makefile.rule
#
# Beginning of user configuration
#
# This library's version
VERSION = 0.3.29.dev
# If you set this prefix, the library name will be lib$(LIBNAMEPREFIX)openblas.a
# and lib$(LIBNAMEPREFIX)openblas.so, with a matching soname in the shared library
#
# LIBNAMEPREFIX = scipy
# If you set this suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so, and the soname of the shared library
# is libopenblas_$(LIBNAMESUFFIX).so.0.
# LIBNAMESUFFIX = omp
# You can specify the target architecture, otherwise it's
# automatically detected.
# TARGET = PENRYN
# If you want to support multiple architectures in one binary
# DYNAMIC_ARCH = 1
# If you want the full list of x86_64 architectures supported in DYNAMIC_ARCH
# mode (including individually optimized code for PENRYN, DUNNINGTON, OPTERON,
# OPTERON_SSE3, ATOM and NANO rather than fallbacks to older architectures)
# DYNAMIC_OLDER = 1
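# As an illustration (just the two variables above, passed on the make command
# line instead of being uncommented here):
# make DYNAMIC_ARCH=1 DYNAMIC_OLDER=1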
# C compiler including binary type (32bit / 64bit). Default is gcc.
# Don't use the Intel compiler or PGI; they will not generate the code this
# library expects.
# CC = gcc
# Fortran compiler. Default is g77.
# FC = gfortran
# You can even specify a cross compiler; in that case, please also set HOSTCC.
# cross compiler for Windows
# CC = x86_64-w64-mingw32-gcc
# FC = x86_64-w64-mingw32-gfortran
# cross compiler for 32bit ARM
# CC = arm-linux-gnueabihf-gcc
# FC = arm-linux-gnueabihf-gfortran
# cross compiler for 64bit ARM
# CC = aarch64-linux-gnu-gcc
# FC = aarch64-linux-gnu-gfortran
# If you use a cross compiler, please set this host compiler.
# HOSTCC = gcc
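# A sketch of a typical cross build (ARMV8 is only an example TARGET; when cross
# compiling, the target CPU cannot be autodetected and should be given explicitly):
# make CC=aarch64-linux-gnu-gcc FC=aarch64-linux-gnu-gfortran HOSTCC=gcc TARGET=ARMV8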
# If you need a 32bit binary, define BINARY=32, otherwise define BINARY=64
# Please note that AVX is not available on 32-bit.
# Setting BINARY=32 disables AVX/AVX2/AVX-512.
# BINARY=64
# About threaded BLAS. It will be automatically detected if you don't
# specify it.
# To force a single-threaded build, specify USE_THREAD = 0
# To force a multithreaded build, specify USE_THREAD = 1
# USE_THREAD = 0
# If you want to build a single-threaded OpenBLAS, but expect to call this
# library from several concurrent threads in some other program, comment this in
# for thread safety. (This is done automatically for USE_THREAD=1, and should not
# be necessary when USE_OPENMP=1)
# USE_LOCKING = 1
# If you're going to use this library with OpenMP, please comment this in.
# This flag is always set for POWER8. Don't set USE_OPENMP = 0 if you're targeting POWER8.
# USE_OPENMP = 1
# The OpenMP scheduler to use - by default this is "static" and you
# will normally not want to change this unless you know that your main
# workload will involve tasks that have highly unbalanced running times
# for individual threads. Changing away from "static" may also adversely
# affect memory access locality in NUMA systems. Setting it to "runtime"
# lets you select the scheduler at run time through the environment
# variable OMP_SCHEDULE.
# CCOMMON_OPT += -DOMP_SCHED=dynamic
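# For example (illustrative only; OMP_SCHEDULE is the standard OpenMP environment
# variable mentioned above, and "your_program" is just a placeholder):
# CCOMMON_OPT += -DOMP_SCHED=runtime
# then at run time:  OMP_SCHEDULE=dynamic ./your_program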
# You can define the maximum number of threads. Basically it should be less
# than or equal to the number of CPU threads. If you don't specify one, it's
# automatically detected by the build system.
# If SMT (aka HT) is enabled on the system, it may or may not be beneficial to
# restrict NUM_THREADS to the number of physical cores. By default, the automatic
# detection includes logical CPUs, thus allowing the use of SMT.
# Users may opt at runtime to use fewer than NUM_THREADS threads.
#
# Note for package maintainers: you can build OpenBLAS with a large NUM_THREADS
# value (e.g. 32-256) if you expect your users to use that many threads. Due to the way
# some internal structures are allocated, using a large NUM_THREADS value has a RAM
# footprint penalty, even if users reduce the actual number of threads at runtime.
# NUM_THREADS = 24
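# Usage note (OPENBLAS_NUM_THREADS is OpenBLAS's standard runtime control;
# "your_program" is just a placeholder): users can run with fewer threads than
# the compiled-in maximum, e.g.
# OPENBLAS_NUM_THREADS=4 ./your_program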
# If you have enabled USE_OPENMP and your application would call
# OpenBLAS's calculation API from multiple threads, please comment this in.
# This flag defines how many instances of OpenBLAS's calculation API can actually
# run in parallel. If more than NUM_PARALLEL threads call OpenBLAS's calculation API,
# they need to wait for the preceding API calls to finish or risk data corruption.
# NUM_PARALLEL = 2
# When multithreading, OpenBLAS needs to use a memory buffer for communicating
# and collating results for individual subranges of the original matrix. Since
# the original GotoBLAS of the early 2000s, the default size of this buffer has
# been set at a value of 32<<20 (which is 32MB) on x86_64, twice that on PPC.
# If you expect to handle large problem sizes (beyond about 30000x30000), uncomment
# this line and adjust the (32<<n) factor if necessary. Usually an insufficient value
# manifests itself as a crash in the relevant scal kernel (sscal_k, dscal_k etc.)
# BUFFERSIZE = 25
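# A rough worked example, assuming the value is the shift n in the (32<<n)
# buffer size mentioned above: the default n = 20 gives 32<<20 bytes = 32 MB,
# while BUFFERSIZE = 25 would give 32<<25 bytes = 1 GB per buffer.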
# If you don't need to install the static library, please comment this in.
# NO_STATIC = 1
# If you don't need to generate the shared library, please comment this in.
# NO_SHARED = 1
# If you don't need the CBLAS interface, please comment this in.
# NO_CBLAS = 1
# If you only want the CBLAS interface without installing a Fortran compiler,
# please comment this in.
# ONLY_CBLAS = 1
# If you don't need LAPACK, please comment this in.
# If you set NO_LAPACK=1, the build system automatically sets NO_LAPACKE=1.
# NO_LAPACK = 1
# If you don't need LAPACKE (the C interface to LAPACK), please comment this in.
# NO_LAPACKE = 1
# Build the LAPACK functions that were deprecated in LAPACK 3.6.0
BUILD_LAPACK_DEPRECATED = 1
# The variable type assumed for the length of character arguments when passing
# data between Fortran LAPACK and C BLAS (defaults to "size_t", but older GCC
# versions used "int"). Mismatches will not cause runtime failures but may result
# in build warnings or errors when building with link-time optimization (LTO)
# LAPACK_STRLEN=int
# Build RecursiveLAPACK on top of LAPACK
# BUILD_RELAPACK = 1
# Have RecursiveLAPACK actually replace the standard LAPACK routines instead of
# just adding its equivalents with a RELAPACK_ prefix
# RELAPACK_REPLACE = 1
# If you want to use the legacy threaded Level 3 implementation.
# USE_SIMPLE_THREADED_LEVEL3 = 1
# If you want to use the new, still somewhat experimental code that uses
# thread-local storage instead of a central memory buffer in memory.c
# Note that if your system uses GLIBC, it needs to have at least glibc 2.21
# for this to work.
# USE_TLS = 1
# If you want the BLAS interface to use 64-bit integers throughout. Not all
# Fortran compilers support this. It's safe to keep this commented out if you
# are not sure. (This is equivalent to the ifort "-i8" option.)
# INTERFACE64 = 1
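# As an illustration of a common 64-bit integer build (pairing INTERFACE64 with
# the SYMBOLSUFFIX option described further below is a convention to avoid symbol
# clashes with 32-bit integer BLAS libraries, not a requirement):
# make INTERFACE64=1 SYMBOLSUFFIX=64_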
# Unfortunately most kernels won't give us a high quality buffer.
# BLAS tries to find the best memory region before entering the main function,
# but this warmup costs time. If you don't want it, you can disable it here.
NO_WARMUP = 1
# Comment this in if you want to disable OpenBLAS's CPU/Memory affinity handling.
# This feature is only implemented on Linux, and is always disabled on other platforms.
# Enabling affinity handling may improve performance, especially on NUMA systems, but
# it may conflict with certain applications that also try to manage affinity.
# This conflict can result in threads of the application calling OpenBLAS ending up locked
# to the same core(s) as OpenBLAS, possibly binding all threads to a single core.
# For this reason, affinity handling is disabled by default. It can be safely enabled
# if nothing else modifies affinity settings.
# Note: enabling affinity has been known to cause problems with NumPy and R
NO_AFFINITY = 1
# If you are compiling for Linux and you have more than 16 NUMA nodes or more than 256 CPUs
# BIGNUMA = 1
# If you are compiling for an embedded system ("bare metal") like the Cortex-M series
# Note that you will have to provide implementations of malloc() and free() in this case
# EMBEDDED = 1
# Don't use the AVX kernels on Sandy Bridge. This is compatible with old compilers
# and operating systems, but the performance is low.
# NO_AVX = 1
# Don't use Haswell optimizations if binutils is too old (e.g. RHEL6)
# NO_AVX2 = 1
# Don't use SkylakeX optimizations if binutils or the compiler are too old (the build
# system will try to determine this automatically)
# NO_AVX512 = 1
# Don't use parallel make.
# NO_PARALLEL_MAKE = 1
# Force the number of make jobs. The default is the number of logical CPUs of the host.
# This is particularly useful when using distcc.
# A negative value will disable adding a -j flag to make, allowing a parent make's
# -j value to be used. This is useful when calling the OpenBLAS make from another
# project's makefile.
# MAKE_NB_JOBS = 2
# If you would like a detailed performance report of GotoBLAS.
# FUNCTION_PROFILE = 1
# Support for IEEE quad precision (i.e. *real* REAL*16) (under testing)
# This option should not be used - it is a holdover from unfinished code present
# in the original GotoBLAS2 library that may be usable as a starting point but
# is not even expected to compile in its present form.
# QUAD_PRECISION = 1
# Threads keep working for a while after a BLAS operation finishes,
# to reduce thread activate/deactivate overhead. You can tune this
# timeout to improve performance. The number should be from 4 to 30,
# and corresponds to (1 << n) cycles. For example, if you set it to 26,
# threads will keep running for (1 << 26) cycles (about 25ms on a 3.0GHz
# system). You can also control this number via THREAD_TIMEOUT.
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
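# A quick sanity check of that figure (simple arithmetic, not a measured value):
# (1 << 26) = 67,108,864 cycles, and 67,108,864 / 3.0e9 Hz is roughly 0.022 s,
# i.e. on the order of the ~25 ms quoted above.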
# Use a special device driver for mapping physically contiguous memory
# to user space. If bigphysarea is enabled, it will be used.
# DEVICEDRIVER_ALLOCATION = 1
# Use large page allocation (called hugepage support in the Linux context)
# for the thread buffers (with access by shared memory operations)
# HUGETLB_ALLOCATION = 1
# Use large page allocation (called hugepages in Linux) based on mmap accessing
# a memory-backed pseudofile (requires hugetlbfs to be mounted in the system;
# the example below has it mounted on /hugepages. OpenBLAS will create the backing
# file as gotoblas.processid in that path)
# HUGETLBFILE_ALLOCATION = /hugepages
# If you need to synchronize the FP CSR between threads (for x86/x86_64 and aarch64 only).
# CONSISTENT_FPCSR = 1
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be
# executed with a single thread. (Actually, in recent versions this is a factor
# proportional to the number of floating point operations necessary for the given
# problem size, no longer an individual dimension.) You can use this setting to avoid
# the overhead of multithreading for small matrix sizes. The default value is 4, but
# values as high as 50 have been reported to be optimal for certain workloads (50 is
# the recommended value for Julia).
# GEMM_MULTITHREAD_THRESHOLD = 4
# If you need a sanity check that compares results to the reference BLAS. It will be
# very slow. (Not implemented yet.)
# SANITY_CHECK = 1
# The installation directory.
# PREFIX = /opt/OpenBLAS
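# Illustrative install step, reusing the example path above (PREFIX can equally
# well be set in this file instead of on the command line):
# make PREFIX=/opt/OpenBLAS install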
# Common optimization flags;
# the default -O2 is enough.
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
# COMMON_OPT = -O2
# gfortran option for LAPACK to improve thread safety
# It is enabled by default in Makefile.system for gfortran
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
# FCOMMON_OPT = -frecursive
# Profiling flags
COMMON_PROF = -pg
# Build a debug version
# DEBUG = 1
# Set the maximum stack allocation.
# The default value is 2048. Setting it to 0 disables stack allocation and may reduce
# GER and GEMV performance. For details, see https://github.com/xianyi/OpenBLAS/pull/482
#
# MAX_STACK_ALLOC = 0
# Add a prefix or suffix to all exported symbol names in the shared library.
# This helps avoid conflicts with other BLAS libraries, especially when using
# 64 bit integer interfaces in OpenBLAS.
# For details, see https://github.com/xianyi/OpenBLAS/pull/459
#
# The same prefix and suffix are also added to the library name,
# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas
#
# SYMBOLPREFIX=
# SYMBOLSUFFIX=
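# As an illustration (the "64_" suffix is merely the common convention for
# 64-bit integer builds, not something mandated here): with
# SYMBOLSUFFIX = 64_
# an exported routine such as dgemm_ becomes dgemm_64_, and the library is
# named libopenblas64_ following the pattern described above.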
# Run a C++ based thread safety tester after the build is done.
# This is mostly intended as a developer feature to spot regressions, but users and
# package maintainers can enable this if they have doubts about the thread safety of
# the library, given the configuration in this file.
# By default, the thread safety tester launches 52 concurrent calculations.
#
# Please note that the test uses ~1300 MiB of RAM for the DGEMM test.
#
# The test requires CBLAS to be built, a C++11 capable compiler and the presence of
# an OpenMP implementation. If you are cross-compiling, this test will probably not
# work at all.
#
# CPP_THREAD_SAFETY_TEST = 1
#
# use this to run only the less memory-hungry GEMV test
# CPP_THREAD_SAFETY_GEMV = 1
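# Purely as a usage sketch (just the variables above passed on the make command
# line rather than uncommented here):
# make CPP_THREAD_SAFETY_TEST=1
# or, for the lighter GEMV-only variant described above,
# make CPP_THREAD_SAFETY_GEMV=1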
# If you want to enable the experimental BFLOAT16 support
# BUILD_BFLOAT16 = 1
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
# will be allocated on the heap rather than the stack. (This array alone requires
# NUM_THREADS*NUM_THREADS*128 bytes of memory, so it should not pose a problem at low cpu
# counts, but obviously it is not the only item that ends up on the stack.)
# The default value of 32 ensures that the overall requirement is compatible
# with the default 1MB stack size imposed by having the Java VM loaded without use
# of its -Xss parameter.
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
# with the common Linux stack size of 8MB, but will cause crashes with unwary use of the
# Java VM, e.g. in Octave or with the Java-based libhdfs in NumPy or SciPy code.
# BLAS3_MEM_ALLOC_THRESHOLD = 160
# By default the library contains BLAS functions (and LAPACK if selected) for all input types.
# To build a smaller library supporting e.g. only single precision real (SGEMM etc.) or only
# the functions for complex numbers, uncomment the desired type(s) below:
# BUILD_SINGLE = 1
# BUILD_DOUBLE = 1
# BUILD_COMPLEX = 1
# BUILD_COMPLEX16 = 1
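# As a sketch (equivalent to uncommenting only the first of the four lines above),
# a single precision real only build could also be requested on the command line:
# make BUILD_SINGLE=1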
#
# End of user configuration
#