|
|
@@ -60,6 +60,14 @@ VERSION = 0.3.1.dev |
|
|
|
# This flag is always set for POWER8. Don't modify the flag |
|
|
|
# USE_OPENMP = 1 |
|
|
|
|
|
|
|
# The OpenMP scheduler to use - by default this is "static" and you |
|
|
|
# will normally not want to change this unless you know that your main |
|
|
|
# workload will involve tasks that have highly unbalanced running times |
|
|
|
# for individual threads. Changing away from "static" may also adversely |
|
|
|
# affect memory access locality in NUMA systems. Setting to "runtime" will |
|
|
|
# allow you to select the scheduler from the environment variable OMP_SCHEDULE |
|
|
|
# CCOMMON_OPT += -DOMP_SCHED=dynamic |
|
|
|
|
|
|
|
# You can define the maximum number of threads. Basically it should be |
|
|
|
# less than the actual number of cores. If you don't specify one, it's |
|
|
|
# automatically detected by the script. |
|
|
@@ -156,8 +164,11 @@ NO_AFFINITY = 1 |
|
|
|
# CONSISTENT_FPCSR = 1 |
|
|
|
|
|
|
|
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed |
|
|
|
# with single thread. You can use this flag to avoid the overhead of multi-threading |
|
|
|
# in small matrix sizes. The default value is 4. |
|
|
|
# with single thread. (Actually in recent versions this is a factor proportional to the |
|
|
|
# number of floating point operations necessary for the given problem size, no longer |
|
|
|
# an individual dimension). You can use this setting to avoid the overhead of multi- |
|
|
|
# threading in small matrix sizes. The default value is 4, but values as high as 50 have |
|
|
|
# been reported to be optimal for certain workloads (50 is the recommended value for Julia). |
|
|
|
# GEMM_MULTITHREAD_THRESHOLD = 4 |
|
|
|
|
|
|
|
# If you need a sanity check by comparing against reference BLAS. It'll be very |
|
|
|