@@ -70,7 +70,7 @@ endfunction () | |||||
# @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU) | # @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU) | ||||
# @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters) | # @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters) | ||||
# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc) | # @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc) | ||||
# @param complex_only/real_only some routines have separate source files for complex and non-complex float types. | |||||
# @param complex_filename_scheme some routines have separate source files for complex and non-complex float types. | |||||
# 0 - compiles for all types | # 0 - compiles for all types | ||||
# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) | # 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) | ||||
# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) | # 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) | ||||
@@ -88,7 +88,7 @@ function(GenerateNamedObjects sources_in) | |||||
if (DEFINED ARGV3) | if (DEFINED ARGV3) | ||||
set(use_cblas ${ARGV3}) | set(use_cblas ${ARGV3}) | ||||
else () | else () | ||||
set(use_cblas 0) | |||||
set(use_cblas false) | |||||
endif () | endif () | ||||
if (DEFINED ARGV4) | if (DEFINED ARGV4) | ||||
@@ -108,7 +108,7 @@ function(GenerateNamedObjects sources_in) | |||||
set(real_only false) | set(real_only false) | ||||
set(complex_only false) | set(complex_only false) | ||||
set(mangle_complex_sources false) | set(mangle_complex_sources false) | ||||
if (DEFINED ARGV7) | |||||
if (DEFINED ARGV7 AND NOT "${ARGV7}" STREQUAL "") | |||||
if (${ARGV7} EQUAL 1) | if (${ARGV7} EQUAL 1) | ||||
set(real_only true) | set(real_only true) | ||||
elseif (${ARGV7} EQUAL 2) | elseif (${ARGV7} EQUAL 2) | ||||
@@ -204,6 +204,7 @@ endfunction () | |||||
# If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel | # If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel | ||||
# @param alternate_name replaces the source name as the object name (define codes are still appended) | # @param alternate_name replaces the source name as the object name (define codes are still appended) | ||||
# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc) | # @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc) | ||||
# @param complex_filename_scheme see GenerateNamedObjects | |||||
function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme) | function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme) | ||||
if (DEFINED ARGV5) | if (DEFINED ARGV5) | ||||
@@ -214,6 +215,10 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de | |||||
set(no_float_type ${ARGV6}) | set(no_float_type ${ARGV6}) | ||||
endif () | endif () | ||||
if (DEFINED ARGV7) | |||||
set(complex_filename_scheme ${ARGV7}) | |||||
endif () | |||||
AllCombinations("${defines_in}" "${absent_codes_in}") | AllCombinations("${defines_in}" "${absent_codes_in}") | ||||
set(define_combos ${LIST_OUT}) | set(define_combos ${LIST_OUT}) | ||||
set(define_codes ${CODES_OUT}) | set(define_codes ${CODES_OUT}) | ||||
@@ -271,7 +276,7 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de | |||||
endif () | endif () | ||||
endif () | endif () | ||||
GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}" "${no_float_type}") | |||||
GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}") | |||||
list(APPEND COMBO_OBJ_LIST_OUT "${OBJ_LIST_OUT}") | list(APPEND COMBO_OBJ_LIST_OUT "${OBJ_LIST_OUT}") | ||||
endforeach () | endforeach () | ||||
endforeach () | endforeach () | ||||
@@ -31,6 +31,7 @@ set(BLAS2_SOURCES | |||||
tpsv.c tpmv.c | tpsv.c tpmv.c | ||||
) | ) | ||||
# these do not have separate 'z' sources | |||||
set(BLAS3_SOURCES | set(BLAS3_SOURCES | ||||
gemm.c symm.c | gemm.c symm.c | ||||
trsm.c syrk.c syr2k.c | trsm.c syrk.c syr2k.c | ||||
@@ -53,12 +54,19 @@ endif () | |||||
foreach (CBLAS_FLAG ${CBLAS_FLAGS}) | foreach (CBLAS_FLAG ${CBLAS_FLAGS}) | ||||
GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG}) | |||||
GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 1) | |||||
GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3) | |||||
GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3) | |||||
GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG}) | |||||
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3) | |||||
# TODO: don't compile complex sources with cblas for now, the naming schemes are all different and they will have to be handled separately from SINGLE/DOUBLE | |||||
set(DISABLE_COMPLEX 0) | |||||
set(MANGLE_COMPLEX 3) | |||||
if (CBLAS_FLAG EQUAL 1) | |||||
set(DISABLE_COMPLEX 1) | |||||
set(MANGLE_COMPLEX 1) | |||||
endif () | |||||
GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) | |||||
GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) | |||||
GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | |||||
GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | |||||
GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) | |||||
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) | |||||
# trmm is trsm with a compiler flag set | # trmm is trsm with a compiler flag set | ||||
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) | GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) | ||||
@@ -3,29 +3,36 @@ include_directories(${CMAKE_SOURCE_DIR}) | |||||
set(LAPACK_SOURCES | set(LAPACK_SOURCES | ||||
getf2/getf2_k.c | |||||
getrf/getrf_single.c | getrf/getrf_single.c | ||||
potrf/potrf_U_single.c | potrf/potrf_U_single.c | ||||
potrf/potrf_L_single.c | potrf/potrf_L_single.c | ||||
potf2/potf2_U.c | |||||
potf2/potf2_L.c | |||||
lauu2/lauu2_U.c | |||||
lauu2/lauu2_L.c | |||||
lauum/lauum_U_single.c | lauum/lauum_U_single.c | ||||
lauum/lauum_L_single.c | lauum/lauum_L_single.c | ||||
) | ) | ||||
# add a 'z' to filename for complex version | |||||
set(LAPACK_MANGLED_SOURCES | |||||
getf2/getf2_k.c | |||||
lauu2/lauu2_U.c | |||||
lauu2/lauu2_L.c | |||||
potf2/potf2_U.c | |||||
potf2/potf2_L.c | |||||
) | |||||
# sources that need TRANS set | # sources that need TRANS set | ||||
# this has a 'z' version | |||||
set(TRANS_SOURCES | set(TRANS_SOURCES | ||||
getrs/getrs_single.c | getrs/getrs_single.c | ||||
) | ) | ||||
# sources that need UNIT set | # sources that need UNIT set | ||||
# these do NOT have a z version | |||||
set(UNIT_SOURCES | set(UNIT_SOURCES | ||||
trtri/trtri_U_single.c | trtri/trtri_U_single.c | ||||
trtri/trtri_L_single.c | trtri/trtri_L_single.c | ||||
) | ) | ||||
# these have a 'z' version | |||||
set(UNIT_SOURCES2 | set(UNIT_SOURCES2 | ||||
trti2/trti2_U.c | trti2/trti2_U.c | ||||
trti2/trti2_L.c | trti2/trti2_L.c | ||||
@@ -51,6 +58,7 @@ set(ZLAPACK_SOURCES | |||||
) | ) | ||||
GenerateNamedObjects("${LAPACK_SOURCES}") | GenerateNamedObjects("${LAPACK_SOURCES}") | ||||
GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" "" 3) | |||||
# TODO: laswp needs arch specific code | # TODO: laswp needs arch specific code | ||||
GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus") | GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus") | ||||
@@ -64,40 +72,32 @@ if (SMP) | |||||
set(GETRF_SRC getrf/getrf_parallel.c) | set(GETRF_SRC getrf/getrf_parallel.c) | ||||
endif () | endif () | ||||
# these do not have 'z' versions | |||||
set(PARALLEL_SOURCES | set(PARALLEL_SOURCES | ||||
${GETRF_SRC} | ${GETRF_SRC} | ||||
potrf/potrf_U_parallel.c | |||||
potrf/potrf_L_parallel.c | |||||
lauum/lauum_U_parallel.c | lauum/lauum_U_parallel.c | ||||
lauum/lauum_L_parallel.c | lauum/lauum_L_parallel.c | ||||
potrf/potrf_U_parallel.c | |||||
potrf/potrf_L_parallel.c | |||||
) | ) | ||||
# this has a z version | |||||
list(APPEND TRANS_SOURCES | list(APPEND TRANS_SOURCES | ||||
getrs/getrs_parallel.c | getrs/getrs_parallel.c | ||||
) | ) | ||||
# these do NOT have a z version | |||||
list(APPEND UNIT_SOURCES | list(APPEND UNIT_SOURCES | ||||
trtri/trtri_U_parallel.c | trtri/trtri_U_parallel.c | ||||
trtri/trtri_L_parallel.c | trtri/trtri_L_parallel.c | ||||
) | ) | ||||
set(ZPARALLEL_SOURCES | |||||
${GETRF_SRC} | |||||
getrs/zgetrs_parallel.c | |||||
potrf/potrf_U_parallel.c | |||||
potrf/potrf_L_parallel.c | |||||
lauum/lauum_U_parallel.c | |||||
lauum/lauum_L_parallel.c | |||||
trtri/trtri_U_parallel.c | |||||
trtri/trtri_L_parallel.c | |||||
) | |||||
GenerateNamedObjects("${PARALLEL_SOURCES}") | GenerateNamedObjects("${PARALLEL_SOURCES}") | ||||
endif () | endif () | ||||
GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4) | |||||
GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4 "" "" 3) | |||||
GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4) | GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4) | ||||
GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0) | |||||
GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0 "" "" 3) | |||||
set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS | set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS | ||||