Added loop to build objects with different KERNEL defines. (tags/v0.2.15^2)
@@ -0,0 +1,110 @@ | |||
# helper functions for the kernel CMakeLists.txt | |||
# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
# Assigns the fallback source file for every <precision><operation>KERNEL
# variable. This is a macro, so all variables land in the caller's scope.
# The assignments are grouped by precision prefix; each variable is set
# exactly once, so the order of the assignments does not matter.
macro(SetDefaultL1)
  # --- S prefix ---
  set(SAMAXKERNEL "amax.S")
  set(SAMINKERNEL "amin.S")
  set(SMAXKERNEL "max.S")
  set(SMINKERNEL "min.S")
  set(ISAMAXKERNEL "iamax.S")
  set(ISAMINKERNEL "iamin.S")
  set(ISMAXKERNEL "iamax.S")
  set(ISMINKERNEL "iamin.S")
  set(SASUMKERNEL "asum.S")
  set(SAXPYKERNEL "axpy.S")
  set(SCOPYKERNEL "copy.S")
  set(SDOTKERNEL "dot.S")
  set(SNRM2KERNEL "nrm2.S")
  set(SROTKERNEL "rot.S")
  set(SSCALKERNEL "scal.S")
  set(SSWAPKERNEL "swap.S")
  set(SGEMVNKERNEL "gemv_n.S")
  set(SGEMVTKERNEL "gemv_t.S")
  set(SCABS_KERNEL "cabs.S")
  set(SAXPBYKERNEL "../arm/axpby.c")

  # --- D prefix ---
  set(DAMAXKERNEL "amax.S")
  set(DAMINKERNEL "amin.S")
  set(DMAXKERNEL "max.S")
  set(DMINKERNEL "min.S")
  set(IDAMAXKERNEL "iamax.S")
  set(IDAMINKERNEL "iamin.S")
  set(IDMAXKERNEL "iamax.S")
  set(IDMINKERNEL "iamin.S")
  set(DASUMKERNEL "asum.S")
  set(DAXPYKERNEL "axpy.S")
  set(DCOPYKERNEL "copy.S")
  set(DDOTKERNEL "dot.S")
  set(DNRM2KERNEL "nrm2.S")
  set(DROTKERNEL "rot.S")
  set(DSCALKERNEL "scal.S")
  set(DSWAPKERNEL "swap.S")
  set(DGEMVNKERNEL "gemv_n.S")
  set(DGEMVTKERNEL "gemv_t.S")
  set(DCABS_KERNEL "cabs.S")
  set(DAXPBYKERNEL "../arm/axpby.c")

  # --- Q prefix (no AXPBY default is provided for Q) ---
  set(QAMAXKERNEL "amax.S")
  set(QAMINKERNEL "amin.S")
  set(QMAXKERNEL "max.S")
  set(QMINKERNEL "min.S")
  set(IQAMAXKERNEL "iamax.S")
  set(IQAMINKERNEL "iamin.S")
  set(IQMAXKERNEL "iamax.S")
  set(IQMINKERNEL "iamin.S")
  set(QASUMKERNEL "asum.S")
  set(QAXPYKERNEL "axpy.S")
  set(QCOPYKERNEL "copy.S")
  set(QDOTKERNEL "dot.S")
  set(QNRM2KERNEL "nrm2.S")
  set(QROTKERNEL "rot.S")
  set(QSCALKERNEL "scal.S")
  set(QSWAPKERNEL "swap.S")
  set(QGEMVNKERNEL "gemv_n.S")
  set(QGEMVTKERNEL "gemv_t.S")
  set(QCABS_KERNEL "cabs.S")

  # --- C prefix (sources carry a leading z) ---
  set(CAMAXKERNEL "zamax.S")
  set(CAMINKERNEL "zamin.S")
  set(ICAMAXKERNEL "izamax.S")
  set(ICAMINKERNEL "izamin.S")
  set(CASUMKERNEL "zasum.S")
  set(CAXPYKERNEL "zaxpy.S")
  set(CCOPYKERNEL "zcopy.S")
  set(CDOTKERNEL "zdot.S")
  set(CNRM2KERNEL "znrm2.S")
  set(CROTKERNEL "zrot.S")
  set(CSCALKERNEL "zscal.S")
  set(CSWAPKERNEL "zswap.S")
  set(CGEMVNKERNEL "zgemv_n.S")
  set(CGEMVTKERNEL "zgemv_t.S")
  set(CAXPBYKERNEL "../arm/zaxpby.c")

  # --- Z prefix ---
  set(ZAMAXKERNEL "zamax.S")
  set(ZAMINKERNEL "zamin.S")
  set(IZAMAXKERNEL "izamax.S")
  set(IZAMINKERNEL "izamin.S")
  set(ZASUMKERNEL "zasum.S")
  set(ZAXPYKERNEL "zaxpy.S")
  set(ZCOPYKERNEL "zcopy.S")
  set(ZDOTKERNEL "zdot.S")
  set(ZNRM2KERNEL "znrm2.S")
  set(ZROTKERNEL "zrot.S")
  set(ZSCALKERNEL "zscal.S")
  set(ZSWAPKERNEL "zswap.S")
  set(ZGEMVNKERNEL "zgemv_n.S")
  set(ZGEMVTKERNEL "zgemv_t.S")
  set(ZAXPBYKERNEL "../arm/zaxpby.c")

  # --- X prefix (no AXPBY default is provided for X) ---
  set(XAMAXKERNEL "zamax.S")
  set(XAMINKERNEL "zamin.S")
  set(IXAMAXKERNEL "izamax.S")
  set(IXAMINKERNEL "izamin.S")
  set(XASUMKERNEL "zasum.S")
  set(XAXPYKERNEL "zaxpy.S")
  set(XCOPYKERNEL "zcopy.S")
  set(XDOTKERNEL "zdot.S")
  set(XNRM2KERNEL "znrm2.S")
  set(XROTKERNEL "zrot.S")
  set(XSCALKERNEL "zscal.S")
  set(XSWAPKERNEL "zswap.S")
  set(XGEMVNKERNEL "zgemv_n.S")
  set(XGEMVTKERNEL "zgemv_t.S")

  # --- not tied to a precision prefix ---
  set(LSAME_KERNEL "lsame.S")
endmacro ()
@@ -12,6 +12,27 @@ function(ParseGetArchVars GETARCH_IN) | |||
endforeach () | |||
endfunction () | |||
# Reads a Makefile into CMake vars.
#
# Every line of MAKEFILE_IN that looks like `NAME = value` defines the variable
# NAME, holding `value` with surrounding whitespace removed, in the scope of the
# caller. Comment lines and lines that are not plain assignments are reported
# via message(STATUS) and otherwise ignored.
#
# MAKEFILE_IN - path of the Makefile to read
#
# TODO: read nested Makefiles (I think 1 level should do)
# TODO: respect IFDEF/IFNDEF?
# TODO: regex replace makefile vars, e.g. $(TSUFFIX) is set to the target arch in the var CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
# TODO: bail when makefile is missing, like -include
function(ParseMakefileVars MAKEFILE_IN)
  message(STATUS "Reading vars from ${MAKEFILE_IN}...")
  # Quote the path so a directory containing spaces or semicolons stays one argument.
  file(STRINGS "${MAKEFILE_IN}" makefile_contents)
  foreach (makefile_line ${makefile_contents})
    set(line_match "")
    # A commented-out assignment (`# FOO = bar`) must not define FOO, so comment
    # lines never reach the assignment regex.
    if (NOT "${makefile_line}" MATCHES "^[ \t]*#")
      string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
    endif ()
    if (NOT "${line_match}" STREQUAL "")
      set(var_name ${CMAKE_MATCH_1})
      # (.+)$ also captures trailing blanks; drop them so file names stay clean.
      string(STRIP "${CMAKE_MATCH_2}" var_value)
      # CMake variable names are case-sensitive: export the lowercase locals set
      # just above (the previous ${VAR_NAME}/${VAR_VALUE} expanded to nothing).
      set(${var_name} "${var_value}" PARENT_SCOPE)
      message(STATUS "found var ${var_name} = ${var_value}")
    else ()
      message(STATUS "couldn't parse ${makefile_line} into a var")
    endif ()
  endforeach ()
endfunction ()
# Returns all combinations of the input list, as a list with colon-separated combinations | |||
# E.g. input of A B C returns A B C A:B A:C B:C | |||
# N.B. The input is meant to be a list, and to pass a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")).
@@ -75,6 +96,7 @@ endfunction () | |||
# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) | |||
# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) | |||
# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c) | |||
# STRING - compiles only the given type (e.g. DOUBLE) | |||
function(GenerateNamedObjects sources_in) | |||
if (DEFINED ARGV1) | |||
@@ -105,6 +127,12 @@ function(GenerateNamedObjects sources_in) | |||
set(no_float_type false) | |||
endif () | |||
if (no_float_type) | |||
set(float_list "DUMMY") # still need to loop once | |||
else () | |||
set(float_list "${FLOAT_TYPES}") | |||
endif () | |||
set(real_only false) | |||
set(complex_only false) | |||
set(mangle_complex_sources false) | |||
@@ -115,20 +143,17 @@ function(GenerateNamedObjects sources_in) | |||
set(complex_only true) | |||
elseif (${ARGV7} EQUAL 3) | |||
set(mangle_complex_sources true) | |||
elseif (NOT ${ARGV7} EQUAL 0) | |||
set(float_list ${ARGV7}) | |||
endif () | |||
endif () | |||
if (no_float_type) | |||
set(float_list "DUMMY") # still need to loop once | |||
else () | |||
set(float_list "${FLOAT_TYPES}") | |||
if (complex_only) | |||
list(REMOVE_ITEM float_list "SINGLE") | |||
list(REMOVE_ITEM float_list "DOUBLE") | |||
elseif (real_only) | |||
list(REMOVE_ITEM float_list "COMPLEX") | |||
list(REMOVE_ITEM float_list "ZCOMPLEX") | |||
endif () | |||
if (complex_only) | |||
list(REMOVE_ITEM float_list "SINGLE") | |||
list(REMOVE_ITEM float_list "DOUBLE") | |||
elseif (real_only) | |||
list(REMOVE_ITEM float_list "COMPLEX") | |||
list(REMOVE_ITEM float_list "ZCOMPLEX") | |||
endif () | |||
set(OBJ_LIST_OUT "") | |||
@@ -28,15 +28,15 @@ set(NU_SOURCES | |||
) | |||
# objects that need LOWER set | |||
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1) | |||
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) | |||
# objects that need TRANSA and UNIT set | |||
# N.B. BLAS wants to put the U/L from the filename in the *MIDDLE* because of course why not have a different naming scheme for every single object -hpa | |||
GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "" 3) | |||
GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "" 3 "" "" 3) | |||
# gbmv uses a lowercase n and t. WHY? WHO KNOWS! | |||
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n") | |||
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t") | |||
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) | |||
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) | |||
if (SMP) | |||
@@ -1,67 +1,68 @@ | |||
include_directories(${CMAKE_SOURCE_DIR}) | |||
include("${CMAKE_SOURCE_DIR}/cmake/kernel.cmake") | |||
# Makefile
if (DEFINED TARGET_CORE) | |||
#override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) | |||
set(BUILD_KERNEL 1) | |||
set(KDIR "") | |||
set(TSUFFIX "_${TARGET_CORE}") | |||
else () | |||
set(TARGET_CORE ${CORE}) | |||
set(KDIR "") | |||
set(TSUFFIX "") | |||
endif () | |||
SetDefaultL1() | |||
#-include $(KERNELDIR)/KERNEL.$(TARGET_CORE) | |||
#include $(KERNELDIR)/KERNEL | |||
ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") | |||
ParseMakefileVars("${KERNELDIR}/KERNEL") | |||
if (${ARCH} STREQUAL "x86") | |||
GenerateNamedObjects("${KERNELDIR}/cpuid.S" "") | |||
endif () | |||
# TODO: Read from ${KERNELDIR}/KERNEL - some architectures use a different lsame | |||
set(LSAME_KERNEL lsame.S) | |||
set(SCABS_KERNEL cabs.S) | |||
set(DCABS_KERNEL cabs.S) | |||
# don't use float type name mangling here | |||
GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" 0 "" "" 1) | |||
GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" "" "" 1) | |||
GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" 0 "" "" 1) | |||
GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" false "" "" true) | |||
GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" false "" "" true) | |||
GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" false "" "" true) | |||
# Makefile.L1 | |||
# TODO: need to read ${KERNELDIR}/KERNEL into CMake vars | |||
set(DAMAXKERNEL amax.S) | |||
set(DAMINKERNEL amax.S) | |||
set(DMAXKERNEL amax.S) | |||
set(DMINKERNEL amax.S) | |||
set(IDAMAXKERNEL iamax.S) | |||
set(IDAMINKERNEL iamax.S) | |||
set(IDMAXKERNEL iamax.S) | |||
set(IDMINKERNEL iamax.S) | |||
set(DASUMKERNEL asum.S) | |||
set(DAXPYKERNEL axpy.S) | |||
set(DCOPYKERNEL copy.S) | |||
set(DDOTKERNEL dot.S) | |||
set(DNRM2KERNEL nrm2.S) | |||
set(DROTKERNEL rot.S) | |||
set(DSCALKERNEL scal.S) | |||
set(DSWAPKERNEL swap.S) | |||
set(DAXPBYKERNEL ../arm/axpby.c) | |||
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "" "min_k") | |||
GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k") | |||
GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k") | |||
GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k") | |||
GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "" "i*min_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k") | |||
GenerateNamedObjects("${KERNELDIR}/${DAXPBYKERNEL}" "" "axpby_k") | |||
foreach (float_type ${FLOAT_TYPES}) | |||
# a bit of metaprogramming here to pull out the appropriate KERNEL var | |||
string(SUBSTRING ${float_type} 0 1 float_char) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) | |||
GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) | |||
endforeach () | |||
# Makefile.L2 | |||
GenerateNamedObjects("${KERNELDIR}/gemv_n.S" "DOUBLE") | |||
GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "TRANS") | |||
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1) | |||
GenerateNamedObjects("generic/ger.c" "" "ger_k") | |||
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) | |||
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) | |||
# Makefile.L3 | |||
@@ -77,7 +78,7 @@ set(DGEMMITCOPYOBJ gemm_itcopy) | |||
set(DGEMMONCOPYOBJ gemm_oncopy) | |||
set(DGEMMOTCOPYOBJ gemm_otcopy) | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel") | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" "" 3) | |||
if (DGEMMINCOPY) | |||
GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "" "${DGEMMINCOPYOBJ}") | |||