Added HFLOAT16 support for RISCV64. Added shgemm_kernel_8x8 for RISCV64_ZVL128B and shgemm_kernel_16x8 for RISCV64_ZVL256B, both based on HFLOAT16. The ISA extensions used are ZVFH and ZFH, which need to be supported on top of RVV 1.0. Related to issue #5279. Co-authored-by: Linjin Li <linjin_li@163.com> (pull/5290/head)
@@ -152,6 +152,9 @@ endif () | |||||
if (NOT DEFINED BUILD_BFLOAT16) | if (NOT DEFINED BUILD_BFLOAT16) | ||||
set (BUILD_BFLOAT16 false) | set (BUILD_BFLOAT16 false) | ||||
endif () | endif () | ||||
# HFLOAT16 support is opt-in: when the user has not defined BUILD_HFLOAT16,
# default it to false (same pattern as the BUILD_BFLOAT16 default just above).
if (NOT DEFINED BUILD_HFLOAT16) | |||||
set (BUILD_HFLOAT16 false) | |||||
endif () | |||||
# set which float types we want to build for | # set which float types we want to build for | ||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) | if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) | ||||
# if none are defined, build for all | # if none are defined, build for all | ||||
@@ -64,11 +64,11 @@ TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d | |||||
endif | endif | ||||
ifeq ($(TARGET), RISCV64_ZVL256B) | ifeq ($(TARGET), RISCV64_ZVL256B) | ||||
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d | |||||
TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||||
endif | endif | ||||
ifeq ($(TARGET), RISCV64_ZVL128B) | ifeq ($(TARGET), RISCV64_ZVL128B) | ||||
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d | |||||
TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||||
endif | endif | ||||
ifeq ($(TARGET), RISCV64_GENERIC) | ifeq ($(TARGET), RISCV64_GENERIC) | ||||
@@ -7,12 +7,12 @@ CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d | |||||
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static | FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static | ||||
endif | endif | ||||
ifeq ($(CORE), RISCV64_ZVL256B) | ifeq ($(CORE), RISCV64_ZVL256B) | ||||
CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d | |||||
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d | |||||
CCOMMON_OPT += -march=rv64imafdcv_zvl256b_zvfh_zfh -mabi=lp64d | |||||
FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||||
endif | endif | ||||
ifeq ($(CORE), RISCV64_ZVL128B) | ifeq ($(CORE), RISCV64_ZVL128B) | ||||
CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d | |||||
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d | |||||
CCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||||
FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||||
endif | endif | ||||
ifeq ($(CORE), RISCV64_GENERIC) | ifeq ($(CORE), RISCV64_GENERIC) | ||||
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d | CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d | ||||
@@ -308,6 +308,8 @@ COMMON_PROF = -pg | |||||
# If you want to enable the experimental BFLOAT16 support | # If you want to enable the experimental BFLOAT16 support | ||||
# BUILD_BFLOAT16 = 1 | # BUILD_BFLOAT16 = 1 | ||||
# If you want to enable the experimental HFLOAT16 support, uncomment the line
# below. It is left disabled by default (like BUILD_BFLOAT16 above) because
# enabling it makes the build use the compiler's _Float16 type for hfloat16,
# which not every target/compiler provides.
# BUILD_HFLOAT16 = 1
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS | # Set the thread number threshold beyond which the job array for the threaded level3 BLAS | ||||
# will be allocated on the heap rather than the stack. (This array alone requires | # will be allocated on the heap rather than the stack. (This array alone requires | ||||
@@ -280,6 +280,7 @@ GEMM_GEMV_FORWARD_BF16 = 1 | |||||
endif | endif | ||||
ifeq ($(ARCH), riscv) | ifeq ($(ARCH), riscv) | ||||
GEMM_GEMV_FORWARD = 1 | GEMM_GEMV_FORWARD = 1 | ||||
BUILD_HFLOAT16 = 1 | |||||
endif | endif | ||||
ifeq ($(ARCH), power) | ifeq ($(ARCH), power) | ||||
GEMM_GEMV_FORWARD = 1 | GEMM_GEMV_FORWARD = 1 | ||||
@@ -1547,6 +1548,9 @@ endif | |||||
ifeq ($(BUILD_BFLOAT16), 1) | ifeq ($(BUILD_BFLOAT16), 1) | ||||
CCOMMON_OPT += -DBUILD_BFLOAT16 | CCOMMON_OPT += -DBUILD_BFLOAT16 | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16), 1) | |||||
CCOMMON_OPT += -DBUILD_HFLOAT16 | |||||
endif | |||||
ifeq ($(BUILD_SINGLE), 1) | ifeq ($(BUILD_SINGLE), 1) | ||||
CCOMMON_OPT += -DBUILD_SINGLE=1 | CCOMMON_OPT += -DBUILD_SINGLE=1 | ||||
endif | endif | ||||
@@ -35,6 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define GEMM BLASFUNC(dgemm) | #define GEMM BLASFUNC(dgemm) | ||||
#elif defined(HALF) | #elif defined(HALF) | ||||
#define GEMM BLASFUNC(sbgemm) | #define GEMM BLASFUNC(sbgemm) | ||||
#elif defined(HFLOAT16) | |||||
#define GEMM BLASFUNC(shgemm) | |||||
#else | #else | ||||
#define GEMM BLASFUNC(sgemm) | #define GEMM BLASFUNC(sgemm) | ||||
#endif | #endif | ||||
@@ -446,7 +446,7 @@ void cblas_sbgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum C | |||||
void cblas_sbgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array, | void cblas_sbgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array, | ||||
OPENBLAS_CONST float * alpha_array, OPENBLAS_CONST bfloat16 ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST bfloat16 ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST float * beta_array, float ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size); | OPENBLAS_CONST float * alpha_array, OPENBLAS_CONST bfloat16 ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST bfloat16 ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST float * beta_array, float ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size); | ||||
/*** FLOAT16 extensions */ | |||||
/*** FLOAT16 extensions ***/ | |||||
void cblas_shgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | void cblas_shgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | ||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST hfloat16 *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST hfloat16 *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | OPENBLAS_CONST float alpha, OPENBLAS_CONST hfloat16 *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST hfloat16 *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | ||||
@@ -266,9 +266,12 @@ typedef uint16_t bfloat16; | |||||
#define BFLOAT16CONVERSION 1 | #define BFLOAT16CONVERSION 1 | ||||
#endif | #endif | ||||
#ifndef hfloat16 | |||||
#include <stdint.h> | |||||
typedef uint16_t hfloat16; | |||||
#ifdef BUILD_HFLOAT16 | |||||
#ifndef hfloat16 | |||||
typedef _Float16 hfloat16; | |||||
#endif | |||||
#else | |||||
typedef uint16_t hfloat16; | |||||
#endif | #endif | ||||
#ifdef USE64BITINT | #ifdef USE64BITINT | ||||
@@ -18,6 +18,12 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||||
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "BFLOAT16") | GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "BFLOAT16") | ||||
endif () | endif () | ||||
endif () | endif () | ||||
# When BUILD_HFLOAT16 is enabled, register the gemm.c objects for the current
# GEMM_DEFINE with the "HFLOAT16" tag (mirrors the BFLOAT16 registration above).
# NOTE(review): the last argument is presumably the float-type selector of
# GenerateNamedObjects -- confirm against its definition.
if (BUILD_HFLOAT16) | |||||
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "HFLOAT16") | |||||
# The THREADED_LEVEL3 variant is only added for threaded builds that do not
# use the simple threaded level-3 path.
if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||||
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "HFLOAT16") | |||||
endif () | |||||
endif () | |||||
endforeach () | endforeach () | ||||
if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | ||||
@@ -23,6 +23,10 @@ ifeq ($(BUILD_BFLOAT16),1) | |||||
SBBLASOBJS += sbgemm_nn.$(SUFFIX) sbgemm_nt.$(SUFFIX) sbgemm_tn.$(SUFFIX) sbgemm_tt.$(SUFFIX) | SBBLASOBJS += sbgemm_nn.$(SUFFIX) sbgemm_nt.$(SUFFIX) sbgemm_tn.$(SUFFIX) sbgemm_tt.$(SUFFIX) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
SHBLASOBJS += shgemm_nn.$(SUFFIX) shgemm_nt.$(SUFFIX) shgemm_tn.$(SUFFIX) shgemm_tt.$(SUFFIX) | |||||
endif | |||||
SBLASOBJS += \ | SBLASOBJS += \ | ||||
sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) \ | sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) \ | ||||
strmm_LNUU.$(SUFFIX) strmm_LNUN.$(SUFFIX) strmm_LNLU.$(SUFFIX) strmm_LNLN.$(SUFFIX) \ | strmm_LNUU.$(SUFFIX) strmm_LNUN.$(SUFFIX) strmm_LNLU.$(SUFFIX) strmm_LNLN.$(SUFFIX) \ | ||||
@@ -210,6 +214,9 @@ ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1) | |||||
ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
SBBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX) | SBBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
SHBLASOBJS += shgemm_thread_nn.$(SUFFIX) shgemm_thread_nt.$(SUFFIX) shgemm_thread_tn.$(SUFFIX) shgemm_thread_tt.$(SUFFIX) | |||||
endif | |||||
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) | SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) | ||||
DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | ||||
QBLASOBJS += qgemm_thread_nn.$(SUFFIX) qgemm_thread_nt.$(SUFFIX) qgemm_thread_tn.$(SUFFIX) qgemm_thread_tt.$(SUFFIX) | QBLASOBJS += qgemm_thread_nn.$(SUFFIX) qgemm_thread_nt.$(SUFFIX) qgemm_thread_tn.$(SUFFIX) qgemm_thread_tt.$(SUFFIX) | ||||
@@ -355,6 +362,18 @@ sbgemm_tn.$(SUFFIX) : gemm.c level3.c ../../param.h | |||||
sbgemm_tt.$(SUFFIX) : gemm.c level3.c ../../param.h | sbgemm_tt.$(SUFFIX) : gemm.c level3.c ../../param.h | ||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | $(CC) $(CFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | ||||
shgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h | |||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | |||||
shgemm_nt.$(SUFFIX) : gemm.c level3.c ../../param.h | |||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F) | |||||
shgemm_tn.$(SUFFIX) : gemm.c level3.c ../../param.h | |||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F) | |||||
shgemm_tt.$(SUFFIX) : gemm.c level3.c ../../param.h | |||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | |||||
sgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h | sgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h | ||||
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | $(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | ||||
@@ -562,6 +581,18 @@ sbgemm_thread_tn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||||
sbgemm_thread_tt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | sbgemm_thread_tt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | ||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | ||||
shgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | |||||
shgemm_thread_nt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F) | |||||
shgemm_thread_tn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F) | |||||
shgemm_thread_tt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | |||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | |||||
sgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | sgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h | ||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | ||||
@@ -2747,6 +2778,18 @@ sbgemm_tn.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||||
sbgemm_tt.$(PSUFFIX) : gemm.c level3.c ../../param.h | sbgemm_tt.$(PSUFFIX) : gemm.c level3.c ../../param.h | ||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | $(CC) $(PFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | ||||
shgemm_nn.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | |||||
shgemm_nt.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F) | |||||
shgemm_tn.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F) | |||||
shgemm_tt.$(PSUFFIX) : gemm.c level3.c ../../param.h | |||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | |||||
sgemm_nn.$(PSUFFIX) : gemm.c level3.c ../../param.h | sgemm_nn.$(PSUFFIX) : gemm.c level3.c ../../param.h | ||||
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | $(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | ||||
@@ -2970,6 +3013,18 @@ sbgemm_thread_tn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||||
sbgemm_thread_tt.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | sbgemm_thread_tt.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | ||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | ||||
shgemm_thread_nn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | |||||
shgemm_thread_nt.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F) | |||||
shgemm_thread_tn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F) | |||||
shgemm_thread_tt.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | |||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F) | |||||
sgemm_thread_nn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | sgemm_thread_nn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h | ||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F) | ||||
@@ -218,7 +218,7 @@ mulx.$(SUFFIX) : $(ARCH)/mulx.c | |||||
$(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $(@F) | $(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $(@F) | ||||
detect_riscv64.$(SUFFIX): detect_riscv64.c | detect_riscv64.$(SUFFIX): detect_riscv64.c | ||||
$(CC) $(CFLAGS) -c -march=rv64imafdcv $< -o $(@F) | |||||
$(CC) $(CFLAGS) -c -march=rv64imafdcv_zvfh_zfh $< -o $(@F) | |||||
xerbla.$(PSUFFIX) : xerbla.c | xerbla.$(PSUFFIX) : xerbla.c | ||||
$(CC) $(PFLAGS) -c $< -o $(@F) | $(CC) $(PFLAGS) -c $< -o $(@F) | ||||
@@ -67,6 +67,11 @@ BLASLONG sbgemm_p = DEFAULT_GEMM_P; | |||||
#else | #else | ||||
BLASLONG sbgemm_p = SBGEMM_P; | BLASLONG sbgemm_p = SBGEMM_P; | ||||
#endif | #endif | ||||
#if SHGEMM_P == shgemm_p | |||||
BLASLONG shgemm_p = DEFAULT_GEMM_P; | |||||
#else | |||||
BLASLONG shgemm_p = SHGEMM_P; | |||||
#endif | |||||
#if SGEMM_P == sgemm_p | #if SGEMM_P == sgemm_p | ||||
BLASLONG sgemm_p = DEFAULT_GEMM_P; | BLASLONG sgemm_p = DEFAULT_GEMM_P; | ||||
#else | #else | ||||
@@ -93,6 +98,11 @@ BLASLONG sbgemm_q = DEFAULT_GEMM_Q; | |||||
#else | #else | ||||
BLASLONG sbgemm_q = SBGEMM_Q; | BLASLONG sbgemm_q = SBGEMM_Q; | ||||
#endif | #endif | ||||
#if SHGEMM_Q == shgemm_q | |||||
BLASLONG shgemm_q = DEFAULT_GEMM_Q; | |||||
#else | |||||
BLASLONG shgemm_q = SHGEMM_Q; | |||||
#endif | |||||
#if SGEMM_Q == sgemm_q | #if SGEMM_Q == sgemm_q | ||||
BLASLONG sgemm_q = DEFAULT_GEMM_Q; | BLASLONG sgemm_q = DEFAULT_GEMM_Q; | ||||
#else | #else | ||||
@@ -119,6 +129,11 @@ BLASLONG sbgemm_r = DEFAULT_GEMM_R; | |||||
#else | #else | ||||
BLASLONG sbgemm_r = SBGEMM_R; | BLASLONG sbgemm_r = SBGEMM_R; | ||||
#endif | #endif | ||||
#if SHGEMM_R == shgemm_r | |||||
BLASLONG shgemm_r = DEFAULT_GEMM_R; | |||||
#else | |||||
BLASLONG shgemm_r = SHGEMM_R; | |||||
#endif | |||||
#if SGEMM_R == sgemm_r | #if SGEMM_R == sgemm_r | ||||
BLASLONG sgemm_r = DEFAULT_GEMM_R; | BLASLONG sgemm_r = DEFAULT_GEMM_R; | ||||
#else | #else | ||||
@@ -526,6 +541,9 @@ void blas_set_parameter(void){ | |||||
#ifdef BUILD_BFLOAT16 | #ifdef BUILD_BFLOAT16 | ||||
sbgemm_r = (((BUFFER_SIZE - ((SBGEMM_P * SBGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SBGEMM_Q * 4)) - 15) & ~15; | sbgemm_r = (((BUFFER_SIZE - ((SBGEMM_P * SBGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SBGEMM_Q * 4)) - 15) & ~15; | ||||
#endif | |||||
#ifdef BUILD_HFLOAT16 | |||||
shgemm_r = (((BUFFER_SIZE - ((SHGEMM_P * SHGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SHGEMM_Q * 4)) - 15) & ~15; | |||||
#endif | #endif | ||||
sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q * 4)) - 15) & ~15; | sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q * 4)) - 15) & ~15; | ||||
dgemm_r = (((BUFFER_SIZE - ((DGEMM_P * DGEMM_Q * 8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (DGEMM_Q * 8)) - 15) & ~15; | dgemm_r = (((BUFFER_SIZE - ((DGEMM_P * DGEMM_Q * 8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (DGEMM_Q * 8)) - 15) & ~15; | ||||
@@ -619,6 +637,7 @@ void blas_set_parameter(void){ | |||||
size = BITMASK(cpuid3, 16, 0xff); | size = BITMASK(cpuid3, 16, 0xff); | ||||
sbgemm_p = 192 * (size + 1); | sbgemm_p = 192 * (size + 1); | ||||
shgemm_p = 192 * (size + 1); | |||||
sgemm_p = 192 * (size + 1); | sgemm_p = 192 * (size + 1); | ||||
dgemm_p = 96 * (size + 1); | dgemm_p = 96 * (size + 1); | ||||
cgemm_p = 96 * (size + 1); | cgemm_p = 96 * (size + 1); | ||||
@@ -634,6 +653,9 @@ void blas_set_parameter(void){ | |||||
#ifdef BUILD_BFLOAT16 | #ifdef BUILD_BFLOAT16 | ||||
sbgemm_r = (((BUFFER_SIZE - ((SBGEMM_P * SBGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SBGEMM_Q * 4)) - 15) & ~15; | sbgemm_r = (((BUFFER_SIZE - ((SBGEMM_P * SBGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SBGEMM_Q * 4)) - 15) & ~15; | ||||
#endif | |||||
#ifdef BUILD_HFLOAT16 | |||||
shgemm_r = (((BUFFER_SIZE - ((SHGEMM_P * SHGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SHGEMM_Q * 4)) - 15) & ~15; | |||||
#endif | #endif | ||||
sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q * 4)) - 15) & ~15; | sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q * 4)) - 15) & ~15; | ||||
dgemm_r = (((BUFFER_SIZE - ((DGEMM_P * DGEMM_Q * 8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (DGEMM_Q * 8)) - 15) & ~15; | dgemm_r = (((BUFFER_SIZE - ((DGEMM_P * DGEMM_Q * 8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (DGEMM_Q * 8)) - 15) & ~15; | ||||
@@ -39,6 +39,9 @@ endif | |||||
ifndef BUILD_BFLOAT16 | ifndef BUILD_BFLOAT16 | ||||
BUILD_BFLOAT16 = 0 | BUILD_BFLOAT16 = 0 | ||||
endif | endif | ||||
ifndef BUILD_HFLOAT16 | |||||
BUILD_HFLOAT16 = 0 | |||||
endif | |||||
ifndef BUILD_SINGLE | ifndef BUILD_SINGLE | ||||
BUILD_SINGLE = 0 | BUILD_SINGLE = 0 | ||||
endif | endif | ||||
@@ -52,6 +52,7 @@ blasobjsz=" | |||||
blasobjs="lsame xerbla" | blasobjs="lsame xerbla" | ||||
bfblasobjs="sbgemm sbgemmt sbgemmtr sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod" | bfblasobjs="sbgemm sbgemmt sbgemmtr sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod" | ||||
hfblasobjs="shgemm" | |||||
cblasobjsc=" | cblasobjsc=" | ||||
cblas_caxpy cblas_ccopy cblas_cdotc cblas_cdotu cblas_cgbmv cblas_cgemm cblas_cgemv | cblas_caxpy cblas_ccopy cblas_cdotc cblas_cdotu cblas_cgbmv cblas_cgemm cblas_cgemv | ||||
cblas_cgerc cblas_cgeru cblas_chbmv cblas_chemm cblas_chemv cblas_cher2 cblas_cher2k | cblas_cgerc cblas_cgeru cblas_chbmv cblas_chemm cblas_chemv cblas_cher2 cblas_cher2k | ||||
@@ -100,6 +101,7 @@ cblasobjsz=" | |||||
cblasobjs="cblas_xerbla" | cblasobjs="cblas_xerbla" | ||||
bfcblasobjs="cblas_sbgemm cblas_sbgemv cblas_sbdot cblas_sbstobf16 cblas_sbdtobf16 cblas_sbf16tos cblas_dbf16tod cblas_sbgemm_batch" | bfcblasobjs="cblas_sbgemm cblas_sbgemv cblas_sbdot cblas_sbstobf16 cblas_sbdtobf16 cblas_sbf16tos cblas_dbf16tod cblas_sbgemm_batch" | ||||
hfcblasobjs="cblas_shgemm" | |||||
exblasobjs=" | exblasobjs=" | ||||
qamax qamin qasum qaxpy qcabs1 qcopy qdot qgbmv qgemm | qamax qamin qasum qaxpy qcabs1 qcopy qdot qgbmv qgemm | ||||
@@ -3816,8 +3818,8 @@ shift | |||||
p17=$9 | p17=$9 | ||||
if [ $p13 -eq 1 ]; then | if [ $p13 -eq 1 ]; then | ||||
blasobjs="$blasobjs $bfblasobjs" | |||||
cblasobjs="$cblasobjs $bfcblasobjs" | |||||
blasobjs="$blasobjs $bfblasobjs $hfblasobjs" | |||||
cblasobjs="$cblasobjs $bfcblasobjs $hfcblasobjs" | |||||
fi | fi | ||||
if [ $p14 -eq 1 ]; then | if [ $p14 -eq 1 ]; then | ||||
@@ -52,6 +52,7 @@ | |||||
@blasobjs = (lsame, xerbla); | @blasobjs = (lsame, xerbla); | ||||
@bfblasobjs = (sbgemm, sbgemmt, sbgemmtr, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod); | @bfblasobjs = (sbgemm, sbgemmt, sbgemmtr, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod); | ||||
@hfblasobjs = (shgemm); | |||||
@cblasobjsc = ( | @cblasobjsc = ( | ||||
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, | cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, | ||||
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k, | cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k, | ||||
@@ -97,7 +98,7 @@ | |||||
@cblasobjs = ( cblas_xerbla ); | @cblasobjs = ( cblas_xerbla ); | ||||
@bfcblasobjs = (cblas_sbgemm, cblas_sbgemmt, cblas_sbgemmtr, cblas_sbgemv, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod, cblas_sbgemm_batch); | @bfcblasobjs = (cblas_sbgemm, cblas_sbgemmt, cblas_sbgemmtr, cblas_sbgemv, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod, cblas_sbgemm_batch); | ||||
@hfcblasobjs = (cblas_shgemm); | |||||
@exblasobjs = ( | @exblasobjs = ( | ||||
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm, | qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm, | ||||
qgemv,qger,qmax,qmin, | qgemv,qger,qmax,qmin, | ||||
@@ -3773,8 +3774,8 @@ use File::Basename; | |||||
my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib"); | my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib"); | ||||
if ($ARGV[12] == 1) { | if ($ARGV[12] == 1) { | ||||
@blasobjs = (@blasobjs, @bfblasobjs); | |||||
@cblasobjs = (@cblasobjs, @bfcblasobjs); | |||||
@blasobjs = (@blasobjs, @bfblasobjs, @hfblasobjs); | |||||
@cblasobjs = (@cblasobjs, @bfcblasobjs, @hfcblasobjs); | |||||
} | } | ||||
if ($ARGV[13] == 1) { | if ($ARGV[13] == 1) { | ||||
@blasobjs = (@blasobjs, @blasobjss); | @blasobjs = (@blasobjs, @blasobjss); | ||||
@@ -19,6 +19,8 @@ int main(int argc, char **argv) { | |||||
if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) { | if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) { | ||||
printf("SBGEMM_UNROLL_M=%d\n", SBGEMM_DEFAULT_UNROLL_M); | printf("SBGEMM_UNROLL_M=%d\n", SBGEMM_DEFAULT_UNROLL_M); | ||||
printf("SBGEMM_UNROLL_N=%d\n", SBGEMM_DEFAULT_UNROLL_N); | printf("SBGEMM_UNROLL_N=%d\n", SBGEMM_DEFAULT_UNROLL_N); | ||||
printf("SHGEMM_UNROLL_M=%d\n", SHGEMM_DEFAULT_UNROLL_M); | |||||
printf("SHGEMM_UNROLL_N=%d\n", SHGEMM_DEFAULT_UNROLL_N); | |||||
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M); | printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M); | ||||
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N); | printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N); | ||||
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M); | printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M); | ||||
@@ -0,0 +1,58 @@ | |||||
import numpy as np
import torch

# Generator for a small cblas_shgemm smoke test: builds random half-precision
# inputs, computes a reference product, and writes "generated_test.c", a C
# program that calls cblas_shgemm on the same data and prints the result.

# Matrix dimensions (can be changed to a larger problem size).
M, K, N = 31, 31, 31

# Random integer-valued inputs in [0, 10], stored as float16 (exactly representable).
A = np.random.randint(0, 11, size=(M, K)).astype(np.float16)
B = np.random.randint(0, 11, size=(K, N)).astype(np.float16)

# Use the GPU when one is available, otherwise fall back to the CPU so the
# generator also runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
A_torch = torch.tensor(A, dtype=torch.float16, device=device)
B_torch = torch.tensor(B, dtype=torch.float16, device=device)

# The generated program stores C as float, so the reference is accumulated in
# float32 as well. A float16 matmul would round sums above 2048 (entries can
# reach K * 10 * 10 = 3100 here) and produce a reference that does not match.
C_torch = torch.matmul(A_torch.float(), B_torch.float())
C_ref = C_torch.cpu().numpy().astype(np.float32)


def format_array_c(name, array, c_type="hfloat16"):
    """Return a C definition `c_type name[len] = { ... };` for the flattened array.

    Elements are emitted in row-major (flatten) order with five decimals.
    """
    flat = array.flatten()
    elements = ", ".join(f"{x:.5f}" for x in flat)
    return f"{c_type} {name}[{len(flat)}] = {{ {elements} }};\n"


def format_array_c_float(name, array):
    """Return a C `float` array definition; same formatting as format_array_c."""
    return format_array_c(name, array, c_type="float")


# Write the C test program.
with open("generated_test.c", "w") as f:
    f.write('#include <stdio.h>\n')
    f.write('#include <stdlib.h>\n')
    f.write('#include <string.h>\n')
    f.write('#include <cblas.h>\n\n')

    f.write(f"const int M = {M}, K = {K}, N = {N};\n")
    f.write("const float alpha = 1.0f, beta = 0.0f;\n\n")

    f.write(format_array_c("A", A))
    f.write(format_array_c("B", B))
    f.write(f"float C[{M*N}] = {{ 0 }};\n\n")

    f.write("int main() {\n")
    f.write(" cblas_shgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,\n")
    f.write(" M, N, K,\n")
    f.write(" alpha,\n")
    f.write(" A, K,\n")
    f.write(" B, N,\n")
    f.write(" beta,\n")
    f.write(" C, N);\n\n")

    f.write(' printf("Result C = A * B:\\n");\n')
    f.write(" for (int i = 0; i < M * N; i++) {\n")
    f.write(" printf(\"%.5f \", C[i]);\n")
    f.write(" if ((i + 1) % N == 0) printf(\"\\n\");\n")
    f.write(" }\n")
    f.write(" return 0;\n")
    f.write("}\n\n")

    # Append the reference result as a trailing comment for manual comparison.
    f.write("// Reference result computed in Python:\n")
    c_ref_flat = ", ".join(f"{x:.5f}" for x in C_ref.flatten())
    f.write(f"// C_ref = {{ {c_ref_flat} }}\n")
@@ -0,0 +1,457 @@ | |||||
#ifndef CBLAS_H | |||||
#define CBLAS_H | |||||
#include <stddef.h> | |||||
#include "openblas_config.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
/* Assume C declarations for C++ */ | |||||
#endif /* __cplusplus */ | |||||
/*Set the number of threads on runtime.*/ | |||||
void openblas_set_num_threads(int num_threads); | |||||
void goto_set_num_threads(int num_threads); | |||||
int openblas_set_num_threads_local(int num_threads); | |||||
/*Get the number of threads on runtime.*/ | |||||
int openblas_get_num_threads(void); | |||||
/*Get the number of physical processors (cores).*/ | |||||
int openblas_get_num_procs(void); | |||||
/*Get the build configure on runtime.*/ | |||||
char* openblas_get_config(void); | |||||
/*Get the CPU corename on runtime.*/ | |||||
char* openblas_get_corename(void); | |||||
/*Set the threading backend to a custom callback.*/ | |||||
typedef void (*openblas_dojob_callback)(int thread_num, void *jobdata, int dojob_data); | |||||
typedef void (*openblas_threads_callback)(int sync, openblas_dojob_callback dojob, int numjobs, size_t jobdata_elsize, void *jobdata, int dojob_data); | |||||
void openblas_set_threads_callback_function(openblas_threads_callback callback); | |||||
#ifdef OPENBLAS_OS_LINUX | |||||
/* Sets thread affinity for OpenBLAS threads. `thread_idx` is in [0, openblas_get_num_threads()-1]. */ | |||||
int openblas_setaffinity(int thread_idx, size_t cpusetsize, cpu_set_t* cpu_set); | |||||
/* Queries thread affinity for OpenBLAS threads. `thread_idx` is in [0, openblas_get_num_threads()-1]. */ | |||||
int openblas_getaffinity(int thread_idx, size_t cpusetsize, cpu_set_t* cpu_set); | |||||
#endif | |||||
/* Get the parallelization type which is used by OpenBLAS */ | |||||
int openblas_get_parallel(void); | |||||
/* OpenBLAS is compiled for sequential use */ | |||||
#define OPENBLAS_SEQUENTIAL 0 | |||||
/* OpenBLAS is compiled using normal threading model */ | |||||
#define OPENBLAS_THREAD 1 | |||||
/* OpenBLAS is compiled using OpenMP threading model */ | |||||
#define OPENBLAS_OPENMP 2 | |||||
/* | |||||
* Since all of GotoBlas was written without const, | |||||
* we disable it at build time. | |||||
*/ | |||||
#ifndef OPENBLAS_CONST | |||||
# define OPENBLAS_CONST const | |||||
#endif | |||||
#define CBLAS_INDEX size_t | |||||
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER; | |||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE; | |||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO; | |||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG; | |||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE; | |||||
typedef CBLAS_ORDER CBLAS_LAYOUT; | |||||
/* Dot-product family: each routine takes a length n and two strided vectors (x, incx) and (y, incy).
 * - cblas_sdsdot additionally takes a float alpha; cblas_dsdot takes float inputs but returns double.
 * - cdotu/cdotc/zdotu/zdotc take the vectors as void * and return an openblas_complex_* value.
 * - The *_sub variants return void and take an extra non-const `void *ret`
 *   (presumably where the complex result is stored -- confirm against the implementation). */
float cblas_sdsdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); | |||||
double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); | |||||
float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); | |||||
double cblas_ddot(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy); | |||||
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy); | |||||
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy); | |||||
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy); | |||||
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret); | |||||
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret); | |||||
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret); | |||||
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret); | |||||
/* Scalar reductions over a single read-only strided vector (length, pointer, increment).
 * Three families are declared: *asum, *sum and *nrm2. Each has float/double variants plus
 * complex-input variants (sc*, dz*) that take the vector as void * and still return a real scalar.
 * NOTE(review): by BLAS naming asum is a sum of absolute values and nrm2 a Euclidean norm;
 * the *sum family is presumably a plain (non-absolute) sum -- confirm against the implementation. */
float cblas_sasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||||
double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||||
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
float cblas_ssum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||||
double cblas_dsum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||||
float cblas_scsum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
double cblas_dzsum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
float cblas_snrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX); | |||||
double cblas_dnrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX); | |||||
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX); | |||||
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX); | |||||
/* Index and extreme-value queries over a single read-only strided vector.
 * - cblas_i?amax / cblas_i?amin / cblas_i?max / cblas_i?min return a CBLAS_INDEX.
 * - cblas_?amax / cblas_?amin return a float or double value instead of an index.
 * Complex variants (c/z, sc/dz) take the vector as void *.
 * NOTE(review): by BLAS naming the "a" variants presumably compare absolute values while
 * the i?max/i?min variants compare signed values -- confirm against the implementation. */
CBLAS_INDEX cblas_isamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_idamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_isamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
float cblas_samax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||||
double cblas_damax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||||
float cblas_scamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
double cblas_dzamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
float cblas_samin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||||
double cblas_damin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||||
float cblas_scamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
double cblas_dzamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_ismax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_idmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_icmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_izmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_ismin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_idmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_icmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
CBLAS_INDEX cblas_izmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx); | |||||
/* Two-vector update routines, all returning void.
 * - axpy / axpyc: scalar alpha (by value for s/d, via const void * for c/z), read-only x, mutable y.
 * - copy: read-only x, mutable y.
 * - swap: both x and y are mutable.
 * NOTE(review): axpyc is presumably the conjugated-x form of axpy -- confirm against the implementation. */
void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_caxpyc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_zaxpyc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_sswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_dswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_cswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_zswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy); | |||||
/* Rotation routines.
 * - rot (srot/drot/csrot/zdrot): two strided vectors plus real scalars c and s passed by value.
 * - rotg: all four arguments are mutable pointers; for crotg/zrotg `c` is a real pointer
 *   while a, b and s are void *.
 * - rotm: two mutable strided vectors plus a read-only parameter array P.
 * - rotmg: mutable d1, d2, b1 and P; b2 is passed by value.
 * NOTE(review): cblas_csrot and cblas_zdrot declare x as OPENBLAS_CONST void *, whereas
 * cblas_srot and cblas_drot take a mutable X. Confirm whether the const qualifier on the
 * complex variants is intentional before relying on x being left unmodified. */
void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s); | |||||
void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s); | |||||
void cblas_csrot(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s); | |||||
void cblas_zdrot(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s); | |||||
void cblas_srotg(float *a, float *b, float *c, float *s); | |||||
void cblas_drotg(double *a, double *b, double *c, double *s); | |||||
void cblas_crotg(void *a, void *b, float *c, void *s); | |||||
void cblas_zrotg(void *a, void *b, double *c, void *s); | |||||
void cblas_srotm(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float *P); | |||||
void cblas_drotm(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double *P); | |||||
void cblas_srotmg(float *d1, float *d2, float *b1, OPENBLAS_CONST float b2, float *P); | |||||
void cblas_drotmg(double *d1, double *d2, double *b1, OPENBLAS_CONST double b2, double *P); | |||||
/* Vector scaling: a scalar alpha and a mutable strided vector X.
 * - sscal/dscal: real alpha by value, real vector.
 * - cscal/zscal: alpha passed via const void *, vector as void *.
 * - csscal/zdscal: real alpha by value applied to a void * vector. */
void cblas_sscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_dscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, void *X, OPENBLAS_CONST blasint incX); | |||||
/* General matrix-vector routines; every routine takes a CBLAS_ORDER selector first.
 * - gemv: transpose selector, dimensions m and n, scalars alpha/beta, read-only matrix a (lda)
 *   and vector x, mutable vector y.
 * - ger / geru / gerc: dimensions M and N, scalar alpha, read-only vectors X and Y,
 *   mutable matrix A (lda).
 * Complex variants pass scalars and arrays through void pointers. */
void cblas_sgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, | |||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_dgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, | |||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_cgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); | |||||
/* trsv / trmv: routines taking order, Uplo, TransA and Diag selectors, a dimension N,
 * a read-only matrix A with leading dimension lda, and a mutable strided vector X
 * (the only non-const argument, so results are presumably written in place -- confirm). */
void cblas_strsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_dtrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_strmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_dtrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); | |||||
/* syr / her / syr2 / her2: routines taking order and Uplo selectors, a dimension N, a scalar alpha,
 * one (syr/her) or two (syr2/her2) read-only strided vectors, and a mutable matrix A with lda.
 * Note the alpha types: cher/zher take a real alpha by value, while cher2/zher2 take alpha
 * through a const void pointer. */
void cblas_ssyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_dsyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_ssyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo,OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, | |||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_dsyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, | |||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, | |||||
OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); | |||||
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, | |||||
OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda); | |||||
/* gbmv / sbmv: matrix-vector routines with extra integer band parameters.
 * - gbmv: order and TransA selectors, dimensions M and N, band counts KL and KU, alpha/beta,
 *   read-only A (lda) and X, mutable Y.
 * - sbmv: order and Uplo selectors, dimension N and a single band count K, alpha/beta,
 *   read-only A (lda) and X, mutable Y. Only real (s/d) variants are declared here. */
void cblas_sgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_dgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_cgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_zgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_ssbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, | |||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_dsbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, | |||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY); | |||||
/* tbmv / tbsv: same selector set as trmv/trsv (order, Uplo, TransA, Diag) with an additional
 * integer K alongside N, a read-only matrix A (lda) and a mutable strided vector X. */
void cblas_stbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_dtbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ctbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ztbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_stbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_dtbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ctbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ztbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX); | |||||
/* tpmv / tpsv: same selector set as trmv/trsv (order, Uplo, TransA, Diag), but the matrix is
 * passed as a single read-only pointer Ap with no leading dimension
 * (presumably packed storage, per the "p" in the name -- confirm). X is the mutable strided vector. */
void cblas_stpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_dtpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ctpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ztpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_stpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_dtpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ctpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX); | |||||
void cblas_ztpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX); | |||||
/* symv / hemv / spmv: matrix-vector routines taking order and Uplo selectors, dimension N,
 * scalars alpha and beta, a read-only strided X and a mutable strided Y.
 * - symv (s/d) and hemv (c/z) take the matrix as A with a leading dimension lda.
 * - spmv (s/d) takes the matrix as a single pointer Ap with no leading dimension. */
void cblas_ssymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, | |||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_dsymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, | |||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, | |||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, | |||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_sspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *Ap, | |||||
OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_dspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *Ap, | |||||
OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY); | |||||
/* spr / hpr / spr2 / hpr2: routines taking order and Uplo selectors, dimension N, scalar alpha,
 * one (spr/hpr) or two (spr2/hpr2) read-only strided vectors, and a mutable matrix pointer
 * with no leading dimension.
 * The matrix parameter is named inconsistently across this group (Ap for sspr/dspr/chpr2/zhpr2,
 * A for chpr/zhpr/sspr2/dspr2); the names have no effect on callers. */
void cblas_sspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *Ap); | |||||
void cblas_dspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *Ap); | |||||
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A); | |||||
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X,OPENBLAS_CONST blasint incX, void *A); | |||||
void cblas_sspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A); | |||||
void cblas_dspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A); | |||||
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap); | |||||
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap); | |||||
/* hbmv / hpmv: complex (c/z) matrix-vector routines taking order and Uplo selectors,
 * alpha/beta through const void pointers, a read-only X and a mutable Y.
 * - hbmv takes an extra integer K and the matrix A with leading dimension lda.
 * - hpmv takes the matrix as a single pointer Ap with no leading dimension. */
void cblas_chbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_zhbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_chpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_zhpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY); | |||||
void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_cgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
/* cblas_?gemmt entry points. Per the signatures they take an Uplo selector
 * in addition to TransA/TransB and only two size arguments (M and K); the
 * remaining arguments are alpha, A/lda, B/ldb, beta and C/ldc, with C as the
 * only writable matrix. Real variants pass alpha/beta by value, complex
 * variants pass them (and the matrices) as void pointers. */
void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
/* cblas_?symm entry points: Side and Uplo selectors, sizes M and N, inputs A
 * (lda) and B (ldb), and C (ldc) as the only writable matrix. Real variants
 * pass alpha/beta by value; complex variants pass alpha/beta and the
 * matrices as void pointers. */
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_dsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_csymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_zsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
/* cblas_?syrk entry points: Uplo and Trans selectors, sizes N and K, a single
 * input matrix A (lda) and C (ldc) as the only writable matrix. Real variants
 * pass alpha/beta by value; complex variants pass them by void pointer. */
void cblas_ssyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_dsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_csyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_zsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
/* cblas_?syr2k entry points: Uplo and Trans selectors, sizes N and K, two
 * input matrices A (lda) and B (ldb), and C (ldc) as the only writable
 * matrix. Real variants pass alpha/beta by value; complex variants pass them
 * by void pointer. */
void cblas_ssyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_dsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_csyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_zsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, | |||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
/* cblas_?trmm entry points: Side, Uplo, TransA and Diag selectors, sizes M
 * and N, a scalar alpha and an input matrix A (lda). B (ldb) is the only
 * writable matrix and there is no separate output argument. There is no beta
 * argument. Complex variants pass alpha and the matrices as void pointers. */
void cblas_strmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, | |||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb); | |||||
void cblas_dtrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, | |||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb); | |||||
void cblas_ctrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, | |||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb); | |||||
void cblas_ztrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, | |||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb); | |||||
/* cblas_?trsm entry points: Side, Uplo, TransA and Diag selectors, sizes M
 * and N, a scalar alpha and an input matrix A (lda). B (ldb) is the only
 * writable matrix and there is no separate output argument. There is no beta
 * argument. Complex variants pass alpha and the matrices as void pointers. */
void cblas_strsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, | |||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb); | |||||
void cblas_dtrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, | |||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb); | |||||
void cblas_ctrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, | |||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb); | |||||
void cblas_ztrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, | |||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb); | |||||
/* cblas_?hemm entry points (complex types only): Side and Uplo selectors,
 * sizes M and N, inputs A (lda) and B (ldb), and C (ldc) as the only writable
 * matrix. alpha, beta and all matrices are passed as void pointers. */
void cblas_chemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_zhemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
/* cblas_?herk entry points (complex types only): a single input matrix A
 * (lda) and C (ldc) as the only writable matrix, both passed as void
 * pointers. Unlike the other complex routines here, alpha and beta are real
 * scalars passed by value (float for cherk, double for zherk). */
void cblas_cherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_zherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
/* cblas_?her2k entry points (complex types only): two input matrices A (lda)
 * and B (ldb), and C (ldc) as the only writable matrix. alpha is passed by
 * void pointer, whereas beta is a real scalar passed by value (float for
 * cher2k, double for zher2k). */
void cblas_cher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
/* Variadic error-reporting entry point taking an integer p, a routine name
 * string and a second string "form" followed by extra arguments.
 * NOTE(review): form looks like a printf-style format consumed together with
 * the variadic arguments - confirm against the implementation. */
void cblas_xerbla(blasint p, OPENBLAS_CONST char *rout, OPENBLAS_CONST char *form, ...); | |||||
/*** BLAS extensions ***/ | |||||
/* cblas_?axpby entry points: length n, scalar alpha with input vector x
 * (stride incx), scalar beta with vector y (stride incy). y is the only
 * writable buffer. Real variants pass alpha/beta by value; complex variants
 * pass alpha/beta and the vectors as void pointers. */
void cblas_saxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_daxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy); | |||||
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy); | |||||
/* cblas_?omatcopy entry points: order and transpose selectors, crows x ccols
 * sizes, scalar calpha, a read-only source matrix a (clda) and a separate
 * writable destination matrix b (cldb). The complex variants take calpha and
 * the matrices as float or double pointers rather than void pointers. */
void cblas_somatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a, | |||||
OPENBLAS_CONST blasint clda, float *b, OPENBLAS_CONST blasint cldb); | |||||
void cblas_domatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, OPENBLAS_CONST double *a, | |||||
OPENBLAS_CONST blasint clda, double *b, OPENBLAS_CONST blasint cldb); | |||||
void cblas_comatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float* calpha, OPENBLAS_CONST float* a, | |||||
OPENBLAS_CONST blasint clda, float*b, OPENBLAS_CONST blasint cldb); | |||||
void cblas_zomatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, OPENBLAS_CONST double* a, | |||||
OPENBLAS_CONST blasint clda, double *b, OPENBLAS_CONST blasint cldb); | |||||
/* cblas_?imatcopy entry points: same selectors and sizes as the omatcopy
 * prototypes, but with a single writable matrix a and no destination
 * argument; both clda and cldb are still passed.
 * NOTE(review): presumably clda/cldb are the leading dimensions before and
 * after the in-place operation - confirm against the implementation. */
void cblas_simatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, | |||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb); | |||||
void cblas_dimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, | |||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb); | |||||
void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float* calpha, float* a, | |||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb); | |||||
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a, | |||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb); | |||||
/* cblas_?geadd entry points: order selector, crows x ccols sizes, scalar
 * calpha with read-only matrix a (clda), scalar cbeta with matrix c (cldc).
 * c is the only writable buffer. The complex variants take calpha/cbeta and
 * the matrices as float or double pointers rather than void pointers. */
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta, | |||||
float *c, OPENBLAS_CONST blasint cldc); | |||||
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta, | |||||
double *c, OPENBLAS_CONST blasint cldc); | |||||
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta, | |||||
float *c, OPENBLAS_CONST blasint cldc); | |||||
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta, | |||||
double *c, OPENBLAS_CONST blasint cldc); | |||||
/* cblas_?gemm_batch entry points. Apart from Order and group_count, every
 * argument is passed as an array: transpose selectors, M/N/K sizes,
 * alpha/beta scalars, leading dimensions, arrays of matrix pointers
 * (A_array, B_array, C_array) and group_size. C_array is the only writable
 * argument.
 * NOTE(review): the required array lengths and whether entries are indexed
 * per group or per matrix are not visible in this header - confirm against
 * the implementation. */
void cblas_sgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array, | |||||
OPENBLAS_CONST float * alpha_array, OPENBLAS_CONST float ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST float ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST float * beta_array, float ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size); | |||||
void cblas_dgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array, | |||||
OPENBLAS_CONST double * alpha_array, OPENBLAS_CONST double ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST double ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST double * beta_array, double ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size); | |||||
void cblas_cgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array, | |||||
OPENBLAS_CONST void * alpha_array, OPENBLAS_CONST void ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST void ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST void * beta_array, void ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size); | |||||
void cblas_zgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array, | |||||
OPENBLAS_CONST void * alpha_array, OPENBLAS_CONST void ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST void ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST void * beta_array, void ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size); | |||||
/*** BFLOAT16 and INT8 extensions ***/ | |||||
/* convert float array to BFLOAT16 array by rounding */ | |||||
void cblas_sbstobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout); | |||||
/* convert double array to BFLOAT16 array by rounding */ | |||||
void cblas_sbdtobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout); | |||||
/* convert BFLOAT16 array to float array */ | |||||
void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, float *out, OPENBLAS_CONST blasint incout); | |||||
/* convert BFLOAT16 array to double array */ | |||||
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout); | |||||
/* dot product of BFLOAT16 input arrays, with the result returned as float */ | |||||
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy); | |||||
/* matrix-vector entry point with BFLOAT16 inputs a and x; alpha, beta and the
 * writable vector y are float */
void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy); | |||||
/* gemm entry point with BFLOAT16 inputs A and B; alpha, beta and the writable
 * matrix C are float */
void cblas_sbgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||||
/* batched form of the BFLOAT16 gemm: per-problem arguments are passed as
 * arrays, plus group_count and a group_size array */
void cblas_sbgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array, | |||||
OPENBLAS_CONST float * alpha_array, OPENBLAS_CONST bfloat16 ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST bfloat16 ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST float * beta_array, float ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size); | |||||
/*** HFLOAT16 extensions ***/ | |||||
/* gemm entry point with hfloat16 inputs A (lda) and B (ldb); alpha and beta
 * are float scalars passed by value and the writable matrix C (ldc) is float.
 * The hfloat16 type is declared outside this section. */
void cblas_shgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST hfloat16 *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST hfloat16 *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif /* __cplusplus */ | |||||
#endif |
@@ -0,0 +1,811 @@ | |||||
#ifndef OPENBLAS_F77BLAS_H | |||||
#define OPENBLAS_F77BLAS_H | |||||
#include "openblas_config.h" | |||||
/*********************************************************************/ | |||||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
/* All rights reserved. */ | |||||
/* */ | |||||
/* Redistribution and use in source and binary forms, with or */ | |||||
/* without modification, are permitted provided that the following */ | |||||
/* conditions are met: */ | |||||
/* */ | |||||
/* 1. Redistributions of source code must retain the above */ | |||||
/* copyright notice, this list of conditions and the following */ | |||||
/* disclaimer. */ | |||||
/* */ | |||||
/* 2. Redistributions in binary form must reproduce the above */ | |||||
/* copyright notice, this list of conditions and the following */ | |||||
/* disclaimer in the documentation and/or other materials */ | |||||
/* provided with the distribution. */ | |||||
/* */ | |||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||||
/* */ | |||||
/* The views and conclusions contained in the software and */ | |||||
/* documentation are those of the authors and should not be */ | |||||
/* interpreted as representing official policies, either expressed */ | |||||
/* or implied, of The University of Texas at Austin. */ | |||||
/*********************************************************************/ | |||||
#ifndef ASSEMBLER | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
/* Assume C declarations for C++ */ | |||||
#endif /* __cplusplus */ | |||||
/* Error handler prototype; the symbol name is produced by the BLASFUNC macro,
 * which is defined outside this section. */
int BLASFUNC(xerbla)(char *, blasint *info, blasint); | |||||
/* Takes its single int argument by pointer.
 * NOTE(review): presumably the requested thread count - confirm. */
void openblas_set_num_threads_(int *); | |||||
/*Set the threading backend to a custom callback.*/ | |||||
/* Per-job worker signature: receives a thread number, an opaque jobdata
 * pointer and an int dojob_data value. */
typedef void (*openblas_dojob_callback)(int thread_num, void *jobdata, int dojob_data); | |||||
/* Backend signature: receives a sync flag, the worker to invoke (dojob), the
 * number of jobs, the size of one jobdata element, the jobdata pointer and
 * the dojob_data value.
 * NOTE(review): how sync and jobdata_elsize are interpreted is not visible
 * in this header - confirm against the threading implementation. */
typedef void (*openblas_threads_callback)(int sync, openblas_dojob_callback dojob, int numjobs, size_t jobdata_elsize, void *jobdata, int dojob_data); | |||||
/* Global holding the currently installed backend callback (defined elsewhere). */
extern openblas_threads_callback openblas_threads_callback_; | |||||
/* Dot-product prototypes. Every argument is passed by pointer and the
 * parameters are unnamed. sdot and sdsdot return FLOATRET (defined outside
 * this section), dsdot returns double from float inputs, ddot/qdot return
 * double/xdouble, and sbdot returns float from bfloat16 inputs. sdsdot takes
 * one extra float pointer compared with sdot. */
FLOATRET BLASFUNC(sdot) (blasint *, float *, blasint *, float *, blasint *); | |||||
FLOATRET BLASFUNC(sdsdot)(blasint *, float *, float *, blasint *, float *, blasint *); | |||||
double BLASFUNC(dsdot) (blasint *, float *, blasint *, float *, blasint *); | |||||
double BLASFUNC(ddot) (blasint *, double *, blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qdot) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
float BLASFUNC(sbdot) (blasint *, bfloat16 *, blasint *, bfloat16 *, blasint *); | |||||
/* Conversion prototypes between float/double arrays and bfloat16 arrays; the
 * element types of the two array arguments give the direction. */
void BLASFUNC(sbstobf16) (blasint *, float *, blasint *, bfloat16 *, blasint *); | |||||
void BLASFUNC(sbdtobf16) (blasint *, double *, blasint *, bfloat16 *, blasint *); | |||||
void BLASFUNC(sbf16tos) (blasint *, bfloat16 *, blasint *, float *, blasint *); | |||||
void BLASFUNC(dbf16tod) (blasint *, bfloat16 *, blasint *, double *, blasint *); | |||||
/* Complex dot products (cdotu/cdotc, zdotu/zdotc, xdotu/xdotc) are declared
 * with one of three conventions, selected at preprocessing time:
 *   - RETURN_BY_STRUCT: the result is returned by value as one of the local
 *     {r, i} structs defined below;
 *   - RETURN_BY_STACK: the routine returns void and takes an extra leading
 *     openblas_complex_* pointer (presumably the result slot - confirm
 *     against the implementation);
 *   - otherwise: the result is returned by value as openblas_complex_*. */
#ifdef RETURN_BY_STRUCT | |||||
/* {r, i} pair of floats; return type of cdotu/cdotc in this branch. */
typedef struct { | |||||
float r, i; | |||||
} myccomplex_t; | |||||
/* {r, i} pair of doubles; return type of zdotu/zdotc in this branch. */
typedef struct { | |||||
double r, i; | |||||
} myzcomplex_t; | |||||
/* {r, i} pair of xdoubles; return type of xdotu/xdotc in this branch. */
typedef struct { | |||||
xdouble r, i; | |||||
} myxcomplex_t; | |||||
myccomplex_t BLASFUNC(cdotu) (blasint *, float *, blasint *, float *, blasint *); | |||||
myccomplex_t BLASFUNC(cdotc) (blasint *, float *, blasint *, float *, blasint *); | |||||
myzcomplex_t BLASFUNC(zdotu) (blasint *, double *, blasint *, double *, blasint *); | |||||
myzcomplex_t BLASFUNC(zdotc) (blasint *, double *, blasint *, double *, blasint *); | |||||
myxcomplex_t BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
myxcomplex_t BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
#elif defined RETURN_BY_STACK | |||||
void BLASFUNC(cdotu) (openblas_complex_float *, blasint *, float * , blasint *, float *, blasint *); | |||||
void BLASFUNC(cdotc) (openblas_complex_float *, blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(zdotu) (openblas_complex_double *, blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(zdotc) (openblas_complex_double *, blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xdotu) (openblas_complex_xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(xdotc) (openblas_complex_xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
#else | |||||
openblas_complex_float BLASFUNC(cdotu) (blasint *, float *, blasint *, float *, blasint *); | |||||
openblas_complex_float BLASFUNC(cdotc) (blasint *, float *, blasint *, float *, blasint *); | |||||
openblas_complex_double BLASFUNC(zdotu) (blasint *, double *, blasint *, double *, blasint *); | |||||
openblas_complex_double BLASFUNC(zdotc) (blasint *, double *, blasint *, double *, blasint *); | |||||
openblas_complex_xdouble BLASFUNC(xdotu) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
#endif | |||||
/* ?axpy and ?axpyc prototypes. All six arguments are passed by pointer and
 * are unnamed here; the complex variants (c/z/x prefixes) use the underlying
 * real element type (float, double, xdouble) for their data pointers.
 * NOTE(review): the argument order presumably is n, alpha, x, incx, y, incy -
 * confirm against the implementation. */
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(caxpyc)(blasint *, float *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(zaxpyc)(blasint *, double *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xaxpyc)(blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *); | |||||
/* ?copy prototypes: five pointer arguments (unnamed), two of them data
 * pointers of the routine's real element type.
 * NOTE(review): presumably n, x, incx, y, incy - confirm. */
void BLASFUNC(scopy) (blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(dcopy) (blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(qcopy) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(ccopy) (blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(zcopy) (blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xcopy) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
/* ?swap prototypes: five pointer arguments (unnamed), two of them non-const
 * data pointers of the routine's real element type.
 * NOTE(review): presumably n, x, incx, y, incy - confirm. */
void BLASFUNC(sswap) (blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(dswap) (blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(qswap) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(cswap) (blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(zswap) (blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xswap) (blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
/* ?asum prototypes: scalar-returning reductions over one vector, taking
 * three pointer arguments (unnamed). Single-precision routines return
 * FLOATRET, the others return double or xdouble. */
FLOATRET BLASFUNC(sasum) (blasint *, float *, blasint *); | |||||
FLOATRET BLASFUNC(scasum)(blasint *, float *, blasint *); | |||||
double BLASFUNC(dasum) (blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qasum) (blasint *, xdouble *, blasint *); | |||||
double BLASFUNC(dzasum)(blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qxasum)(blasint *, xdouble *, blasint *); | |||||
/* ?sum prototypes: same argument and return types as the ?asum group. */
FLOATRET BLASFUNC(ssum) (blasint *, float *, blasint *); | |||||
FLOATRET BLASFUNC(scsum)(blasint *, float *, blasint *); | |||||
double BLASFUNC(dsum) (blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qsum) (blasint *, xdouble *, blasint *); | |||||
double BLASFUNC(dzsum)(blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qxsum)(blasint *, xdouble *, blasint *); | |||||
/* Index-returning searches over one vector. Each routine takes three pointer
 * arguments (unnamed) and returns a blasint.
 * NOTE(review): whether the returned index is 0- or 1-based is not visible
 * in this header - confirm against the implementation. */
/* i?amax */
blasint BLASFUNC(isamax)(blasint *, float *, blasint *); | |||||
blasint BLASFUNC(idamax)(blasint *, double *, blasint *); | |||||
blasint BLASFUNC(iqamax)(blasint *, xdouble *, blasint *); | |||||
blasint BLASFUNC(icamax)(blasint *, float *, blasint *); | |||||
blasint BLASFUNC(izamax)(blasint *, double *, blasint *); | |||||
blasint BLASFUNC(ixamax)(blasint *, xdouble *, blasint *); | |||||
/* i?max */
blasint BLASFUNC(ismax) (blasint *, float *, blasint *); | |||||
blasint BLASFUNC(idmax) (blasint *, double *, blasint *); | |||||
blasint BLASFUNC(iqmax) (blasint *, xdouble *, blasint *); | |||||
blasint BLASFUNC(icmax) (blasint *, float *, blasint *); | |||||
blasint BLASFUNC(izmax) (blasint *, double *, blasint *); | |||||
blasint BLASFUNC(ixmax) (blasint *, xdouble *, blasint *); | |||||
/* i?amin */
blasint BLASFUNC(isamin)(blasint *, float *, blasint *); | |||||
blasint BLASFUNC(idamin)(blasint *, double *, blasint *); | |||||
blasint BLASFUNC(iqamin)(blasint *, xdouble *, blasint *); | |||||
blasint BLASFUNC(icamin)(blasint *, float *, blasint *); | |||||
blasint BLASFUNC(izamin)(blasint *, double *, blasint *); | |||||
blasint BLASFUNC(ixamin)(blasint *, xdouble *, blasint *); | |||||
/* i?min */
blasint BLASFUNC(ismin)(blasint *, float *, blasint *); | |||||
blasint BLASFUNC(idmin)(blasint *, double *, blasint *); | |||||
blasint BLASFUNC(iqmin)(blasint *, xdouble *, blasint *); | |||||
blasint BLASFUNC(icmin)(blasint *, float *, blasint *); | |||||
blasint BLASFUNC(izmin)(blasint *, double *, blasint *); | |||||
blasint BLASFUNC(ixmin)(blasint *, xdouble *, blasint *); | |||||
/* Value-returning counterparts of the index searches: same three pointer
 * arguments (unnamed), but the return type is a scalar - FLOATRET for the
 * single-precision routines, double or xdouble for the others. */
/* ?amax */
FLOATRET BLASFUNC(samax) (blasint *, float *, blasint *); | |||||
double BLASFUNC(damax) (blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qamax) (blasint *, xdouble *, blasint *); | |||||
FLOATRET BLASFUNC(scamax)(blasint *, float *, blasint *); | |||||
double BLASFUNC(dzamax)(blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qxamax)(blasint *, xdouble *, blasint *); | |||||
/* ?amin */
FLOATRET BLASFUNC(samin) (blasint *, float *, blasint *); | |||||
double BLASFUNC(damin) (blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qamin) (blasint *, xdouble *, blasint *); | |||||
FLOATRET BLASFUNC(scamin)(blasint *, float *, blasint *); | |||||
double BLASFUNC(dzamin)(blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qxamin)(blasint *, xdouble *, blasint *); | |||||
/* ?max */
FLOATRET BLASFUNC(smax) (blasint *, float *, blasint *); | |||||
double BLASFUNC(dmax) (blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qmax) (blasint *, xdouble *, blasint *); | |||||
FLOATRET BLASFUNC(scmax) (blasint *, float *, blasint *); | |||||
double BLASFUNC(dzmax) (blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qxmax) (blasint *, xdouble *, blasint *); | |||||
/* ?min */
FLOATRET BLASFUNC(smin) (blasint *, float *, blasint *); | |||||
double BLASFUNC(dmin) (blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qmin) (blasint *, xdouble *, blasint *); | |||||
FLOATRET BLASFUNC(scmin) (blasint *, float *, blasint *); | |||||
double BLASFUNC(dzmin) (blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qxmin) (blasint *, xdouble *, blasint *); | |||||
/* ?scal family: void routines taking (blasint *, T *, T *, blasint *).
 * csscal/zdscal/xqscal are declared with the same pointer types as
 * cscal/zscal/xscal, so the prototypes alone do not distinguish them.
 * NOTE(review): arguments are presumably (n, alpha, x, incx) - confirm. */
void BLASFUNC(sscal) (blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dscal) (blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qscal) (blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(cscal) (blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zscal) (blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xscal) (blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(csscal)(blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zdscal)(blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xqscal)(blasint *, xdouble *, xdouble *, blasint *); | |||||
/* ?nrm2 family: (blasint *, T *, blasint *) returning FLOATRET (snrm2/scnrm2),
 * double (dnrm2/dznrm2) or xdouble (qnrm2/qxnrm2).
 * NOTE(review): arguments are presumably (n, x, incx) - confirm. */
FLOATRET BLASFUNC(snrm2) (blasint *, float *, blasint *); | |||||
FLOATRET BLASFUNC(scnrm2)(blasint *, float *, blasint *); | |||||
double BLASFUNC(dnrm2) (blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qnrm2) (blasint *, xdouble *, blasint *); | |||||
double BLASFUNC(dznrm2)(blasint *, double *, blasint *); | |||||
xdouble BLASFUNC(qxnrm2)(blasint *, xdouble *, blasint *); | |||||
/* Rotation routines, all void:
 *   ?rot   - 7 arguments: (blasint *, T *, blasint *, T *, blasint *, T *, T *)
 *   ?rotg  - 4 arguments, all T *
 *   ?rotmg - 5 arguments, all T * (only the s and d variants are declared here)
 *   ?rotm  - 6 arguments (s, d and q variants are declared here)
 * NOTE(review): ?rot is presumably (n, x, incx, y, incy, c, s) - confirm. */
void BLASFUNC(srot) (blasint *, float *, blasint *, float *, blasint *, float *, float *); | |||||
void BLASFUNC(drot) (blasint *, double *, blasint *, double *, blasint *, double *, double *); | |||||
void BLASFUNC(qrot) (blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *); | |||||
void BLASFUNC(csrot) (blasint *, float *, blasint *, float *, blasint *, float *, float *); | |||||
void BLASFUNC(zdrot) (blasint *, double *, blasint *, double *, blasint *, double *, double *); | |||||
void BLASFUNC(xqrot) (blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *); | |||||
void BLASFUNC(srotg) (float *, float *, float *, float *); | |||||
void BLASFUNC(drotg) (double *, double *, double *, double *); | |||||
void BLASFUNC(qrotg) (xdouble *, xdouble *, xdouble *, xdouble *); | |||||
void BLASFUNC(crotg) (float *, float *, float *, float *); | |||||
void BLASFUNC(zrotg) (double *, double *, double *, double *); | |||||
void BLASFUNC(xrotg) (xdouble *, xdouble *, xdouble *, xdouble *); | |||||
void BLASFUNC(srotmg)(float *, float *, float *, float *, float *); | |||||
void BLASFUNC(drotmg)(double *, double *, double *, double *, double *); | |||||
void BLASFUNC(srotm) (blasint *, float *, blasint *, float *, blasint *, float *); | |||||
void BLASFUNC(drotm) (blasint *, double *, blasint *, double *, blasint *, double *); | |||||
void BLASFUNC(qrotm) (blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *); | |||||
/* Level 2 routines */ | |||||
/* ?ger / ?geru / ?gerc: void routines with 9 pointer arguments
 * (blasint *, blasint *, T *, T *, blasint *, T *, blasint *, T *, blasint *).
 * NOTE(review): presumably (m, n, alpha, x, incx, y, incy, a, lda) - confirm. */
void BLASFUNC(sger)(blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(dger)(blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(qger)(blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(cgeru)(blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(cgerc)(blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(zgeru)(blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(zgerc)(blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, blasint *); | |||||
/* ?gemv family: void routines with 11 arguments, starting with one char * flag
 * and two blasint * values. sbgemv is the mixed-type variant: its 5th and 7th
 * arguments are bfloat16 * while the remaining data pointers are float *.
 * NOTE(review): presumably (trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
 * - confirm. */
void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *, | |||||
bfloat16 *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qgemv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(cgemv)(char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zgemv)(char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xgemv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* ?trsv / ?trmv: void routines with 8 arguments - three char * flags followed by
 * (blasint *, T *, blasint *, T *, blasint *).
 * NOTE(review): presumably (uplo, trans, diag, n, a, lda, x, incx) - confirm. */
void BLASFUNC(strsv) (char *, char *, char *, blasint *, float *, blasint *, | |||||
float *, blasint *); | |||||
void BLASFUNC(dtrsv) (char *, char *, char *, blasint *, double *, blasint *, | |||||
double *, blasint *); | |||||
void BLASFUNC(qtrsv) (char *, char *, char *, blasint *, xdouble *, blasint *, | |||||
xdouble *, blasint *); | |||||
void BLASFUNC(ctrsv) (char *, char *, char *, blasint *, float *, blasint *, | |||||
float *, blasint *); | |||||
void BLASFUNC(ztrsv) (char *, char *, char *, blasint *, double *, blasint *, | |||||
double *, blasint *); | |||||
void BLASFUNC(xtrsv) (char *, char *, char *, blasint *, xdouble *, blasint *, | |||||
xdouble *, blasint *); | |||||
void BLASFUNC(strmv) (char *, char *, char *, blasint *, float *, blasint *, | |||||
float *, blasint *); | |||||
void BLASFUNC(dtrmv) (char *, char *, char *, blasint *, double *, blasint *, | |||||
double *, blasint *); | |||||
void BLASFUNC(qtrmv) (char *, char *, char *, blasint *, xdouble *, blasint *, | |||||
xdouble *, blasint *); | |||||
void BLASFUNC(ctrmv) (char *, char *, char *, blasint *, float *, blasint *, | |||||
float *, blasint *); | |||||
void BLASFUNC(ztrmv) (char *, char *, char *, blasint *, double *, blasint *, | |||||
double *, blasint *); | |||||
void BLASFUNC(xtrmv) (char *, char *, char *, blasint *, xdouble *, blasint *, | |||||
xdouble *, blasint *); | |||||
/* ?tpsv / ?tpmv: void routines with 7 arguments - three char * flags followed by
 * (blasint *, T *, T *, blasint *); there is no blasint * between the two data
 * pointers.
 * NOTE(review): presumably (uplo, trans, diag, n, ap, x, incx) with packed
 * storage - confirm. */
void BLASFUNC(stpsv) (char *, char *, char *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dtpsv) (char *, char *, char *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qtpsv) (char *, char *, char *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(ctpsv) (char *, char *, char *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(ztpsv) (char *, char *, char *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xtpsv) (char *, char *, char *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(stpmv) (char *, char *, char *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dtpmv) (char *, char *, char *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qtpmv) (char *, char *, char *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(ctpmv) (char *, char *, char *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(ztpmv) (char *, char *, char *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xtpmv) (char *, char *, char *, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* ?tbmv / ?tbsv: void routines with 9 arguments - three char * flags, two
 * blasint * values, then (T *, blasint *, T *, blasint *).
 * NOTE(review): presumably (uplo, trans, diag, n, k, a, lda, x, incx) with band
 * storage - confirm. */
void BLASFUNC(stbmv) (char *, char *, char *, blasint *, blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(dtbmv) (char *, char *, char *, blasint *, blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(qtbmv) (char *, char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(ctbmv) (char *, char *, char *, blasint *, blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(ztbmv) (char *, char *, char *, blasint *, blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xtbmv) (char *, char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(stbsv) (char *, char *, char *, blasint *, blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(dtbsv) (char *, char *, char *, blasint *, blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(qtbsv) (char *, char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(ctbsv) (char *, char *, char *, blasint *, blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(ztbsv) (char *, char *, char *, blasint *, blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xtbsv) (char *, char *, char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
/* ?symv and ?spmv, all void:
 *   ?symv - 10 arguments: (char *, blasint *, T *, T *, blasint *, T *, blasint *,
 *           T *, T *, blasint *)
 *   ?spmv - 9 arguments: the same list without the blasint * that follows the
 *           4th argument
 * NOTE(review): presumably (uplo, n, alpha, a[, lda], x, incx, beta, y, incy)
 * - confirm. */
void BLASFUNC(ssymv) (char *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dsymv) (char *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qsymv) (char *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(csymv) (char *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zsymv) (char *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xsymv) (char *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(sspmv) (char *, blasint *, float *, float *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dspmv) (char *, blasint *, double *, double *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qspmv) (char *, blasint *, xdouble *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(cspmv) (char *, blasint *, float *, float *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zspmv) (char *, blasint *, double *, double *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xspmv) (char *, blasint *, xdouble *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* ?syr and ?syr2, all void:
 *   ?syr  - 7 arguments: (char *, blasint *, T *, T *, blasint *, T *, blasint *)
 *   ?syr2 - 9 arguments: one extra (T *, blasint *) pair
 * NOTE(review): presumably (uplo, n, alpha, x, incx[, y, incy], a, lda) - confirm. */
void BLASFUNC(ssyr) (char *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *); | |||||
void BLASFUNC(dsyr) (char *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *); | |||||
void BLASFUNC(qsyr) (char *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *); | |||||
void BLASFUNC(csyr) (char *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *); | |||||
void BLASFUNC(zsyr) (char *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *); | |||||
void BLASFUNC(xsyr) (char *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *); | |||||
void BLASFUNC(ssyr2) (char *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(dsyr2) (char *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(qsyr2) (char *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(csyr2) (char *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(zsyr2) (char *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xsyr2) (char *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
/* ?spr and ?spr2, all void; the final argument is a bare T * with no trailing
 * blasint *:
 *   ?spr  - 6 arguments: (char *, blasint *, T *, T *, blasint *, T *)
 *   ?spr2 - 8 arguments: one extra (T *, blasint *) pair before the last T *
 * NOTE(review): presumably (uplo, n, alpha, x, incx[, y, incy], ap) with packed
 * storage - confirm. */
void BLASFUNC(sspr) (char *, blasint *, float *, float *, blasint *, | |||||
float *); | |||||
void BLASFUNC(dspr) (char *, blasint *, double *, double *, blasint *, | |||||
double *); | |||||
void BLASFUNC(qspr) (char *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *); | |||||
void BLASFUNC(cspr) (char *, blasint *, float *, float *, blasint *, | |||||
float *); | |||||
void BLASFUNC(zspr) (char *, blasint *, double *, double *, blasint *, | |||||
double *); | |||||
void BLASFUNC(xspr) (char *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *); | |||||
void BLASFUNC(sspr2) (char *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *); | |||||
void BLASFUNC(dspr2) (char *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *); | |||||
void BLASFUNC(qspr2) (char *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *); | |||||
void BLASFUNC(cspr2) (char *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *); | |||||
void BLASFUNC(zspr2) (char *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *); | |||||
void BLASFUNC(xspr2) (char *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *); | |||||
/* ?her / ?hpr / ?her2 / ?hpr2 (c, z and x variants only), all void:
 *   ?her  - 7 arguments, ending in (T *, blasint *)
 *   ?hpr  - 6 arguments, ending in a bare T *
 *   ?her2 - 9 arguments, ending in (T *, blasint *)
 *   ?hpr2 - 8 arguments, ending in a bare T *
 * NOTE(review): presumably the Hermitian counterparts of the ?syr/?spr
 * routines, with the same argument order - confirm. */
void BLASFUNC(cher) (char *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *); | |||||
void BLASFUNC(zher) (char *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *); | |||||
void BLASFUNC(xher) (char *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *); | |||||
void BLASFUNC(chpr) (char *, blasint *, float *, float *, blasint *, float *); | |||||
void BLASFUNC(zhpr) (char *, blasint *, double *, double *, blasint *, double *); | |||||
void BLASFUNC(xhpr) (char *, blasint *, xdouble *, xdouble *, blasint *, xdouble *); | |||||
void BLASFUNC(cher2) (char *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(zher2) (char *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xher2) (char *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(chpr2) (char *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *); | |||||
void BLASFUNC(zhpr2) (char *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *); | |||||
void BLASFUNC(xhpr2) (char *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *); | |||||
/* ?hemv and ?hpmv (c, z and x variants only), all void:
 *   ?hemv - 10 arguments: (char *, blasint *, T *, T *, blasint *, T *, blasint *,
 *           T *, T *, blasint *)
 *   ?hpmv - 9 arguments: the same list without the blasint * that follows the
 *           4th argument
 * NOTE(review): presumably (uplo, n, alpha, a[, lda], x, incx, beta, y, incy)
 * - confirm. */
void BLASFUNC(chemv) (char *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zhemv) (char *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xhemv) (char *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(chpmv) (char *, blasint *, float *, float *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zhpmv) (char *, blasint *, double *, double *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xhpmv) (char *, blasint *, xdouble *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* ?norm: declared to return int (not void) and to take
 * (char *, blasint *, blasint *, T *, blasint *); only the s, d, c and z variants
 * are declared here.
 * NOTE(review): the meaning of the char * selector and of the int result is not
 * visible from these declarations - confirm against the implementation. */
int BLASFUNC(snorm)(char *, blasint *, blasint *, float *, blasint *); | |||||
int BLASFUNC(dnorm)(char *, blasint *, blasint *, double *, blasint *); | |||||
int BLASFUNC(cnorm)(char *, blasint *, blasint *, float *, blasint *); | |||||
int BLASFUNC(znorm)(char *, blasint *, blasint *, double *, blasint *); | |||||
/* Band matrix-vector routines, all void:
 *   ?gbmv - 13 arguments: one char * flag and four blasint * values, then
 *           (T *, T *, blasint *, T *, blasint *, T *, T *, blasint *)
 *   ?sbmv - 11 arguments: one char * flag and two blasint * values, then the
 *           same 8-argument tail
 *   ?hbmv - same parameter list as ?sbmv; c, z and x variants only
 * NOTE(review): ?gbmv is presumably (trans, m, n, kl, ku, alpha, a, lda, x, incx,
 * beta, y, incy) - confirm. */
void BLASFUNC(sgbmv)(char *, blasint *, blasint *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dgbmv)(char *, blasint *, blasint *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qgbmv)(char *, blasint *, blasint *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(cgbmv)(char *, blasint *, blasint *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zgbmv)(char *, blasint *, blasint *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xgbmv)(char *, blasint *, blasint *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(ssbmv)(char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dsbmv)(char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qsbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(csbmv)(char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zsbmv)(char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xsbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(chbmv)(char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zhbmv)(char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xhbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* Level 3 routines */ | |||||
/* ?gemm / ?gemm3m: void routines with 13 arguments - two char * flags, three
 * blasint * values, then (T *, A *, blasint *, A *, blasint *, T *, T *, blasint *).
 * The mixed-type variants keep float * for the 6th, 11th and 12th arguments but
 * use a 16-bit type for the 7th and 9th: hfloat16 * in shgemm, bfloat16 * in
 * sbgemm. In all other variants A is the same type as T.
 * NOTE(review): presumably (transa, transb, m, n, k, alpha, a, lda, b, ldb,
 * beta, c, ldc) - confirm. */
void BLASFUNC(shgemm)(char *, char *, blasint *, blasint *, blasint *, float *, | |||||
hfloat16 *, blasint *, hfloat16 *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(sbgemm)(char *, char *, blasint *, blasint *, blasint *, float *, | |||||
bfloat16 *, blasint *, bfloat16 *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(sgemm)(char *, char *, blasint *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dgemm)(char *, char *, blasint *, blasint *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qgemm)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(cgemm)(char *, char *, blasint *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zgemm)(char *, char *, blasint *, blasint *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xgemm)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(cgemm3m)(char *, char *, blasint *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* ?gemmt and ?ge2mm: both take 13 arguments - three char * flags, two blasint *
 * values, then (T *, T *, blasint *, T *, blasint *, T *, T *, blasint *).
 * They differ in return type: ?gemmt is void, ?ge2mm returns int. Only the
 * s, d, c and z variants are declared here.
 * NOTE(review): ?gemmt is presumably (uplo, transa, transb, n, k, alpha, a, lda,
 * b, ldb, beta, c, ldc) - confirm. */
void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *, | |||||
float *, float *, blasint *, float *, blasint *, | |||||
float *, float *, blasint *); | |||||
int BLASFUNC(dge2mm)(char *, char *, char *, blasint *, blasint *, | |||||
double *, double *, blasint *, double *, blasint *, | |||||
double *, double *, blasint *); | |||||
int BLASFUNC(cge2mm)(char *, char *, char *, blasint *, blasint *, | |||||
float *, float *, blasint *, float *, blasint *, | |||||
float *, float *, blasint *); | |||||
int BLASFUNC(zge2mm)(char *, char *, char *, blasint *, blasint *, | |||||
double *, double *, blasint *, double *, blasint *, | |||||
double *, double *, blasint *); | |||||
/* ?trsm / ?trmm: void routines with 11 arguments - four char * flags, two
 * blasint * values, then (T *, T *, blasint *, T *, blasint *).
 * NOTE(review): presumably (side, uplo, transa, diag, m, n, alpha, a, lda, b,
 * ldb) - confirm. */
void BLASFUNC(strsm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
float *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(dtrsm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
double *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(qtrsm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
xdouble *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(ctrsm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
float *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(ztrsm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
double *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xtrsm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
xdouble *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(strmm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
float *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(dtrmm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
double *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(qtrmm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
xdouble *, xdouble *, blasint *, xdouble *, blasint *); | |||||
void BLASFUNC(ctrmm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
float *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(ztrmm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
double *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(xtrmm)(char *, char *, char *, char *, blasint *, blasint *, | |||||
xdouble *, xdouble *, blasint *, xdouble *, blasint *); | |||||
/* ?symm / ?symm3m: void routines with 12 arguments - two char * flags, two
 * blasint * values, then (T *, T *, blasint *, T *, blasint *, T *, T *, blasint *).
 * The 3m variants are declared for c, z and x only.
 * NOTE(review): presumably (side, uplo, m, n, alpha, a, lda, b, ldb, beta, c,
 * ldc) - confirm. */
void BLASFUNC(ssymm)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dsymm)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qsymm)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(csymm)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zsymm)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xsymm)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(csymm3m)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zsymm3m)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xsymm3m)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* ?syrk and ?syr2k, all void, both starting with two char * flags and two
 * blasint * values:
 *   ?syrk  - 10 arguments, continuing (T *, T *, blasint *, T *, T *, blasint *)
 *   ?syr2k - 12 arguments, with one extra (T *, blasint *) pair
 * NOTE(review): presumably (uplo, trans, n, k, alpha, a, lda[, b, ldb], beta, c,
 * ldc) - confirm. */
void BLASFUNC(ssyrk)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, float *, blasint *); | |||||
void BLASFUNC(dsyrk)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, double *, blasint *); | |||||
void BLASFUNC(qsyrk)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(csyrk)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, float *, blasint *); | |||||
void BLASFUNC(zsyrk)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, double *, blasint *); | |||||
void BLASFUNC(xsyrk)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(ssyr2k)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dsyr2k)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double*, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(qsyr2k)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble*, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(csyr2k)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zsyr2k)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double*, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xsyr2k)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble*, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* ?hemm / ?hemm3m / ?herk / ?her2k (c, z and x variants only), all void, all
 * starting with two char * flags and two blasint * values:
 *   ?hemm, ?hemm3m, ?her2k - 12 arguments
 *   ?herk                  - 10 arguments
 * As declared, every data argument (including the 5th) is a plain T * pointer;
 * the prototypes do not show which arguments are real and which are complex.
 * NOTE(review): presumably the Hermitian counterparts of ?symm/?syrk/?syr2k,
 * with the same argument order - confirm. */
void BLASFUNC(chemm)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zhemm)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xhemm)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(chemm3m)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zhemm3m)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xhemm3m)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(cherk)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, float *, blasint *); | |||||
void BLASFUNC(zherk)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, double *, blasint *); | |||||
void BLASFUNC(xherk)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble *, xdouble *, blasint *); | |||||
void BLASFUNC(cher2k)(char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zher2k)(char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double*, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(xher2k)(char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble*, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* ?her2m (c, z and x variants only): declared to return int and to take 13
 * arguments - three char * flags, two blasint * values, then
 * (T *, T *, blasint *, T *, blasint *, T *, T *, blasint *).
 * NOTE(review): not a reference-BLAS name; the operation and the meaning of the
 * int result are not visible from these declarations - confirm. */
int BLASFUNC(cher2m)(char *, char *, char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *, float *, float *, blasint *); | |||||
int BLASFUNC(zher2m)(char *, char *, char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double*, blasint *, double *, double *, blasint *); | |||||
int BLASFUNC(xher2m)(char *, char *, char *, blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
xdouble*, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* ?gemt / ?gema / ?gems (s, d, c and z variants only), all declared to return
 * int:
 *   ?gemt - 8 arguments: (char *, blasint *, blasint *, T *, T *, blasint *,
 *           T *, blasint *)
 *   ?gema - 12 arguments: two char * flags, two blasint * values, then
 *           (T *, T *, blasint *, T *, T *, blasint *, T *, blasint *)
 *   ?gems - same parameter list as ?gema
 * NOTE(review): these are not reference-BLAS names; the operations and the
 * meaning of the int results are not visible from these declarations -
 * confirm. */
int BLASFUNC(sgemt)(char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *); | |||||
int BLASFUNC(dgemt)(char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *); | |||||
int BLASFUNC(cgemt)(char *, blasint *, blasint *, float *, float *, blasint *, | |||||
float *, blasint *); | |||||
int BLASFUNC(zgemt)(char *, blasint *, blasint *, double *, double *, blasint *, | |||||
double *, blasint *); | |||||
int BLASFUNC(sgema)(char *, char *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, float *, blasint *, float *, blasint *); | |||||
int BLASFUNC(dgema)(char *, char *, blasint *, blasint *, double *, | |||||
double *, blasint *, double*, double *, blasint *, double*, blasint *); | |||||
int BLASFUNC(cgema)(char *, char *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, float *, blasint *, float *, blasint *); | |||||
int BLASFUNC(zgema)(char *, char *, blasint *, blasint *, double *, | |||||
double *, blasint *, double*, double *, blasint *, double*, blasint *); | |||||
int BLASFUNC(sgems)(char *, char *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, float *, blasint *, float *, blasint *); | |||||
int BLASFUNC(dgems)(char *, char *, blasint *, blasint *, double *, | |||||
double *, blasint *, double*, double *, blasint *, double*, blasint *); | |||||
int BLASFUNC(cgems)(char *, char *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, float *, blasint *, float *, blasint *); | |||||
int BLASFUNC(zgems)(char *, char *, blasint *, blasint *, double *, | |||||
double *, blasint *, double*, double *, blasint *, double*, blasint *); | |||||
/* ?gemc: declared to return int and to take 15 arguments - two char * flags,
 * three blasint * values, one T *, three (T *, blasint *) pairs, then
 * (T *, T *, blasint *).
 * NOTE(review): not a reference-BLAS name; the operation and the meaning of the
 * int result are not visible from these declarations - confirm. */
int BLASFUNC(sgemc)(char *, char *, blasint *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
int BLASFUNC(dgemc)(char *, char *, blasint *, blasint *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
int BLASFUNC(qgemc)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
int BLASFUNC(cgemc)(char *, char *, blasint *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
int BLASFUNC(zgemc)(char *, char *, blasint *, blasint *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
int BLASFUNC(xgemc)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | |||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||||
/* Lapack routines */ | |||||
int BLASFUNC(sgetf2)(blasint *, blasint *, float *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(dgetf2)(blasint *, blasint *, double *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(qgetf2)(blasint *, blasint *, xdouble *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(cgetf2)(blasint *, blasint *, float *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(zgetf2)(blasint *, blasint *, double *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(xgetf2)(blasint *, blasint *, xdouble *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(sgetrf)(blasint *, blasint *, float *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(dgetrf)(blasint *, blasint *, double *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(qgetrf)(blasint *, blasint *, xdouble *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(cgetrf)(blasint *, blasint *, float *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(zgetrf)(blasint *, blasint *, double *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(xgetrf)(blasint *, blasint *, xdouble *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(slaswp)(blasint *, float *, blasint *, blasint *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(dlaswp)(blasint *, double *, blasint *, blasint *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(qlaswp)(blasint *, xdouble *, blasint *, blasint *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(claswp)(blasint *, float *, blasint *, blasint *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(zlaswp)(blasint *, double *, blasint *, blasint *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(xlaswp)(blasint *, xdouble *, blasint *, blasint *, blasint *, blasint *, blasint *); | |||||
int BLASFUNC(sgetrs)(char *, blasint *, blasint *, float *, blasint *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(dgetrs)(char *, blasint *, blasint *, double *, blasint *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(qgetrs)(char *, blasint *, blasint *, xdouble *, blasint *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(cgetrs)(char *, blasint *, blasint *, float *, blasint *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(zgetrs)(char *, blasint *, blasint *, double *, blasint *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(xgetrs)(char *, blasint *, blasint *, xdouble *, blasint *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(sgesv)(blasint *, blasint *, float *, blasint *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(dgesv)(blasint *, blasint *, double *, blasint *, blasint *, double*, blasint *, blasint *); | |||||
int BLASFUNC(qgesv)(blasint *, blasint *, xdouble *, blasint *, blasint *, xdouble*, blasint *, blasint *); | |||||
int BLASFUNC(cgesv)(blasint *, blasint *, float *, blasint *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(zgesv)(blasint *, blasint *, double *, blasint *, blasint *, double*, blasint *, blasint *); | |||||
int BLASFUNC(xgesv)(blasint *, blasint *, xdouble *, blasint *, blasint *, xdouble*, blasint *, blasint *); | |||||
int BLASFUNC(spotf2)(char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(dpotf2)(char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(qpotf2)(char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(cpotf2)(char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(zpotf2)(char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(xpotf2)(char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(spotrf)(char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(dpotrf)(char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(qpotrf)(char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(cpotrf)(char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(zpotrf)(char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(xpotrf)(char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(spotri)(char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(dpotri)(char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(qpotri)(char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(cpotri)(char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(zpotri)(char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(xpotri)(char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(spotrs)(char *, blasint *, blasint *, float *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(dpotrs)(char *, blasint *, blasint *, double *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(qpotrs)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(cpotrs)(char *, blasint *, blasint *, float *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(zpotrs)(char *, blasint *, blasint *, double *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(xpotrs)(char *, blasint *, blasint *, xdouble *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(slauu2)(char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(dlauu2)(char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(qlauu2)(char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(clauu2)(char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(zlauu2)(char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(xlauu2)(char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(slauum)(char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(dlauum)(char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(qlauum)(char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(clauum)(char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(zlauum)(char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(xlauum)(char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(strti2)(char *, char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(dtrti2)(char *, char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(qtrti2)(char *, char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(ctrti2)(char *, char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(ztrti2)(char *, char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(xtrti2)(char *, char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(strtri)(char *, char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(dtrtri)(char *, char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(qtrtri)(char *, char *, blasint *, xdouble *, blasint *, blasint *); | |||||
int BLASFUNC(ctrtri)(char *, char *, blasint *, float *, blasint *, blasint *); | |||||
int BLASFUNC(ztrtri)(char *, char *, blasint *, double *, blasint *, blasint *); | |||||
int BLASFUNC(xtrtri)(char *, char *, blasint *, xdouble *, blasint *, blasint *); | |||||
FLOATRET BLASFUNC(slamch)(char *); | |||||
double BLASFUNC(dlamch)(char *); | |||||
xdouble BLASFUNC(qlamch)(char *); | |||||
FLOATRET BLASFUNC(slamc3)(float *, float *); | |||||
double BLASFUNC(dlamc3)(double *, double *); | |||||
xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *); | |||||
/* BLAS extensions */ | |||||
void BLASFUNC(saxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(caxpby) (blasint *, void *, float *, blasint *, void *, float *, blasint *); | |||||
void BLASFUNC(zaxpby) (blasint *, void *, double *, blasint *, void *, double *, blasint *); | |||||
void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(comatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *); | |||||
void BLASFUNC(zomatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *); | |||||
void BLASFUNC(simatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *); | |||||
void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *); | |||||
void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *); | |||||
void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *); | |||||
void BLASFUNC(sgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*); | |||||
void BLASFUNC(dgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*); | |||||
void BLASFUNC(cgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*); | |||||
void BLASFUNC(zgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif /* __cplusplus */ | |||||
#endif | |||||
#endif |
@@ -0,0 +1,159 @@ | |||||
/***************************************************************************** | |||||
Copyright (c) 2010, Intel Corp. | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are met: | |||||
* Redistributions of source code must retain the above copyright notice, | |||||
this list of conditions and the following disclaimer. | |||||
* Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in the | |||||
documentation and/or other materials provided with the distribution. | |||||
* Neither the name of Intel Corporation nor the names of its contributors | |||||
may be used to endorse or promote products derived from this software | |||||
without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |||||
THE POSSIBILITY OF SUCH DAMAGE. | |||||
****************************************************************************** | |||||
* Contents: Native C interface to LAPACK | |||||
* Author: Intel Corporation | |||||
*****************************************************************************/ | |||||
#ifndef _LAPACKE_CONFIG_H_ | |||||
#define _LAPACKE_CONFIG_H_ | |||||
#ifdef __cplusplus | |||||
#if defined(LAPACK_COMPLEX_CPP) | |||||
#include <complex> | |||||
#endif | |||||
extern "C" { | |||||
#endif /* __cplusplus */ | |||||
#include <stdlib.h> | |||||
#include <stdint.h> | |||||
#include <inttypes.h> | |||||
/*
 * LAPACKE integer type: int64_t when LAPACK_ILP64 is defined, int32_t
 * otherwise. A pre-existing lapack_int macro is left untouched.
 */
#if !defined(lapack_int)
#ifdef LAPACK_ILP64
#define lapack_int int64_t
#else
#define lapack_int int32_t
#endif
#endif /* lapack_int */

/*
 * printf conversion macro paired with lapack_int (PRId64 / PRId32 from
 * <inttypes.h>). A pre-existing LAPACK_IFMT macro is left untouched.
 */
#if !defined(LAPACK_IFMT)
#ifdef LAPACK_ILP64
#define LAPACK_IFMT PRId64
#else
#define LAPACK_IFMT PRId32
#endif
#endif /* LAPACK_IFMT */

/* Logical values share the representation of lapack_int unless overridden. */
#if !defined(lapack_logical)
#define lapack_logical lapack_int
#endif /* lapack_logical */
/*
 * Overrides applied only when both _MSC_VER and __INTEL_CLANG_COMPILER are
 * defined:
 *   - LAPACK_COMPLEX_STRUCTURE selects the struct-based complex types in the
 *     complex-type selection further down in this header;
 *   - LAPACK_GLOBAL maps a routine to its unmodified lower-case name;
 *   - NOCHANGE is defined as well (it is one of the macros the name-mangling
 *     header tests for the "no decoration" pattern).
 */
#if defined(_MSC_VER) && defined(__INTEL_CLANG_COMPILER) | |||||
#define LAPACK_COMPLEX_STRUCTURE | |||||
#define LAPACK_GLOBAL(lcname,UCNAME) lcname | |||||
#define NOCHANGE | |||||
#endif | |||||
/*
 * Complex type selection.
 *
 * Unless the user supplies their own types (LAPACK_COMPLEX_CUSTOM), this block
 * defines lapack_complex_float / lapack_complex_double together with the
 * accessor macros lapack_complex_{float,double}_{real,imag}(z), and declares
 * the lapack_make_complex_* constructors.
 *
 * Selection order:
 *   MSVC (not Intel clang):  std::complex<> if LAPACK_COMPLEX_CPP,
 *                            otherwise _Fcomplex / _Dcomplex from <complex.h>
 *   everything else:         LAPACK_COMPLEX_STRUCTURE -> plain structs
 *                            LAPACK_COMPLEX_C99       -> C99 _Complex
 *                            LAPACK_COMPLEX_CPP       -> std::complex<>
 *                            (default)                -> C99 _Complex
 */
#ifndef LAPACK_COMPLEX_CUSTOM

#if defined(_MSC_VER) && !defined(__INTEL_CLANG_COMPILER)

#if defined(LAPACK_COMPLEX_CPP)
#include <complex>
#define lapack_complex_float std::complex<float>
#define lapack_complex_double std::complex<double>
#define lapack_complex_float_real(z) ((z).real())
#define lapack_complex_float_imag(z) ((z).imag())
#define lapack_complex_double_real(z) ((z).real())
#define lapack_complex_double_imag(z) ((z).imag())
/* NOTE(review): this is defined after <complex> has already been included and
 * only in the C++ branch, while the <complex.h> include below is not preceded
 * by it -- confirm the placement is intentional. */
#define _CRT_USE_C_COMPLEX_H
#else
#include <complex.h>
#define LAPACK_COMPLEX_CUSTOM
#define lapack_complex_float _Fcomplex
#define lapack_complex_double _Dcomplex
/* _Fcomplex and _Dcomplex are distinct types: creal()/cimag() take a
 * _Dcomplex, so the single-precision accessors must use the f-suffixed
 * functions to be usable from C. */
#define lapack_complex_float_real(z) (crealf(z))
#define lapack_complex_float_imag(z) (cimagf(z))
#define lapack_complex_double_real(z) (creal(z))
#define lapack_complex_double_imag(z) (cimag(z))
#endif

#else /* not MSVC, or MSVC driven by Intel clang */

#if defined(LAPACK_COMPLEX_STRUCTURE)
typedef struct { float real, imag; } _lapack_complex_float;
typedef struct { double real, imag; } _lapack_complex_double;
#define lapack_complex_float _lapack_complex_float
#define lapack_complex_double _lapack_complex_double
#define lapack_complex_float_real(z) ((z).real)
#define lapack_complex_float_imag(z) ((z).imag)
#define lapack_complex_double_real(z) ((z).real)
#define lapack_complex_double_imag(z) ((z).imag)
#elif defined(LAPACK_COMPLEX_C99)
#include <complex.h>
#define lapack_complex_float float _Complex
#define lapack_complex_double double _Complex
#define lapack_complex_float_real(z) (creal(z))
#define lapack_complex_float_imag(z) (cimag(z))
#define lapack_complex_double_real(z) (creal(z))
#define lapack_complex_double_imag(z) (cimag(z))
#elif defined(LAPACK_COMPLEX_CPP)
#define lapack_complex_float std::complex<float>
#define lapack_complex_double std::complex<double>
#define lapack_complex_float_real(z) ((z).real())
#define lapack_complex_float_imag(z) ((z).imag())
#define lapack_complex_double_real(z) ((z).real())
#define lapack_complex_double_imag(z) ((z).imag())
#else
/* Default: same definitions as the LAPACK_COMPLEX_C99 branch. */
#include <complex.h>
#define lapack_complex_float float _Complex
#define lapack_complex_double double _Complex
#define lapack_complex_float_real(z) (creal(z))
#define lapack_complex_float_imag(z) (cimag(z))
#define lapack_complex_double_real(z) (creal(z))
#define lapack_complex_double_imag(z) (cimag(z))
#endif

#endif

/* Constructors for the complex types selected above. */
lapack_complex_float lapack_make_complex_float( float re, float im );
lapack_complex_double lapack_make_complex_double( double re, double im );

#endif /* LAPACK_COMPLEX_CUSTOM */
/*
 * Allocation hooks. Each defaults to the C library routine and can be
 * replaced by defining the macro before this point.
 */
#if !defined(LAPACK_malloc)
#define LAPACK_malloc( size ) malloc( size )
#endif /* LAPACK_malloc */

#if !defined(LAPACK_free)
#define LAPACK_free( p ) free( p )
#endif /* LAPACK_free */
#ifdef __cplusplus | |||||
} | |||||
#endif /* __cplusplus */ | |||||
#endif /* _LAPACKE_CONFIG_H_ */ |
@@ -0,0 +1,17 @@ | |||||
#if !defined(LAPACK_HEADER_INCLUDED)
#define LAPACK_HEADER_INCLUDED

/*
 * LAPACK_GLOBAL(lcname,UCNAME) yields the symbol name of a routine given its
 * lower-case and upper-case spellings. Unless already defined, the first
 * matching rule wins:
 *   LAPACK_GLOBAL_PATTERN_LC or ADD_      -> lcname with a trailing underscore
 *   LAPACK_GLOBAL_PATTERN_UC or UPPER     -> UCNAME
 *   LAPACK_GLOBAL_PATTERN_MC or NOCHANGE  -> lcname
 *   none of the above                     -> lcname with a trailing underscore
 */
#if !defined(LAPACK_GLOBAL)
#  if defined(ADD_) || defined(LAPACK_GLOBAL_PATTERN_LC)
#    define LAPACK_GLOBAL(lcname,UCNAME) lcname##_
#  elif defined(UPPER) || defined(LAPACK_GLOBAL_PATTERN_UC)
#    define LAPACK_GLOBAL(lcname,UCNAME) UCNAME
#  elif defined(NOCHANGE) || defined(LAPACK_GLOBAL_PATTERN_MC)
#    define LAPACK_GLOBAL(lcname,UCNAME) lcname
#  else
#    define LAPACK_GLOBAL(lcname,UCNAME) lcname##_
#  endif
#endif /* LAPACK_GLOBAL */

#endif /* LAPACK_HEADER_INCLUDED */
@@ -0,0 +1,612 @@ | |||||
/***************************************************************************** | |||||
Copyright (c) 2014, Intel Corp. | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are met: | |||||
* Redistributions of source code must retain the above copyright notice, | |||||
this list of conditions and the following disclaimer. | |||||
* Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in the | |||||
documentation and/or other materials provided with the distribution. | |||||
* Neither the name of Intel Corporation nor the names of its contributors | |||||
may be used to endorse or promote products derived from this software | |||||
without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | |||||
THE POSSIBILITY OF SUCH DAMAGE. | |||||
****************************************************************************** | |||||
* Contents: Native C interface to LAPACK utility functions | |||||
* Author: Intel Corporation | |||||
*****************************************************************************/ | |||||
#ifndef _LAPACKE_UTILS_H_ | |||||
#define _LAPACKE_UTILS_H_ | |||||
#include "lapacke.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif /* __cplusplus */ | |||||
/*
 * Small arithmetic helpers. Each is defined only when no macro of the same
 * name exists yet. They are plain textual macros: an argument may be
 * evaluated more than once, so do not pass expressions with side effects.
 */
#if !defined(ABS)
#define ABS(x) (((x) < 0) ? -(x) : (x))
#endif /* ABS */

#if !defined(MAX)
#define MAX(x,y) (((x) > (y)) ? (x) : (y))
#endif /* MAX */

#if !defined(MIN)
#define MIN(x,y) (((x) < (y)) ? (x) : (y))
#endif /* MIN */

/* Three-argument variants, built on MAX / MIN. */
#if !defined(MAX3)
#define MAX3(x,y,z) (((x) > MAX(y,z)) ? (x) : MAX(y,z))
#endif /* MAX3 */

#if !defined(MIN3)
#define MIN3(x,y,z) (((x) < MIN(y,z)) ? (x) : MIN(y,z))
#endif /* MIN3 */
/*
 * Nonzero tests for real (S/D) and complex (C/Z) scalars.
 *
 * The real tests are written as two ordered comparisons, so an operand that
 * compares false against 0 in both directions (a NaN) is reported as "not
 * nonzero". The complex tests view the argument as two consecutive
 * float/double values and test each part; the argument must therefore be an
 * lvalue. It is parenthesized before its address is taken so that the macro
 * does not depend on the precedence of the caller's expression.
 */
#define IS_S_NONZERO(x) ( (x) < 0 || (x) > 0 )
#define IS_D_NONZERO(x) ( (x) < 0 || (x) > 0 )
#define IS_C_NONZERO(x) ( IS_S_NONZERO(*((float*)&(x))) || \
                          IS_S_NONZERO(*(((float*)&(x))+1)) )
#define IS_Z_NONZERO(x) ( IS_D_NONZERO(*((double*)&(x))) || \
                          IS_D_NONZERO(*(((double*)&(x))+1)) )
/* Error handler */ | |||||
void LAPACKE_xerbla( const char *name, lapack_int info ); | |||||
/* Compare two chars (case-insensitive) */ | |||||
lapack_logical LAPACKE_lsame( char ca, char cb ) | |||||
/* With GNU-compatible compilers the declaration carries the "const" function
 * attribute; other compilers see the plain prototype. */
#if defined __GNUC__ | |||||
__attribute__((const)) | |||||
#endif | |||||
; | |||||
/* Functions to convert column-major to row-major 2d arrays and vice versa. */ | |||||
void LAPACKE_cgb_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
lapack_int kl, lapack_int ku, | |||||
const lapack_complex_float *in, lapack_int ldin, | |||||
lapack_complex_float *out, lapack_int ldout ); | |||||
void LAPACKE_cge_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
const lapack_complex_float* in, lapack_int ldin, | |||||
lapack_complex_float* out, lapack_int ldout ); | |||||
void LAPACKE_cgg_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
const lapack_complex_float* in, lapack_int ldin, | |||||
lapack_complex_float* out, lapack_int ldout ); | |||||
void LAPACKE_chb_trans( int matrix_layout, char uplo, lapack_int n, | |||||
lapack_int kd, | |||||
const lapack_complex_float *in, lapack_int ldin, | |||||
lapack_complex_float *out, lapack_int ldout ); | |||||
void LAPACKE_che_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_float *in, lapack_int ldin, | |||||
lapack_complex_float *out, lapack_int ldout ); | |||||
void LAPACKE_chp_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_float *in, | |||||
lapack_complex_float *out ); | |||||
void LAPACKE_chs_trans( int matrix_layout, lapack_int n, | |||||
const lapack_complex_float *in, lapack_int ldin, | |||||
lapack_complex_float *out, lapack_int ldout ); | |||||
void LAPACKE_cpb_trans( int matrix_layout, char uplo, lapack_int n, | |||||
lapack_int kd, | |||||
const lapack_complex_float *in, lapack_int ldin, | |||||
lapack_complex_float *out, lapack_int ldout ); | |||||
void LAPACKE_cpf_trans( int matrix_layout, char transr, char uplo, | |||||
lapack_int n, const lapack_complex_float *in, | |||||
lapack_complex_float *out ); | |||||
void LAPACKE_cpo_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_float *in, lapack_int ldin, | |||||
lapack_complex_float *out, lapack_int ldout ); | |||||
void LAPACKE_cpp_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_float *in, | |||||
lapack_complex_float *out ); | |||||
void LAPACKE_csp_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_float *in, | |||||
lapack_complex_float *out ); | |||||
void LAPACKE_csy_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_float *in, lapack_int ldin, | |||||
lapack_complex_float *out, lapack_int ldout ); | |||||
void LAPACKE_ctb_trans( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, lapack_int kd, | |||||
const lapack_complex_float *in, lapack_int ldin, | |||||
lapack_complex_float *out, lapack_int ldout ); | |||||
void LAPACKE_ctf_trans( int matrix_layout, char transr, char uplo, char diag, | |||||
lapack_int n, const lapack_complex_float *in, | |||||
lapack_complex_float *out ); | |||||
void LAPACKE_ctp_trans( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, const lapack_complex_float *in, | |||||
lapack_complex_float *out ); | |||||
void LAPACKE_ctr_trans( int matrix_layout, char uplo, char diag, lapack_int n, | |||||
const lapack_complex_float *in, lapack_int ldin, | |||||
lapack_complex_float *out, lapack_int ldout ); | |||||
void LAPACKE_ctz_trans( int matrix_layout, char direct, char uplo, | |||||
char diag, lapack_int m, lapack_int n, | |||||
const lapack_complex_float *in, lapack_int ldin, | |||||
lapack_complex_float *out, lapack_int ldout ); | |||||
void LAPACKE_dgb_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
lapack_int kl, lapack_int ku, | |||||
const double *in, lapack_int ldin, | |||||
double *out, lapack_int ldout ); | |||||
void LAPACKE_dge_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
const double* in, lapack_int ldin, | |||||
double* out, lapack_int ldout ); | |||||
void LAPACKE_dgg_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
const double* in, lapack_int ldin, | |||||
double* out, lapack_int ldout ); | |||||
void LAPACKE_dhs_trans( int matrix_layout, lapack_int n, | |||||
const double *in, lapack_int ldin, | |||||
double *out, lapack_int ldout ); | |||||
void LAPACKE_dpb_trans( int matrix_layout, char uplo, lapack_int n, | |||||
lapack_int kd, | |||||
const double *in, lapack_int ldin, | |||||
double *out, lapack_int ldout ); | |||||
void LAPACKE_dpf_trans( int matrix_layout, char transr, char uplo, | |||||
lapack_int n, const double *in, | |||||
double *out ); | |||||
void LAPACKE_dpo_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const double *in, lapack_int ldin, | |||||
double *out, lapack_int ldout ); | |||||
void LAPACKE_dpp_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const double *in, | |||||
double *out ); | |||||
void LAPACKE_dsb_trans( int matrix_layout, char uplo, lapack_int n, | |||||
lapack_int kd, | |||||
const double *in, lapack_int ldin, | |||||
double *out, lapack_int ldout ); | |||||
void LAPACKE_dsp_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const double *in, | |||||
double *out ); | |||||
void LAPACKE_dsy_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const double *in, lapack_int ldin, | |||||
double *out, lapack_int ldout ); | |||||
void LAPACKE_dtb_trans( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, lapack_int kd, | |||||
const double *in, lapack_int ldin, | |||||
double *out, lapack_int ldout ); | |||||
void LAPACKE_dtf_trans( int matrix_layout, char transr, char uplo, char diag, | |||||
lapack_int n, const double *in, | |||||
double *out ); | |||||
void LAPACKE_dtp_trans( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, const double *in, | |||||
double *out ); | |||||
void LAPACKE_dtr_trans( int matrix_layout, char uplo, char diag, lapack_int n, | |||||
const double *in, lapack_int ldin, | |||||
double *out, lapack_int ldout ); | |||||
void LAPACKE_dtz_trans( int matrix_layout, char direct, char uplo, | |||||
char diag, lapack_int m, lapack_int n, | |||||
const double *in, lapack_int ldin, | |||||
double *out, lapack_int ldout ); | |||||
void LAPACKE_sgb_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
lapack_int kl, lapack_int ku, | |||||
const float *in, lapack_int ldin, | |||||
float *out, lapack_int ldout ); | |||||
void LAPACKE_sge_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
const float* in, lapack_int ldin, | |||||
float* out, lapack_int ldout ); | |||||
void LAPACKE_sgg_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
const float* in, lapack_int ldin, | |||||
float* out, lapack_int ldout ); | |||||
void LAPACKE_shs_trans( int matrix_layout, lapack_int n, | |||||
const float *in, lapack_int ldin, | |||||
float *out, lapack_int ldout ); | |||||
void LAPACKE_spb_trans( int matrix_layout, char uplo, lapack_int n, | |||||
lapack_int kd, | |||||
const float *in, lapack_int ldin, | |||||
float *out, lapack_int ldout ); | |||||
void LAPACKE_spf_trans( int matrix_layout, char transr, char uplo, | |||||
lapack_int n, const float *in, | |||||
float *out ); | |||||
void LAPACKE_spo_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const float *in, lapack_int ldin, | |||||
float *out, lapack_int ldout ); | |||||
void LAPACKE_spp_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const float *in, | |||||
float *out ); | |||||
void LAPACKE_ssb_trans( int matrix_layout, char uplo, lapack_int n, | |||||
lapack_int kd, | |||||
const float *in, lapack_int ldin, | |||||
float *out, lapack_int ldout ); | |||||
void LAPACKE_ssp_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const float *in, | |||||
float *out ); | |||||
void LAPACKE_ssy_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const float *in, lapack_int ldin, | |||||
float *out, lapack_int ldout ); | |||||
void LAPACKE_stb_trans( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, lapack_int kd, | |||||
const float *in, lapack_int ldin, | |||||
float *out, lapack_int ldout ); | |||||
void LAPACKE_stf_trans( int matrix_layout, char transr, char uplo, char diag, | |||||
lapack_int n, const float *in, | |||||
float *out ); | |||||
void LAPACKE_stp_trans( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, const float *in, | |||||
float *out ); | |||||
void LAPACKE_str_trans( int matrix_layout, char uplo, char diag, lapack_int n, | |||||
const float *in, lapack_int ldin, | |||||
float *out, lapack_int ldout ); | |||||
void LAPACKE_stz_trans( int matrix_layout, char direct, char uplo, | |||||
char diag, lapack_int m, lapack_int n, | |||||
const float *in, lapack_int ldin, | |||||
float *out, lapack_int ldout ); | |||||
void LAPACKE_zgb_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
lapack_int kl, lapack_int ku, | |||||
const lapack_complex_double *in, lapack_int ldin, | |||||
lapack_complex_double *out, lapack_int ldout ); | |||||
void LAPACKE_zge_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
const lapack_complex_double* in, lapack_int ldin, | |||||
lapack_complex_double* out, lapack_int ldout ); | |||||
void LAPACKE_zgg_trans( int matrix_layout, lapack_int m, lapack_int n, | |||||
const lapack_complex_double* in, lapack_int ldin, | |||||
lapack_complex_double* out, lapack_int ldout ); | |||||
void LAPACKE_zhb_trans( int matrix_layout, char uplo, lapack_int n, | |||||
lapack_int kd, | |||||
const lapack_complex_double *in, lapack_int ldin, | |||||
lapack_complex_double *out, lapack_int ldout ); | |||||
void LAPACKE_zhe_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_double *in, lapack_int ldin, | |||||
lapack_complex_double *out, lapack_int ldout ); | |||||
void LAPACKE_zhp_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_double *in, | |||||
lapack_complex_double *out ); | |||||
void LAPACKE_zhs_trans( int matrix_layout, lapack_int n, | |||||
const lapack_complex_double *in, lapack_int ldin, | |||||
lapack_complex_double *out, lapack_int ldout ); | |||||
void LAPACKE_zpb_trans( int matrix_layout, char uplo, lapack_int n, | |||||
lapack_int kd, | |||||
const lapack_complex_double *in, lapack_int ldin, | |||||
lapack_complex_double *out, lapack_int ldout ); | |||||
void LAPACKE_zpf_trans( int matrix_layout, char transr, char uplo, | |||||
lapack_int n, const lapack_complex_double *in, | |||||
lapack_complex_double *out ); | |||||
void LAPACKE_zpo_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_double *in, lapack_int ldin, | |||||
lapack_complex_double *out, lapack_int ldout ); | |||||
void LAPACKE_zpp_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_double *in, | |||||
lapack_complex_double *out ); | |||||
void LAPACKE_zsp_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_double *in, | |||||
lapack_complex_double *out ); | |||||
void LAPACKE_zsy_trans( int matrix_layout, char uplo, lapack_int n, | |||||
const lapack_complex_double *in, lapack_int ldin, | |||||
lapack_complex_double *out, lapack_int ldout ); | |||||
void LAPACKE_ztb_trans( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, lapack_int kd, | |||||
const lapack_complex_double *in, lapack_int ldin, | |||||
lapack_complex_double *out, lapack_int ldout ); | |||||
void LAPACKE_ztf_trans( int matrix_layout, char transr, char uplo, char diag, | |||||
lapack_int n, const lapack_complex_double *in, | |||||
lapack_complex_double *out ); | |||||
void LAPACKE_ztp_trans( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, const lapack_complex_double *in, | |||||
lapack_complex_double *out ); | |||||
void LAPACKE_ztr_trans( int matrix_layout, char uplo, char diag, lapack_int n, | |||||
const lapack_complex_double *in, lapack_int ldin, | |||||
lapack_complex_double *out, lapack_int ldout ); | |||||
void LAPACKE_ztz_trans( int matrix_layout, char direct, char uplo, | |||||
char diag, lapack_int m, lapack_int n, | |||||
const lapack_complex_double *in, lapack_int ldin, | |||||
lapack_complex_double *out, lapack_int ldout ); | |||||
/*
 * NaN checkers.
 *
 * A value is NaN exactly when it compares unequal to itself.  The macro
 * argument is fully parenthesized so that compound expressions such as
 * LAPACK_SISNAN( a + b ) are tested as a whole instead of being re-associated
 * around the `!=` operator.  Note that the argument is evaluated twice, so it
 * must not have side effects.
 *
 * The complex variants take an lvalue and inspect its storage as two
 * consecutive real values (first element, then second element).
 */
#define LAPACK_SISNAN( x ) ( (x) != (x) )
#define LAPACK_DISNAN( x ) ( (x) != (x) )
#define LAPACK_CISNAN( x ) ( LAPACK_SISNAN(*((float*) &(x))) || \
                             LAPACK_SISNAN(*(((float*) &(x))+1)) )
#define LAPACK_ZISNAN( x ) ( LAPACK_DISNAN(*((double*)&(x))) || \
                             LAPACK_DISNAN(*(((double*)&(x))+1)) )
/* NaN checkers for vectors */ | |||||
lapack_logical LAPACKE_c_nancheck( lapack_int n, | |||||
const lapack_complex_float *x, | |||||
lapack_int incx ); | |||||
lapack_logical LAPACKE_d_nancheck( lapack_int n, | |||||
const double *x, | |||||
lapack_int incx ); | |||||
lapack_logical LAPACKE_s_nancheck( lapack_int n, | |||||
const float *x, | |||||
lapack_int incx ); | |||||
lapack_logical LAPACKE_z_nancheck( lapack_int n, | |||||
const lapack_complex_double *x, | |||||
lapack_int incx ); | |||||
/* NaN checkers for matrices */ | |||||
lapack_logical LAPACKE_cgb_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, lapack_int kl, | |||||
lapack_int ku, | |||||
const lapack_complex_float *ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_cge_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, | |||||
const lapack_complex_float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_cgg_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, | |||||
const lapack_complex_float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_cgt_nancheck( lapack_int n, | |||||
const lapack_complex_float *dl, | |||||
const lapack_complex_float *d, | |||||
const lapack_complex_float *du ); | |||||
lapack_logical LAPACKE_chb_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, lapack_int kd, | |||||
const lapack_complex_float* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_che_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, | |||||
const lapack_complex_float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_chp_nancheck( lapack_int n, | |||||
const lapack_complex_float *ap ); | |||||
lapack_logical LAPACKE_chs_nancheck( int matrix_layout, lapack_int n, | |||||
const lapack_complex_float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_cpb_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, lapack_int kd, | |||||
const lapack_complex_float* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_cpf_nancheck( lapack_int n, | |||||
const lapack_complex_float *a ); | |||||
lapack_logical LAPACKE_cpo_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, | |||||
const lapack_complex_float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_cpp_nancheck( lapack_int n, | |||||
const lapack_complex_float *ap ); | |||||
lapack_logical LAPACKE_cpt_nancheck( lapack_int n, | |||||
const float *d, | |||||
const lapack_complex_float *e ); | |||||
lapack_logical LAPACKE_csp_nancheck( lapack_int n, | |||||
const lapack_complex_float *ap ); | |||||
lapack_logical LAPACKE_cst_nancheck( lapack_int n, | |||||
const lapack_complex_float *d, | |||||
const lapack_complex_float *e ); | |||||
lapack_logical LAPACKE_csy_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, | |||||
const lapack_complex_float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_ctb_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, lapack_int kd, | |||||
const lapack_complex_float* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_ctf_nancheck( int matrix_layout, char transr, | |||||
char uplo, char diag, | |||||
lapack_int n, | |||||
const lapack_complex_float *a ); | |||||
lapack_logical LAPACKE_ctp_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, | |||||
const lapack_complex_float *ap ); | |||||
lapack_logical LAPACKE_ctr_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, | |||||
const lapack_complex_float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_ctz_nancheck( int matrix_layout, char direct, char uplo, | |||||
char diag, lapack_int m, lapack_int n, | |||||
const lapack_complex_float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_dgb_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, lapack_int kl, | |||||
lapack_int ku, | |||||
const double *ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_dge_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, | |||||
const double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_dgg_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, | |||||
const double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_dgt_nancheck( lapack_int n, | |||||
const double *dl, | |||||
const double *d, | |||||
const double *du ); | |||||
lapack_logical LAPACKE_dhs_nancheck( int matrix_layout, lapack_int n, | |||||
const double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_dpb_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, lapack_int kd, | |||||
const double* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_dpf_nancheck( lapack_int n, | |||||
const double *a ); | |||||
lapack_logical LAPACKE_dpo_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, | |||||
const double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_dpp_nancheck( lapack_int n, | |||||
const double *ap ); | |||||
lapack_logical LAPACKE_dpt_nancheck( lapack_int n, | |||||
const double *d, | |||||
const double *e ); | |||||
lapack_logical LAPACKE_dsb_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, lapack_int kd, | |||||
const double* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_dsp_nancheck( lapack_int n, | |||||
const double *ap ); | |||||
lapack_logical LAPACKE_dst_nancheck( lapack_int n, | |||||
const double *d, | |||||
const double *e ); | |||||
lapack_logical LAPACKE_dsy_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, | |||||
const double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_dtb_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, lapack_int kd, | |||||
const double* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_dtf_nancheck( int matrix_layout, char transr, | |||||
char uplo, char diag, | |||||
lapack_int n, | |||||
const double *a ); | |||||
lapack_logical LAPACKE_dtp_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, | |||||
const double *ap ); | |||||
lapack_logical LAPACKE_dtr_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, | |||||
const double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_dtz_nancheck( int matrix_layout, char direct, char uplo, | |||||
char diag, lapack_int m, lapack_int n, | |||||
const double *a, lapack_int lda ); | |||||
lapack_logical LAPACKE_sgb_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, lapack_int kl, | |||||
lapack_int ku, | |||||
const float *ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_sge_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, | |||||
const float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_sgg_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, | |||||
const float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_sgt_nancheck( lapack_int n, | |||||
const float *dl, | |||||
const float *d, | |||||
const float *du ); | |||||
lapack_logical LAPACKE_shs_nancheck( int matrix_layout, lapack_int n, | |||||
const float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_spb_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, lapack_int kd, | |||||
const float* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_spf_nancheck( lapack_int n, | |||||
const float *a ); | |||||
lapack_logical LAPACKE_spo_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, | |||||
const float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_spp_nancheck( lapack_int n, | |||||
const float *ap ); | |||||
lapack_logical LAPACKE_spt_nancheck( lapack_int n, | |||||
const float *d, | |||||
const float *e ); | |||||
lapack_logical LAPACKE_ssb_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, lapack_int kd, | |||||
const float* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_ssp_nancheck( lapack_int n, | |||||
const float *ap ); | |||||
lapack_logical LAPACKE_sst_nancheck( lapack_int n, | |||||
const float *d, | |||||
const float *e ); | |||||
lapack_logical LAPACKE_ssy_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, | |||||
const float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_stb_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, lapack_int kd, | |||||
const float* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_stf_nancheck( int matrix_layout, char transr, | |||||
char uplo, char diag, | |||||
lapack_int n, | |||||
const float *a ); | |||||
lapack_logical LAPACKE_stp_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, | |||||
const float *ap ); | |||||
lapack_logical LAPACKE_str_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, | |||||
const float *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_stz_nancheck( int matrix_layout, char direct, char uplo, | |||||
char diag, lapack_int m, lapack_int n, | |||||
const float *a, lapack_int lda ); | |||||
lapack_logical LAPACKE_zgb_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, lapack_int kl, | |||||
lapack_int ku, | |||||
const lapack_complex_double *ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_zge_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, | |||||
const lapack_complex_double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_zgg_nancheck( int matrix_layout, lapack_int m, | |||||
lapack_int n, | |||||
const lapack_complex_double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_zgt_nancheck( lapack_int n, | |||||
const lapack_complex_double *dl, | |||||
const lapack_complex_double *d, | |||||
const lapack_complex_double *du ); | |||||
lapack_logical LAPACKE_zhb_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, lapack_int kd, | |||||
const lapack_complex_double* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_zhe_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, | |||||
const lapack_complex_double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_zhp_nancheck( lapack_int n, | |||||
const lapack_complex_double *ap ); | |||||
lapack_logical LAPACKE_zhs_nancheck( int matrix_layout, lapack_int n, | |||||
const lapack_complex_double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_zpb_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, lapack_int kd, | |||||
const lapack_complex_double* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_zpf_nancheck( lapack_int n, | |||||
const lapack_complex_double *a ); | |||||
lapack_logical LAPACKE_zpo_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, | |||||
const lapack_complex_double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_zpp_nancheck( lapack_int n, | |||||
const lapack_complex_double *ap ); | |||||
lapack_logical LAPACKE_zpt_nancheck( lapack_int n, | |||||
const double *d, | |||||
const lapack_complex_double *e ); | |||||
lapack_logical LAPACKE_zsp_nancheck( lapack_int n, | |||||
const lapack_complex_double *ap ); | |||||
lapack_logical LAPACKE_zst_nancheck( lapack_int n, | |||||
const lapack_complex_double *d, | |||||
const lapack_complex_double *e ); | |||||
lapack_logical LAPACKE_zsy_nancheck( int matrix_layout, char uplo, | |||||
lapack_int n, | |||||
const lapack_complex_double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_ztb_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, lapack_int kd, | |||||
const lapack_complex_double* ab, | |||||
lapack_int ldab ); | |||||
lapack_logical LAPACKE_ztf_nancheck( int matrix_layout, char transr, | |||||
char uplo, char diag, | |||||
lapack_int n, | |||||
const lapack_complex_double *a ); | |||||
lapack_logical LAPACKE_ztp_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, | |||||
const lapack_complex_double *ap ); | |||||
lapack_logical LAPACKE_ztr_nancheck( int matrix_layout, char uplo, char diag, | |||||
lapack_int n, | |||||
const lapack_complex_double *a, | |||||
lapack_int lda ); | |||||
lapack_logical LAPACKE_ztz_nancheck( int matrix_layout, char direct, char uplo, | |||||
char diag, lapack_int m, lapack_int n, | |||||
const lapack_complex_double *a, | |||||
lapack_int lda ); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif /* __cplusplus */ | |||||
#endif /* _LAPACKE_UTILS_H_ */ |
@@ -0,0 +1,136 @@ | |||||
#ifndef OPENBLAS_CONFIG_H | |||||
#define OPENBLAS_CONFIG_H | |||||
#define OPENBLAS_OS_LINUX 1 | |||||
#define OPENBLAS_ARCH_RISCV64 1 | |||||
#define OPENBLAS_C_GCC 1 | |||||
#define OPENBLAS___64BIT__ 1 | |||||
#define OPENBLAS_HAVE_C11 1 | |||||
#define OPENBLAS_PTHREAD_CREATE_FUNC pthread_create | |||||
#define OPENBLAS_BUNDERSCORE _ | |||||
#define OPENBLAS_NEEDBUNDERSCORE 1 | |||||
#define OPENBLAS_RISCV64_ZVL128B | |||||
#define OPENBLAS_L1_DATA_SIZE 32768 | |||||
#define OPENBLAS_L1_DATA_LINESIZE 32 | |||||
#define OPENBLAS_L2_SIZE 1048576 | |||||
#define OPENBLAS_L2_LINESIZE 32 | |||||
#define OPENBLAS_DTB_DEFAULT_ENTRIES 128 | |||||
#define OPENBLAS_DTB_SIZE 4096 | |||||
#define OPENBLAS_L2_ASSOCIATIVE 4 | |||||
#define OPENBLAS_CORE_RISCV64_ZVL128B | |||||
#define OPENBLAS_CHAR_CORENAME "RISCV64_ZVL128B" | |||||
#define OPENBLAS_GEMM_MULTITHREAD_THRESHOLD 4 | |||||
#define OPENBLAS_VERSION " OpenBLAS 0.3.29.dev " | |||||
/*This is only for "make install" target.*/ | |||||
#if defined(OPENBLAS_OS_WINNT) || defined(OPENBLAS_OS_CYGWIN_NT) || defined(OPENBLAS_OS_INTERIX) | |||||
#define OPENBLAS_WINDOWS_ABI | |||||
#define OPENBLAS_OS_WINDOWS | |||||
#ifdef DOUBLE | |||||
#define DOUBLE_DEFINED DOUBLE | |||||
#undef DOUBLE | |||||
#endif | |||||
#endif | |||||
#ifdef OPENBLAS_NEEDBUNDERSCORE | |||||
#define BLASFUNC(FUNC) FUNC##_ | |||||
#else | |||||
#define BLASFUNC(FUNC) FUNC | |||||
#endif | |||||
#ifdef OPENBLAS_QUAD_PRECISION | |||||
typedef struct { | |||||
unsigned long x[2]; | |||||
} xdouble; | |||||
#elif defined OPENBLAS_EXPRECISION | |||||
#define xdouble long double | |||||
#else | |||||
#define xdouble double | |||||
#endif | |||||
#if defined(OPENBLAS_OS_WINDOWS) && defined(OPENBLAS___64BIT__) | |||||
typedef long long BLASLONG; | |||||
typedef unsigned long long BLASULONG; | |||||
#else | |||||
typedef long BLASLONG; | |||||
typedef unsigned long BLASULONG; | |||||
#endif | |||||
#ifndef BFLOAT16 | |||||
#include <stdint.h> | |||||
typedef uint16_t bfloat16; | |||||
#endif | |||||
#if defined(__GNUC__) && (__GNUC__ >= 12) | |||||
typedef _Float16 hfloat16; | |||||
#else | |||||
#include <stdint.h> | |||||
typedef uint16_t hfloat16; | |||||
#endif | |||||
#ifdef OPENBLAS_USE64BITINT | |||||
typedef BLASLONG blasint; | |||||
#else | |||||
typedef int blasint; | |||||
#endif | |||||
#if defined(XDOUBLE) || defined(DOUBLE) | |||||
#define FLOATRET FLOAT | |||||
#else | |||||
#ifdef NEED_F2CCONV | |||||
#define FLOATRET double | |||||
#else | |||||
#define FLOATRET float | |||||
#endif | |||||
#endif | |||||
/* Inclusion of a standard header file is needed for definition of __STDC_* | |||||
predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs | |||||
as a side effect of including either <features.h> or <stdc-predef.h>. */ | |||||
#include <stdio.h> | |||||
/* C99 supports complex floating numbers natively, which GCC also offers as an | |||||
extension since version 3.0. If neither are available, use a compatible | |||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | |||||
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | |||||
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER) | |||||
#define OPENBLAS_COMPLEX_C99 | |||||
#ifndef __cplusplus | |||||
#include <complex.h> | |||||
#endif | |||||
typedef float _Complex openblas_complex_float; | |||||
typedef double _Complex openblas_complex_double; | |||||
typedef xdouble _Complex openblas_complex_xdouble; | |||||
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
#define openblas_complex_float_real(z) (creal(z)) | |||||
#define openblas_complex_float_imag(z) (cimag(z)) | |||||
#define openblas_complex_double_real(z) (creal(z)) | |||||
#define openblas_complex_double_imag(z) (cimag(z)) | |||||
#define openblas_complex_xdouble_real(z) (creal(z)) | |||||
#define openblas_complex_xdouble_imag(z) (cimag(z)) | |||||
#else | |||||
#define OPENBLAS_COMPLEX_STRUCT | |||||
typedef struct { float real, imag; } openblas_complex_float; | |||||
typedef struct { double real, imag; } openblas_complex_double; | |||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble; | |||||
#define openblas_make_complex_float(real, imag) {(real), (imag)} | |||||
#define openblas_make_complex_double(real, imag) {(real), (imag)} | |||||
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)} | |||||
#define openblas_complex_float_real(z) ((z).real) | |||||
#define openblas_complex_float_imag(z) ((z).imag) | |||||
#define openblas_complex_double_real(z) ((z).real) | |||||
#define openblas_complex_double_imag(z) ((z).imag) | |||||
#define openblas_complex_xdouble_real(z) ((z).real) | |||||
#define openblas_complex_xdouble_imag(z) ((z).imag) | |||||
#endif | |||||
/* Inclusion of Linux-specific header is needed for definition of cpu_set_t. */ | |||||
#ifdef OPENBLAS_OS_LINUX | |||||
#ifndef _GNU_SOURCE | |||||
#define _GNU_SOURCE | |||||
#endif | |||||
#include <sched.h> | |||||
#endif | |||||
#endif /* OPENBLAS_CONFIG_H */ |
@@ -0,0 +1,4 @@ | |||||
# OpenBLAS package configuration (variable-based interface).
#
# Defines:
#   OpenBLAS_VERSION       - version string of this package
#   OpenBLAS_INCLUDE_DIRS  - header directory
#   OpenBLAS_LIBRARIES     - full path of the shared library
#
# All path expansions are quoted so that a location containing ';' is not
# split into several arguments.
set(OpenBLAS_VERSION "0.3.29.dev")
# The root directory is resolved as three levels above the directory that
# contains this file.
file(REAL_PATH "../../.." _OpenBLAS_ROOT_DIR BASE_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}")
set(OpenBLAS_INCLUDE_DIRS "${_OpenBLAS_ROOT_DIR}/include")
set(OpenBLAS_LIBRARIES "${_OpenBLAS_ROOT_DIR}/lib/libopenblas.so")
@@ -0,0 +1,9 @@ | |||||
# Package version file consulted by find_package().
#
# Reports this package as compatible with any requested version that is not
# newer than PACKAGE_VERSION, and as an exact match only when the requested
# version string equals PACKAGE_VERSION.
set (PACKAGE_VERSION "0.3.29.dev")
if (NOT PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)
  # Requested version is not newer than what is installed.
  set (PACKAGE_VERSION_COMPATIBLE TRUE)
  if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)
    set (PACKAGE_VERSION_EXACT TRUE)
  endif ()
else ()
  set (PACKAGE_VERSION_COMPATIBLE FALSE)
endif ()
@@ -0,0 +1,16 @@ | |||||
libdir=/home/da/OpenBLAS/install/lib | |||||
libprefix= | |||||
libnamesuffix= | |||||
libsuffix= | |||||
includedir=/home/da/OpenBLAS/install/include | |||||
omp_opt= | |||||
openblas_config= USE_64BITINT= DYNAMIC_ARCH= DYNAMIC_OLDER= NO_CBLAS= NO_LAPACK= NO_LAPACKE= NO_AFFINITY=1 USE_OPENMP= RISCV64_ZVL128B MAX_THREADS=32 | |||||
version=0.3.29.dev | |||||
extralib=-lm -lpthread -lgfortran -lm -lpthread -lgfortran | |||||
Name: openblas | |||||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version | |||||
Version: ${version} | |||||
URL: https://github.com/xianyi/OpenBLAS | |||||
Libs: -L${libdir} -l${libprefix}openblas${libsuffix}${libnamesuffix} | |||||
Libs.private: ${extralib} | |||||
Cflags: -I${includedir} ${omp_opt} |
@@ -0,0 +1,45 @@ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#include <string.h> | |||||
#include <cblas.h> | |||||
#include <riscv_vector.h> | |||||
/*
 * Print an M x N row-major matrix of floats to stdout.
 * Each element is written with "%f " and every row ends with a newline.
 */
void print_matrix(float *C, int M, int N) {
    int row = 0;
    while (row < M) {
        int col = 0;
        while (col < N) {
            /* Row-major addressing: element (row, col) lives at row * N + col. */
            printf("%f ", C[row * N + col]);
            ++col;
        }
        printf("\n");
        ++row;
    }
}
int main() { | |||||
const int M = 2, N = 2, K = 2; | |||||
const float alpha = 1.0f; | |||||
const float beta = 0.0f; | |||||
// A[M x K], row-major | |||||
hfloat16 A[4] = {1.0, 2.0, | |||||
3.0, 4.0}; | |||||
// B[K x N], row-major | |||||
hfloat16 B[4] = {5.0, 6.0, | |||||
7.0, 8.0}; | |||||
// C[M x N], row-major | |||||
float C[4] = {0}; | |||||
// Call OpenBLAS float16 GEMM | |||||
cblas_shgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, | |||||
M, N, K, | |||||
alpha, | |||||
A, K, // lda = K | |||||
B, N, // ldb = N | |||||
beta, | |||||
C, N); // ldc = N | |||||
printf("Result C = A*B:\n"); | |||||
print_matrix(C, M, N); | |||||
return 0; | |||||
} | |||||
@@ -0,0 +1,22 @@ | |||||
#include <riscv_vector.h> | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
int main(){ | |||||
unsigned int gvl = __riscv_vsetvl_e32m2(8); | |||||
float *A = (float *)malloc(4 * 4 * sizeof(float)); | |||||
for (int i =0;i<4*4;i++){ | |||||
A[i]=i%10; | |||||
} | |||||
vfloat32m2_t A0 = __riscv_vle32_v_f32m2(&A[0], gvl); | |||||
float tmp[8]; | |||||
__riscv_vse32_v_f32m2(tmp, A0, gvl); | |||||
printf("A0 vector contents:\n"); | |||||
for (int i = 0; i < gvl; i++) { | |||||
printf("tmp[%d] = %.2f\n", i, tmp[i]); | |||||
} | |||||
free(A); | |||||
return 0; | |||||
} |
@@ -136,6 +136,9 @@ if (BUILD_BFLOAT16) | |||||
GenerateNamedObjects("gemm_batch.c" "" "sbgemm_batch" ${CBLAS_FLAG} "" "" true "BFLOAT16") | GenerateNamedObjects("gemm_batch.c" "" "sbgemm_batch" ${CBLAS_FLAG} "" "" true "BFLOAT16") | ||||
endif () | endif () | ||||
endif () | endif () | ||||
if (BUILD_HFLOAT16) | |||||
GenerateNamedObjects("gemm.c" "" "shgemm" ${CBLAS_FLAG} "" "" true "HFLOAT16") | |||||
endif () | |||||
# complex-specific sources | # complex-specific sources | ||||
foreach (float_type ${FLOAT_TYPES}) | foreach (float_type ${FLOAT_TYPES}) | ||||
@@ -53,6 +53,10 @@ SBBLAS3OBJS = sbgemm.$(SUFFIX) sbgemmt.$(SUFFIX) sbgemmtr.$(SUFFIX) | |||||
SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX) | SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
SHBLAS3OBJS = shgemm.$(SUFFIX) | |||||
endif | |||||
DBLAS1OBJS = \ | DBLAS1OBJS = \ | ||||
daxpy.$(SUFFIX) dswap.$(SUFFIX) \ | daxpy.$(SUFFIX) dswap.$(SUFFIX) \ | ||||
dcopy.$(SUFFIX) dscal.$(SUFFIX) \ | dcopy.$(SUFFIX) dscal.$(SUFFIX) \ | ||||
@@ -291,6 +295,10 @@ CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) cblas_sbgemmt.$(SUFFIX) cblas_sbgemmtr.$(S | |||||
CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX) | CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
CSHBLAS3OBJS = cblas_shgemm.$(SUFFIX) | |||||
endif | |||||
CDBLAS1OBJS = \ | CDBLAS1OBJS = \ | ||||
cblas_idamax.$(SUFFIX) cblas_idamin.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \ | cblas_idamax.$(SUFFIX) cblas_idamin.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \ | ||||
cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \ | cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \ | ||||
@@ -388,6 +396,7 @@ SBLAS3OBJS += $(CSBLAS3OBJS) | |||||
SBBLAS1OBJS += $(CSBBLAS1OBJS) | SBBLAS1OBJS += $(CSBBLAS1OBJS) | ||||
SBBLAS2OBJS += $(CSBBLAS2OBJS) | SBBLAS2OBJS += $(CSBBLAS2OBJS) | ||||
SBBLAS3OBJS += $(CSBBLAS3OBJS) | SBBLAS3OBJS += $(CSBBLAS3OBJS) | ||||
SHBLAS3OBJS += $(CSHBLAS3OBJS) | |||||
DBLAS1OBJS += $(CDBLAS1OBJS) | DBLAS1OBJS += $(CDBLAS1OBJS) | ||||
DBLAS2OBJS += $(CDBLAS2OBJS) | DBLAS2OBJS += $(CDBLAS2OBJS) | ||||
DBLAS3OBJS += $(CDBLAS3OBJS) | DBLAS3OBJS += $(CDBLAS3OBJS) | ||||
@@ -405,6 +414,7 @@ endif | |||||
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) | SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) | ||||
SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS2OBJS) $(SBBLAS3OBJS) | SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS2OBJS) $(SBBLAS3OBJS) | ||||
SHBLASOBJS = $(SHBLAS3OBJS) | |||||
DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS) | DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS) | ||||
QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS) | QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS) | ||||
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) | CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) | ||||
@@ -512,7 +522,7 @@ ifneq ($(BUILD_COMPLEX16),1) | |||||
ZBLASOBJS= | ZBLASOBJS= | ||||
endif | endif | ||||
FUNCOBJS = $(SBEXTOBJS) $(CXERBLAOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) | |||||
FUNCOBJS = $(SBEXTOBJS) $(CXERBLAOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(SHBLASOBJS) | |||||
ifeq ($(EXPRECISION), 1) | ifeq ($(EXPRECISION), 1) | ||||
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) | FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) | ||||
@@ -550,7 +560,7 @@ level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $ | |||||
level2 : $(SBBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | level2 : $(SBBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | ||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) | |||||
level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(SHBLAS3OBJS) | |||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | ||||
aux : $(CBAUXOBJS) | aux : $(CBAUXOBJS) | ||||
@@ -1309,6 +1319,11 @@ sbgemmtr.$(SUFFIX) sbgemmtr.$(PSUFFIX) : sbgemmt.c ../param.h | |||||
$(CC) -c $(CFLAGS) -DRNAME $< -o $(@F) | $(CC) -c $(CFLAGS) -DRNAME $< -o $(@F) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
shgemm.$(SUFFIX) shgemm.$(PSUFFIX) : gemm.c ../param.h | |||||
$(CC) -c $(CFLAGS) $< -o $(@F) | |||||
endif | |||||
sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h | sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h | ||||
$(CC) -c $(CFLAGS) $< -o $(@F) | $(CC) -c $(CFLAGS) $< -o $(@F) | ||||
@@ -1968,6 +1983,11 @@ cblas_sbgemm.$(SUFFIX) cblas_sbgemm.$(PSUFFIX) : gemm.c ../param.h | |||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
cblas_shgemm.$(SUFFIX) cblas_shgemm.$(PSUFFIX) : gemm.c ../param.h | |||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | |||||
endif | |||||
cblas_dgemm.$(SUFFIX) cblas_dgemm.$(PSUFFIX) : gemm.c ../param.h | cblas_dgemm.$(SUFFIX) cblas_dgemm.$(PSUFFIX) : gemm.c ../param.h | ||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | $(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F) | ||||
@@ -351,6 +351,22 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMMKERNEL}" "" "gemm_kernel" false "" "" false "BFLOAT16") | GenerateNamedObjects("${KERNELDIR}/${SBGEMMKERNEL}" "" "gemm_kernel" false "" "" false "BFLOAT16") | ||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_BETA}" "" "gemm_beta" false "" "" false "BFLOAT16") | GenerateNamedObjects("${KERNELDIR}/${SBGEMM_BETA}" "" "gemm_beta" false "" "" false "BFLOAT16") | ||||
endif () | endif () | ||||
# Half-precision (HFLOAT16) GEMM objects: the optional packing/copy routines
# followed by the compute kernel and the beta-scaling routine.
if (BUILD_HFLOAT16)
  # A copy routine is only generated when its source variable is set.
  foreach (shgemm_copy INCOPY ITCOPY ONCOPY OTCOPY)
    if (SHGEMM${shgemm_copy})
      GenerateNamedObjects("${KERNELDIR}/${SHGEMM${shgemm_copy}}" "" "${SHGEMM${shgemm_copy}OBJ}" false "" "" true "HFLOAT16")
    endif ()
  endforeach ()
  GenerateNamedObjects("${KERNELDIR}/${SHGEMMKERNEL}" "" "gemm_kernel" false "" "" false "HFLOAT16")
  GenerateNamedObjects("${KERNELDIR}/${SHGEMM_BETA}" "" "gemm_beta" false "" "" false "HFLOAT16")
endif ()
foreach (float_type ${FLOAT_TYPES}) | foreach (float_type ${FLOAT_TYPES}) | ||||
string(SUBSTRING ${float_type} 0 1 float_char) | string(SUBSTRING ${float_type} 0 1 float_char) | ||||
if (${float_char}GEMMINCOPY) | if (${float_char}GEMMINCOPY) | ||||
@@ -769,6 +785,45 @@ endif () | |||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "BFLOAT16") | GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "BFLOAT16") | ||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "BFLOAT16") | GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "BFLOAT16") | ||||
endif () | endif () | ||||
# Half-precision (HFLOAT16) small-matrix GEMM objects.
# Any source variable that is not already defined falls back to the generic
# implementation; the objects are then generated in the order
# permit, {nn, nt, tn, tt}, B0 {nn, nt, tn, tt}.
if (BUILD_HFLOAT16)
  if (NOT DEFINED SHGEMM_SMALL_M_PERMIT)
    set(SHGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c)
  endif ()
  # The regular and the B0 variant of each transpose case share one generic source.
  foreach (shgemm_trans NN NT TN TT)
    string(TOLOWER "${shgemm_trans}" shgemm_trans_lc)
    foreach (shgemm_var SHGEMM_SMALL_K_${shgemm_trans} SHGEMM_SMALL_K_B0_${shgemm_trans})
      if (NOT DEFINED ${shgemm_var})
        set(${shgemm_var} ../generic/gemm_small_matrix_kernel_${shgemm_trans_lc}.c)
      endif ()
    endforeach ()
  endforeach ()

  GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "HFLOAT16")
  foreach (shgemm_trans NN NT TN TT)
    string(TOLOWER "${shgemm_trans}" shgemm_trans_lc)
    GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_${shgemm_trans}}" "" "gemm_small_kernel_${shgemm_trans_lc}" false "" "" false "HFLOAT16")
  endforeach ()
  foreach (shgemm_trans NN NT TN TT)
    string(TOLOWER "${shgemm_trans}" shgemm_trans_lc)
    GenerateNamedObjects("${KERNELDIR}/${SHGEMM_SMALL_K_B0_${shgemm_trans}}" "B0" "gemm_small_kernel_b0_${shgemm_trans_lc}" false "" "" false "HFLOAT16")
  endforeach ()
  # Drop the helper so no extra variable outlives this block.
  unset(shgemm_trans_lc)
endif ()
endif () | endif () | ||||
if (NOT DEFINED ${float_char}OMATCOPY_CN) | if (NOT DEFINED ${float_char}OMATCOPY_CN) | ||||
@@ -129,6 +129,26 @@ SBKERNELOBJS += \ | |||||
$(SBGEMMONCOPYOBJ) $(SBGEMMOTCOPYOBJ) | $(SBGEMMONCOPYOBJ) $(SBGEMMOTCOPYOBJ) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16), 1) | |||||
ifndef SHGEMMKERNEL | |||||
SHGEMM_BETA = ../generic/gemm_beta.c | |||||
SHGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
SHGEMMINCOPY = ../generic/gemm_ncopy_2.c | |||||
SHGEMMITCOPY = ../generic/gemm_tcopy_2.c | |||||
SHGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
SHGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
endif | |||||
SHKERNELOBJS += \ | |||||
shgemm_kernel$(TSUFFIX).$(SUFFIX) \ | |||||
$(SHGEMMINCOPYOBJ) $(SHGEMMITCOPYOBJ) \ | |||||
$(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ) | |||||
endif | |||||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" | ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" | ||||
SKERNELOBJS += \ | SKERNELOBJS += \ | ||||
sgemm_kernel$(TSUFFIX).$(SUFFIX) \ | sgemm_kernel$(TSUFFIX).$(SUFFIX) \ | ||||
@@ -192,6 +212,9 @@ XKERNELOBJS += \ | |||||
ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
SBBLASOBJS += $(SBKERNELOBJS) | SBBLASOBJS += $(SBKERNELOBJS) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
SHBLASOBJS += $(SHKERNELOBJS) | |||||
endif | |||||
SBLASOBJS += $(SKERNELOBJS) | SBLASOBJS += $(SKERNELOBJS) | ||||
DBLASOBJS += $(DKERNELOBJS) | DBLASOBJS += $(DKERNELOBJS) | ||||
QBLASOBJS += $(QKERNELOBJS) | QBLASOBJS += $(QKERNELOBJS) | ||||
@@ -202,6 +225,9 @@ XBLASOBJS += $(XKERNELOBJS) | |||||
ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
SBBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX) | SBBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX) | |||||
endif | |||||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" | ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" | ||||
SBLASOBJS += \ | SBLASOBJS += \ | ||||
@@ -493,6 +519,15 @@ SBBLASOBJS += \ | |||||
sbgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) sbgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) | sbgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) sbgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
SHBLASOBJS += \ | |||||
shgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) \ | |||||
shgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) shgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) \ | |||||
shgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) shgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) \ | |||||
shgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) shgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) \ | |||||
shgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) shgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) | |||||
endif | |||||
SBLASOBJS += \ | SBLASOBJS += \ | ||||
sgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) \ | sgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) \ | ||||
sgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) sgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) \ | sgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) sgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) \ | ||||
@@ -599,6 +634,13 @@ SBGEMMONCOPYOBJ_P = $(SBGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||||
SBGEMMOTCOPYOBJ_P = $(SBGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SBGEMMOTCOPYOBJ_P = $(SBGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16), 1) | |||||
SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||||
SHGEMMITCOPYOBJ_P = $(SHGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||||
SHGEMMONCOPYOBJ_P = $(SHGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||||
SHGEMMOTCOPYOBJ_P = $(SHGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | |||||
endif | |||||
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
SGEMMONCOPYOBJ_P = $(SGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | SGEMMONCOPYOBJ_P = $(SGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) | ||||
@@ -629,6 +671,11 @@ $(KDIR)sbgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA) | |||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
$(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
endif | |||||
$(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) | $(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA) | ||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | ||||
@@ -671,6 +718,25 @@ $(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY) | |||||
endif | endif | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16), 1) | |||||
$(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
$(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) | |||||
$(KDIR)$(SHGEMMINCOPYOBJ) : $(KERNELDIR)/$(SHGEMMINCOPY) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
$(KDIR)$(SHGEMMITCOPYOBJ) : $(KERNELDIR)/$(SHGEMMITCOPY) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
endif | |||||
endif | |||||
$(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) | $(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY) | ||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | ||||
@@ -853,6 +919,12 @@ $(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMM | |||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16), 1) | |||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
endif | |||||
$(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) | $(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND) | ||||
ifeq ($(OS), AIX) | ifeq ($(OS), AIX) | ||||
$(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s | $(CC) $(CFLAGS) -S -DDOUBLE -UCOMPLEX $< -o - > dgemm_kernel$(TSUFFIX).s | ||||
@@ -2840,6 +2912,11 @@ $(KDIR)sbgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA) | |||||
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16),1) | |||||
$(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA) | |||||
$(CC) $(PFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
endif | |||||
$(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA) | $(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA) | ||||
$(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ | ||||
@@ -2873,6 +2950,23 @@ $(SBGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMITCOPY) | |||||
endif | endif | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16), 1) | |||||
$(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY) | |||||
$(CC) $(PFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
$(SHGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMOTCOPY) | |||||
$(CC) $(PFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) | |||||
$(SHGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMINCOPY) | |||||
$(CC) $(PFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
$(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY) | |||||
$(CC) $(PFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
endif | |||||
endif | |||||
$(SGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMONCOPY) | $(SGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMONCOPY) | ||||
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | ||||
@@ -2983,6 +3077,11 @@ $(KDIR)sbgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEM | |||||
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16), 1) | |||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND) | |||||
$(CC) $(PFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
endif | |||||
$(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) | $(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND) | ||||
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | $(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ | ||||
@@ -4843,6 +4942,71 @@ $(KDIR)sbgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_SMA | |||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DB0 $< -o $@ | $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DB0 $< -o $@ | ||||
endif | endif | ||||
ifeq ($(BUILD_HFLOAT16), 1) | |||||
ifndef SHGEMM_SMALL_M_PERMIT | |||||
SHGEMM_SMALL_M_PERMIT = ../generic/gemm_small_matrix_permit.c | |||||
endif | |||||
ifndef SHGEMM_SMALL_K_NN | |||||
SHGEMM_SMALL_K_NN = ../generic/gemm_small_matrix_kernel_nn.c | |||||
endif | |||||
ifndef SHGEMM_SMALL_K_NT | |||||
SHGEMM_SMALL_K_NT = ../generic/gemm_small_matrix_kernel_nt.c | |||||
endif | |||||
ifndef SHGEMM_SMALL_K_TN | |||||
SHGEMM_SMALL_K_TN = ../generic/gemm_small_matrix_kernel_tn.c | |||||
endif | |||||
ifndef SHGEMM_SMALL_K_TT | |||||
SHGEMM_SMALL_K_TT = ../generic/gemm_small_matrix_kernel_tt.c | |||||
endif | |||||
$(KDIR)shgemm_small_matrix_permit$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_SMALL_M_PERMIT) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
$(KDIR)shgemm_small_kernel_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_SMALL_K_NN) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
$(KDIR)shgemm_small_kernel_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_SMALL_K_NT) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
$(KDIR)shgemm_small_kernel_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_SMALL_K_TN) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
$(KDIR)shgemm_small_kernel_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_SMALL_K_TT) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@ | |||||
ifndef SHGEMM_SMALL_K_B0_NN | |||||
SHGEMM_SMALL_K_B0_NN = ../generic/gemm_small_matrix_kernel_nn.c | |||||
endif | |||||
ifndef SHGEMM_SMALL_K_B0_NT | |||||
SHGEMM_SMALL_K_B0_NT = ../generic/gemm_small_matrix_kernel_nt.c | |||||
endif | |||||
ifndef SHGEMM_SMALL_K_B0_TN | |||||
SHGEMM_SMALL_K_B0_TN = ../generic/gemm_small_matrix_kernel_tn.c | |||||
endif | |||||
ifndef SHGEMM_SMALL_K_B0_TT | |||||
SHGEMM_SMALL_K_B0_TT = ../generic/gemm_small_matrix_kernel_tt.c | |||||
endif | |||||
$(KDIR)shgemm_small_kernel_b0_nn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_SMALL_K_B0_NN) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DB0 $< -o $@ | |||||
$(KDIR)shgemm_small_kernel_b0_nt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_SMALL_K_B0_NT) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DB0 $< -o $@ | |||||
$(KDIR)shgemm_small_kernel_b0_tn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_SMALL_K_B0_TN) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DB0 $< -o $@ | |||||
$(KDIR)shgemm_small_kernel_b0_tt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_SMALL_K_B0_TT) | |||||
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX -DB0 $< -o $@ | |||||
endif | |||||
ifndef CGEMM_SMALL_M_PERMIT | ifndef CGEMM_SMALL_M_PERMIT | ||||
CGEMM_SMALL_M_PERMIT = ../generic/zgemm_small_matrix_permit.c | CGEMM_SMALL_M_PERMIT = ../generic/zgemm_small_matrix_permit.c | ||||
endif | endif | ||||
@@ -245,3 +245,12 @@ endif | |||||
ifndef ZGEMM_BETA | ifndef ZGEMM_BETA | ||||
ZGEMM_BETA = zgemm_beta_rvv.c | ZGEMM_BETA = zgemm_beta_rvv.c | ||||
endif | endif | ||||
SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl128b.c | |||||
SHGEMMONCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_N).c | |||||
SHGEMMOTCOPY = ../generic/gemm_tcopy_$(SHGEMM_UNROLL_N).c | |||||
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
ifndef SHGEMM_BETA | |||||
SHGEMM_BETA = gemm_beta_rvv.c | |||||
endif |
@@ -207,3 +207,19 @@ COMATCOPY_CN = zomatcopy_cn_vector.c | |||||
DOMATCOPY_CN = omatcopy_cn_vector.c | DOMATCOPY_CN = omatcopy_cn_vector.c | ||||
SOMATCOPY_CN = omatcopy_cn_vector.c | SOMATCOPY_CN = omatcopy_cn_vector.c | ||||
SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl256b.c | |||||
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) | |||||
SHGEMMINCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_M).c | |||||
SHGEMMITCOPY = ../generic/gemm_tcopy_$(SHGEMM_UNROLL_M).c | |||||
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
endif | |||||
SHGEMMONCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_N).c | |||||
SHGEMMOTCOPY = ../generic/gemm_tcopy_$(SHGEMM_UNROLL_N).c | |||||
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
ifndef SHGEMM_BETA | |||||
SHGEMM_BETA = gemm_beta_rvv.c | |||||
endif |
@@ -1,5 +1,6 @@ | |||||
#include "common.h" | #include "common.h" | ||||
#include <riscv_vector.h> | |||||
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, FLOAT *C, BLASLONG ldc) | int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, FLOAT *C, BLASLONG ldc) | ||||
{ | { | ||||
@@ -14,7 +15,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, | |||||
for (BLASLONG i=0; i<M/8; i+=1) { | for (BLASLONG i=0; i<M/8; i+=1) { | ||||
BLASLONG ai=m_top*K; | BLASLONG ai=m_top*K; | ||||
BLASLONG bi=n_top*K; | |||||
BLASLONG bi=n_top*K; | |||||
_Float16 B0 = B[bi+0]; | _Float16 B0 = B[bi+0]; | ||||
_Float16 B1 = B[bi+1]; | _Float16 B1 = B[bi+1]; | ||||
@@ -50,17 +51,17 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, | |||||
bi += 8; | bi += 8; | ||||
A0 = __riscv_vle16_v_f16m1( &A[ai+0*gvl], gvl ); | A0 = __riscv_vle16_v_f16m1( &A[ai+0*gvl], gvl ); | ||||
ai += 16; | |||||
ai += 8; | |||||
result0 = __riscv_vfwmacc_vf_f32m2(result0, A0, B0, gvl); | |||||
result1 = __riscv_vfwmacc_vf_f32m2(result1, A0, B1, gvl); | |||||
result2 = __riscv_vfwmacc_vf_f32m2(result2, A0, B2, gvl); | |||||
result3 = __riscv_vfwmacc_vf_f32m2(result3, A0, B3, gvl); | |||||
result4 = __riscv_vfwmacc_vf_f32m2(result4, A0, B4, gvl); | |||||
result5 = __riscv_vfwmacc_vf_f32m2(result5, A0, B5, gvl); | |||||
result6 = __riscv_vfwmacc_vf_f32m2(result6, A0, B6, gvl); | |||||
result7 = __riscv_vfwmacc_vf_f32m2(result7, A0, B7, gvl); | |||||
result0 = __riscv_vfwmacc_vf_f32m2(result0, B0, A0, gvl); | |||||
result1 = __riscv_vfwmacc_vf_f32m2(result1, B1, A0, gvl); | |||||
result2 = __riscv_vfwmacc_vf_f32m2(result2, B2, A0, gvl); | |||||
result3 = __riscv_vfwmacc_vf_f32m2(result3, B3, A0, gvl); | |||||
result4 = __riscv_vfwmacc_vf_f32m2(result4, B4, A0, gvl); | |||||
result5 = __riscv_vfwmacc_vf_f32m2(result5, B5, A0, gvl); | |||||
result6 = __riscv_vfwmacc_vf_f32m2(result6, B6, A0, gvl); | |||||
result7 = __riscv_vfwmacc_vf_f32m2(result7, B7, A0, gvl); | |||||
} | } | ||||
@@ -86,14 +87,14 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, | |||||
ci = n_top * ldc + m_top; | ci = n_top * ldc + m_top; | ||||
__riscv_vse16_v_f16m1( &C[ci], c0, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c1, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c2, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c3, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c4, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c5, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c6, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c7, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c0, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c1, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c2, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c3, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c4, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c5, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c6, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c7, gvl); ci += ldc-gvl*0; | |||||
m_top += 8; | m_top += 8; | ||||
} | } | ||||
@@ -332,10 +333,10 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, | |||||
A0 = __riscv_vle16_v_f16m1( &A[ai+0*gvl], gvl ); | A0 = __riscv_vle16_v_f16m1( &A[ai+0*gvl], gvl ); | ||||
ai += 8; | ai += 8; | ||||
result0 = __riscv_vfwmacc_vf_f32m2(result0, A0, B0, gvl); | |||||
result1 = __riscv_vfwmacc_vf_f32m2(result1, A0, B1, gvl); | |||||
result2 = __riscv_vfwmacc_vf_f32m2(result2, A0, B2, gvl); | |||||
result3 = __riscv_vfwmacc_vf_f32m2(result3, A0, B3, gvl); | |||||
result0 = __riscv_vfwmacc_vf_f32m2(result0, B0, A0, gvl); | |||||
result1 = __riscv_vfwmacc_vf_f32m2(result1, B1, A0, gvl); | |||||
result2 = __riscv_vfwmacc_vf_f32m2(result2, B2, A0, gvl); | |||||
result3 = __riscv_vfwmacc_vf_f32m2(result3, B3, A0, gvl); | |||||
} | } | ||||
@@ -353,10 +354,10 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, | |||||
ci = n_top * ldc + m_top; | ci = n_top * ldc + m_top; | ||||
__riscv_vse16_v_f16m1( &C[ci], c0, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c1, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c2, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c3, gvl); | |||||
__riscv_vse32_v_f32m2( &C[ci], c0, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c1, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c2, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c3, gvl); | |||||
m_top += 8; | m_top += 8; | ||||
} | } | ||||
@@ -521,8 +522,8 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, | |||||
A0 = __riscv_vle16_v_f16m1( &A[ai+0*gvl], gvl ); | A0 = __riscv_vle16_v_f16m1( &A[ai+0*gvl], gvl ); | ||||
ai += 8; | ai += 8; | ||||
result0 = __riscv_vfwmacc_vf_f32m2(result0, A0, B0, gvl); | |||||
result1 = __riscv_vfwmacc_vf_f32m2(result1, A0, B1, gvl); | |||||
result0 = __riscv_vfwmacc_vf_f32m2(result0, B0, A0, gvl); | |||||
result1 = __riscv_vfwmacc_vf_f32m2(result1, B1, A0, gvl); | |||||
} | } | ||||
@@ -536,8 +537,8 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, | |||||
ci = n_top * ldc + m_top; | ci = n_top * ldc + m_top; | ||||
__riscv_vse16_v_f16m1( &C[ci], c0, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse16_v_f16m1( &C[ci], c1, gvl); | |||||
__riscv_vse32_v_f32m2( &C[ci], c0, gvl); ci += ldc-gvl*0; | |||||
__riscv_vse32_v_f32m2( &C[ci], c1, gvl); | |||||
m_top += 8; | m_top += 8; | ||||
} | } | ||||
@@ -604,7 +605,6 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, | |||||
bi+=2; | bi+=2; | ||||
} | } | ||||
BLASLONG ci=n_top*ldc+m_top; | BLASLONG ci=n_top*ldc+m_top; | ||||
C[ci + 0 * ldc + 0] += alpha * result0; | C[ci + 0 * ldc + 0] += alpha * result0; | ||||
C[ci + 0 * ldc + 1] += alpha * result1; | C[ci + 0 * ldc + 1] += alpha * result1; | ||||
@@ -665,7 +665,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, | |||||
A0 = __riscv_vle16_v_f16m1( &A[ai+0*gvl], gvl ); | A0 = __riscv_vle16_v_f16m1( &A[ai+0*gvl], gvl ); | ||||
ai += 8; | ai += 8; | ||||
result0 = __riscv_vfwmacc_vf_f32m2(result0, A0, B0, gvl); | |||||
result0 = __riscv_vfwmacc_vf_f32m2(result0, B0, A0, gvl); | |||||
} | } | ||||
@@ -677,7 +677,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, | |||||
ci = n_top * ldc + m_top; | ci = n_top * ldc + m_top; | ||||
__riscv_vse16_v_f16m1( &C[ci], c0, gvl); | |||||
__riscv_vse32_v_f32m2( &C[ci], c0, gvl); | |||||
m_top += 8; | m_top += 8; | ||||
} | } | ||||
@@ -125,6 +125,23 @@ gotoblas_t TABLE_NAME = { | |||||
#endif | #endif | ||||
#endif | #endif | ||||
#ifdef BUILD_HFLOAT16 | |||||
0, 0, 0, | |||||
SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N, | |||||
#ifdef SHGEMM_DEFAULT_UNROLL_MN | |||||
SHGEMM_DEFAULT_UNROLL_MN, | |||||
#else | |||||
MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N), | |||||
#endif | |||||
shgemm_kernelTS, shgemm_betaTS, | |||||
#if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N | |||||
shgemm_incopyTS, shgemm_itcopyTS, | |||||
#else | |||||
shgemm_oncopyTS, shgemm_otcopyTS, | |||||
#endif | |||||
shgemm_oncopyTS, shgemm_otcopyTS, | |||||
#endif | |||||
#if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) | #if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) | ||||
0, 0, 0, | 0, 0, 0, | ||||
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, | SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, | ||||
@@ -1252,6 +1269,9 @@ static void init_parameter(void) { | |||||
#ifdef BUILD_BFLOAT16 | #ifdef BUILD_BFLOAT16 | ||||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | ||||
#endif | |||||
#ifdef BUILD_HFLOAT16 | |||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | |||||
#endif | #endif | ||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; | ||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; | ||||
@@ -1260,6 +1280,9 @@ static void init_parameter(void) { | |||||
#ifdef BUILD_BFLOAT16 | #ifdef BUILD_BFLOAT16 | ||||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; | TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R; | ||||
#endif | |||||
#ifdef BUILD_HFLOAT16 | |||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; | |||||
#endif | #endif | ||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; | ||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; | ||||
@@ -1269,6 +1292,9 @@ static void init_parameter(void) { | |||||
#ifdef BUILD_BFLOAT16 | #ifdef BUILD_BFLOAT16 | ||||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | ||||
#endif | |||||
#ifdef BUILD_HFLOAT16 | |||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | |||||
#endif | #endif | ||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | ||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; | ||||
@@ -1417,6 +1443,10 @@ static void init_parameter(void) { | |||||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P; | ||||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q; | ||||
#endif | #endif | ||||
#ifdef BUILD_HFLOAT16 | |||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; | |||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; | |||||
#endif | |||||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) | ||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; | ||||
#endif | #endif | ||||
@@ -2012,6 +2042,13 @@ static void init_parameter(void) { | |||||
) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15); | ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15); | ||||
#endif | #endif | ||||
#if BUILD_HFLOAT16==1 | |||||
TABLE_NAME.shgemm_r = (((BUFFER_SIZE - | |||||
((TABLE_NAME.shgemm_p * TABLE_NAME.shgemm_q * 4 + TABLE_NAME.offsetA | |||||
+ TABLE_NAME.align) & ~TABLE_NAME.align) | |||||
) / (TABLE_NAME.shgemm_q * 4) - 15) & ~15); | |||||
#endif | |||||
#if BUILD_SINGLE==1 | #if BUILD_SINGLE==1 | ||||
TABLE_NAME.sgemm_r = (((BUFFER_SIZE - | TABLE_NAME.sgemm_r = (((BUFFER_SIZE - | ||||
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA | ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA | ||||
@@ -3,6 +3,7 @@ include_directories(${PROJECT_SOURCE_DIR}) | |||||
include_directories(${PROJECT_BINARY_DIR}) | include_directories(${PROJECT_BINARY_DIR}) | ||||
list (REMOVE_ITEM FLOAT_TYPES "BFLOAT16") | list (REMOVE_ITEM FLOAT_TYPES "BFLOAT16") | ||||
list (REMOVE_ITEM FLOAT_TYPES "HFLOAT16") | |||||
set(LAPACK_SOURCES | set(LAPACK_SOURCES | ||||
potrf/potrf_U_single.c | potrf/potrf_U_single.c | ||||
@@ -39,7 +39,9 @@ typedef unsigned long BLASULONG; | |||||
typedef uint16_t bfloat16; | typedef uint16_t bfloat16; | ||||
#endif | #endif | ||||
#ifndef HFLOAT16 | |||||
#if defined(__GNUC__) && (__GNUC__ >= 12) | |||||
typedef _Float16 hfloat16; | |||||
#else | |||||
#include <stdint.h> | #include <stdint.h> | ||||
typedef uint16_t hfloat16; | typedef uint16_t hfloat16; | ||||
#endif | #endif | ||||
@@ -74,6 +74,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define SHGEMM_DEFAULT_UNROLL_N 8 | #define SHGEMM_DEFAULT_UNROLL_N 8 | ||||
#define SHGEMM_DEFAULT_UNROLL_M 8 | #define SHGEMM_DEFAULT_UNROLL_M 8 | ||||
#define SHGEMM_DEFAULT_UNROLL_MN 32 | |||||
#define SHGEMM_DEFAULT_P 128 | #define SHGEMM_DEFAULT_P 128 | ||||
#define SHGEMM_DEFAULT_R 240 | #define SHGEMM_DEFAULT_R 240 | ||||
#define SHGEMM_DEFAULT_Q 12288 | #define SHGEMM_DEFAULT_Q 12288 | ||||