Browse Source

Merge pull request #13 from xianyi/develop

rebase
tags/v0.3.14^2
Martin Kroeker GitHub 4 years ago
parent
commit
86a5f98e4a
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 67 additions and 25 deletions
  1. +0
    -1
      cmake/lapacke.cmake
  2. +19
    -0
      cpuid_x86.c
  3. +19
    -18
      driver/others/blas_server.c
  4. +15
    -0
      driver/others/dynamic.c
  5. +2
    -2
      driver/others/memory.c
  6. +1
    -0
      f_check
  7. +2
    -0
      kernel/x86_64/KERNEL.ZEN
  8. +1
    -1
      kernel/x86_64/dasum.c
  9. +1
    -1
      kernel/x86_64/drot.c
  10. +1
    -1
      kernel/x86_64/sasum.c
  11. +1
    -1
      kernel/x86_64/srot.c
  12. +5
    -0
      param.h

+ 0
- 1
cmake/lapacke.cmake View File

@@ -2499,6 +2499,5 @@ foreach (Utils_FILE ${Utils_SRC})
endforeach ()

set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include")
configure_file("${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h" COPYONLY)
include_directories(${lapacke_include_dir})
set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}")

+ 19
- 0
cpuid_x86.c View File

@@ -1418,6 +1418,15 @@ int get_cpuname(void){
case 9:
case 8:
switch (model) {
case 12: // Tiger Lake
if(support_avx512())
return CPUTYPE_SKYLAKEX;
if(support_avx2())
return CPUTYPE_HASWELL;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
case 14: // Kaby Lake and refreshes
if(support_avx2())
return CPUTYPE_HASWELL;
@@ -2124,6 +2133,16 @@ int get_coretype(void){
break;
case 9:
case 8:
if (model == 12) { // Tiger Lake
if(support_avx512())
return CPUTYPE_SKYLAKEX;
if(support_avx2())
return CPUTYPE_HASWELL;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
}
if (model == 14) { // Kaby Lake
if(support_avx())
#ifndef NO_AVX2


+ 19
- 18
driver/others/blas_server.c View File

@@ -1024,38 +1024,39 @@ int BLASFUNC(blas_thread_shutdown)(void){

int i;

-if (!blas_server_avail) return 0;

LOCK_COMMAND(&server_lock);

-for (i = 0; i < blas_num_threads - 1; i++) {
+if (blas_server_avail) {

+for (i = 0; i < blas_num_threads - 1; i++) {

-pthread_mutex_lock (&thread_status[i].lock);

-atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1);
-thread_status[i].status = THREAD_STATUS_WAKEUP;
-pthread_cond_signal (&thread_status[i].wakeup);
+pthread_mutex_lock (&thread_status[i].lock);

-pthread_mutex_unlock(&thread_status[i].lock);
+atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1);
+thread_status[i].status = THREAD_STATUS_WAKEUP;
+pthread_cond_signal (&thread_status[i].wakeup);

-}
+pthread_mutex_unlock(&thread_status[i].lock);

-for(i = 0; i < blas_num_threads - 1; i++){
-pthread_join(blas_threads[i], NULL);
-}
+}

-for(i = 0; i < blas_num_threads - 1; i++){
-pthread_mutex_destroy(&thread_status[i].lock);
-pthread_cond_destroy (&thread_status[i].wakeup);
-}
+for(i = 0; i < blas_num_threads - 1; i++){
+pthread_join(blas_threads[i], NULL);
+}

+for(i = 0; i < blas_num_threads - 1; i++){
+pthread_mutex_destroy(&thread_status[i].lock);
+pthread_cond_destroy (&thread_status[i].wakeup);
+}

#ifdef NEED_STACKATTR
-pthread_attr_destory(&attr);
+pthread_attr_destroy(&attr);
#endif

-blas_server_avail = 0;
+blas_server_avail = 0;

+}
UNLOCK_COMMAND(&server_lock);

return 0;


+ 15
- 0
driver/others/dynamic.c View File

@@ -644,6 +644,21 @@ static gotoblas_t *get_coretype(void){
return NULL;
case 9:
case 8:
if (model == 12) { // Tiger Lake
if (support_avx512())
return &gotoblas_SKYLAKEX;
if(support_avx2()){
openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK);
return &gotoblas_HASWELL;
}
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM;
}
}
if (model == 14 ) { // Kaby Lake, Coffee Lake
if(support_avx2())
return &gotoblas_HASWELL;


+ 2
- 2
driver/others/memory.c View File

@@ -222,11 +222,11 @@ int get_num_procs(void);
#else
int get_num_procs(void) {
static int nums = 0;

#if defined(__GLIBC_PREREQ)
cpu_set_t cpuset,*cpusetp;
size_t size;
int ret;

#if defined(__GLIBC_PREREQ)
#if !__GLIBC_PREREQ(2, 7)
int i;
#if !__GLIBC_PREREQ(2, 6)


+ 1
- 0
f_check View File

@@ -75,6 +75,7 @@ if ($compiler eq "") {

} elsif ($data =~ /GNU/ || $data =~ /GCC/ ) {

$data =~ s/\(+.*?\)+//g;
$data =~ /(\d+)\.(\d+).(\d+)/;
$major = $1;
$minor = $2;


+ 2
- 0
kernel/x86_64/KERNEL.ZEN View File

@@ -97,3 +97,5 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c

SROTKERNEL = srot.c
DROTKERNEL = drot.c

+ 1
- 1
kernel/x86_64/dasum.c View File

@@ -6,7 +6,7 @@

#if defined(SKYLAKEX)
#include "dasum_microk_skylakex-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "dasum_microk_haswell-2.c"
#endif



+ 1
- 1
kernel/x86_64/drot.c View File

@@ -2,7 +2,7 @@

#if defined(SKYLAKEX)
#include "drot_microk_skylakex-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "drot_microk_haswell-2.c"
#endif



+ 1
- 1
kernel/x86_64/sasum.c View File

@@ -11,7 +11,7 @@

#if defined(SKYLAKEX)
#include "sasum_microk_skylakex-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "sasum_microk_haswell-2.c"
#endif



+ 1
- 1
kernel/x86_64/srot.c View File

@@ -2,7 +2,7 @@

#if defined(SKYLAKEX)
#include "srot_microk_skylakex-2.c"
-#elif defined(HASWELL)
+#elif defined(HASWELL) || defined(ZEN)
#include "srot_microk_haswell-2.c"
#endif



+ 5
- 0
param.h View File

@@ -2443,8 +2443,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 8
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define DGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_N 4
#else
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 8
#endif
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 8


Loading…
Cancel
Save