You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

smallscaling.c 5.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. // run with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=n
  #include <math.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <time.h>
  #ifndef _WIN32
  #include <pthread.h>
  #endif
  #include <cblas.h>
  #include <omp.h>
  // Smallest and largest matrix dimension that main() benchmarks.
  #define MIN_SIZE 5
  #define MAX_SIZE 60
  // Number of geometric growth steps used to go from MIN_SIZE to MAX_SIZE.
  #define NB_SIZE 10
  // Number of loop iterations for a 1x1 matrix (each benchmark divides this
  // budget by the matrix size). Lower it if the test is too slow on your
  // computer.
  // Written as an integer literal because it is only ever stored in the
  // int field BenchParam.n_loop.
  #define NLOOP 20000000
  // Description of one benchmark run, shared by the sequential, OpenMP and
  // pthread drivers.
  typedef struct BenchParam {
      // Dimension n of the n x n matrix (and length of the vector) under test.
      int matrix_size;
      // Loop budget; each benchmark divides it by matrix_size, and the
      // threaded drivers divide it by the thread count first.
      int n_loop;
      // Benchmark driver (trmv_bench, gemv_bench, ger_bench). It is always
      // called with a pointer to the parameter block, hence the prototype.
      void (*bench_func)(struct BenchParam *param);
      // BLAS routine exercised by bench_func. Deliberately left without a
      // prototype: each driver calls it with a different argument list.
      void (*blas_func)();
      // Allocates and fills a buffer of `size` elements of the routine's
      // element type; the caller releases it with free().
      void *(*create_matrix)(int size);
  } BenchParam;
  // Allocate a buffer of `size` floats and fill element i with 1e3 * i / size.
  //
  // The buffer is sized from the element type actually stored (float); the
  // previous sizeof(double) allocated twice the memory that is used.
  // Returns the buffer, which the caller must free(). A non-positive `size`
  // yields whatever malloc(0) returns (possibly NULL). If a non-empty
  // allocation fails, a message is printed to stderr and the program exits.
  void *s_create_matrix(int size)
  {
      size_t count = size > 0 ? (size_t)size : 0;
      float *r = malloc(count * sizeof *r);

      if (r == NULL) {
          if (count == 0)
              return NULL; // malloc(0) is allowed to return NULL
          fprintf(stderr, "s_create_matrix: cannot allocate %zu floats\n", count);
          exit(EXIT_FAILURE);
      }
      for (size_t i = 0; i < count; i++)
          r[i] = (float)(1e3 * i / size);
      return r;
  }
  // Allocate a buffer of 2 * `size` floats (two floats per element) and fill
  // slot i with 1e3 * i / size.
  //
  // The element count is computed in size_t so that 2 * size is not evaluated
  // in int, and the buffer is sized from the float type actually stored
  // rather than sizeof(double).
  // Returns the buffer, which the caller must free(). A non-positive `size`
  // yields whatever malloc(0) returns (possibly NULL). If a non-empty
  // allocation fails, a message is printed to stderr and the program exits.
  void *c_create_matrix(int size)
  {
      size_t count = size > 0 ? 2 * (size_t)size : 0;
      float *r = malloc(count * sizeof *r);

      if (r == NULL) {
          if (count == 0)
              return NULL; // malloc(0) is allowed to return NULL
          fprintf(stderr, "c_create_matrix: cannot allocate %zu floats\n", count);
          exit(EXIT_FAILURE);
      }
      for (size_t i = 0; i < count; i++)
          r[i] = (float)(1e3 * i / size);
      return r;
  }
  // Allocate a buffer of 2 * `size` doubles (two doubles per element) and
  // fill slot i with 1e3 * i / size.
  //
  // The element count is computed in size_t so that 2 * size is not evaluated
  // in int.
  // Returns the buffer, which the caller must free(). A non-positive `size`
  // yields whatever malloc(0) returns (possibly NULL). If a non-empty
  // allocation fails, a message is printed to stderr and the program exits.
  void *z_create_matrix(int size)
  {
      size_t count = size > 0 ? 2 * (size_t)size : 0;
      double *r = malloc(count * sizeof *r);

      if (r == NULL) {
          if (count == 0)
              return NULL; // malloc(0) is allowed to return NULL
          fprintf(stderr, "z_create_matrix: cannot allocate %zu doubles\n", count);
          exit(EXIT_FAILURE);
      }
      for (size_t i = 0; i < count; i++)
          r[i] = 1e3 * i / size;
      return r;
  }
  // Allocate a buffer of `size` doubles and fill element i with
  // 1e3 * i / size.
  //
  // Returns the buffer, which the caller must free(). A non-positive `size`
  // yields whatever malloc(0) returns (possibly NULL). If a non-empty
  // allocation fails, a message is printed to stderr and the program exits.
  void *d_create_matrix(int size)
  {
      size_t count = size > 0 ? (size_t)size : 0;
      double *r = malloc(count * sizeof *r);

      if (r == NULL) {
          if (count == 0)
              return NULL; // malloc(0) is allowed to return NULL
          fprintf(stderr, "d_create_matrix: cannot allocate %zu doubles\n", count);
          exit(EXIT_FAILURE);
      }
      for (size_t i = 0; i < count; i++)
          r[i] = 1e3 * i / size;
      return r;
  }
  49. void trmv_bench(BenchParam * param)
  50. {
  51. int i, n;
  52. int size = param->matrix_size;
  53. n = param->n_loop / size;
  54. int one = 1;
  55. void * A = param->create_matrix(size * size);
  56. void * y = param->create_matrix(size);
  57. for(i = 0; i < n; i++) {
  58. param->blas_func("U", "N", "N", &size, A, &size, y, &one);
  59. }
  60. free(A);
  61. free(y);
  62. }
  63. void gemv_bench(BenchParam * param)
  64. {
  65. int i, n;
  66. int size = param->matrix_size;
  67. n = param->n_loop / size;
  68. double v = 1.01;
  69. int one = 1;
  70. void * A = param->create_matrix(size * size);
  71. void * y = param->create_matrix(size);
  72. for(i = 0; i < n; i++) {
  73. param->blas_func("N", &size, &size, &v, A, &size, y, &one, &v, y, &one);
  74. }
  75. free(A);
  76. free(y);
  77. }
  78. void ger_bench(BenchParam * param) {
  79. int i, n;
  80. int size = param->matrix_size;
  81. n = param->n_loop / size;
  82. double v = 1.01;
  83. int one = 1;
  84. void * A = param->create_matrix(size * size);
  85. void * y = param->create_matrix(size);
  86. for(i = 0; i < n; i++) {
  87. param->blas_func(&size, &size, &v, y, &one, y, &one, A, &size);
  88. }
  89. free(A);
  90. free(y);
  91. }
  92. #ifndef _WIN32
  93. void * pthread_func_wrapper(void * param) {
  94. ((BenchParam *)param)->bench_func(param);
  95. pthread_exit(NULL);
  96. }
  97. #endif
  98. #define NB_TESTS 5
  99. void * TESTS[4 * NB_TESTS] = {
  100. trmv_bench, ztrmv_, z_create_matrix, "ztrmv",
  101. gemv_bench, dgemv_, d_create_matrix, "dgemv",
  102. gemv_bench, zgemv_, z_create_matrix, "zgemv",
  103. ger_bench, dger_, d_create_matrix, "dger",
  104. ger_bench, zgerc_, z_create_matrix, "zgerc",
  105. };
  // Seconds elapsed on CLOCK_MONOTONIC between `tick` and the moment of the
  // call, as a double.
  static inline double delta_time(struct timespec tick)
  {
      struct timespec now;
      double whole_seconds;
      double fractional_seconds;

      clock_gettime(CLOCK_MONOTONIC, &now);
      whole_seconds = now.tv_sec - tick.tv_sec;
      fractional_seconds = (now.tv_nsec - tick.tv_nsec) / 1e9;
      return whole_seconds + fractional_seconds;
  }
  111. double pthread_bench(BenchParam * param, int nb_threads)
  112. {
  113. #ifdef _WIN32
  114. return 0;
  115. #else
  116. BenchParam threaded_param = *param;
  117. pthread_t threads[nb_threads];
  118. int t, rc;
  119. struct timespec tick;
  120. threaded_param.n_loop /= nb_threads;
  121. clock_gettime(CLOCK_MONOTONIC, &tick);
  122. for(t=0; t<nb_threads; t++){
  123. rc = pthread_create(&threads[t], NULL, pthread_func_wrapper, &threaded_param);
  124. if (rc){
  125. printf("ERROR; return code from pthread_create() is %d\n", rc);
  126. exit(-1);
  127. }
  128. }
  129. for(t=0; t<nb_threads; t++){
  130. pthread_join(threads[t], NULL);
  131. }
  132. return delta_time(tick);
  133. #endif
  134. }
  135. double seq_bench(BenchParam * param) {
  136. struct timespec tick;
  137. clock_gettime(CLOCK_MONOTONIC, &tick);
  138. param->bench_func(param);
  139. return delta_time(tick);
  140. }
  141. double omp_bench(BenchParam * param) {
  142. BenchParam threaded_param = *param;
  143. struct timespec tick;
  144. int t;
  145. int nb_threads = omp_get_max_threads();
  146. threaded_param.n_loop /= nb_threads;
  147. clock_gettime(CLOCK_MONOTONIC, &tick);
  148. #pragma omp parallel for
  149. for(t = 0; t < nb_threads; t ++){
  150. param->bench_func(&threaded_param);
  151. }
  152. return delta_time(tick);
  153. }
  154. int main(int argc, char * argv[]) {
  155. double inc_factor = exp(log((double)MAX_SIZE / MIN_SIZE) / NB_SIZE);
  156. BenchParam param;
  157. int test_id;
  158. printf ("Running on %d threads\n", omp_get_max_threads());
  159. for(test_id = 0; test_id < NB_TESTS; test_id ++) {
  160. double size = MIN_SIZE;
  161. param.bench_func = TESTS[test_id * 4];
  162. param.blas_func = TESTS[test_id * 4 + 1];
  163. param.create_matrix = TESTS[test_id * 4 + 2];
  164. printf("\nBenchmark of %s\n", (char*)TESTS[test_id * 4 + 3]);
  165. param.n_loop = NLOOP;
  166. while(size <= MAX_SIZE) {
  167. param.matrix_size = (int)(size + 0.5);
  168. double seq_time = seq_bench(&param);
  169. double omp_time = omp_bench(&param);
  170. double pthread_time = pthread_bench(&param, omp_get_max_threads());
  171. printf("matrix size %d, sequential %gs, openmp %gs, speedup %g, "
  172. "pthread %gs, speedup %g\n",
  173. param.matrix_size, seq_time,
  174. omp_time, seq_time / omp_time,
  175. pthread_time, seq_time / pthread_time);
  176. size *= inc_factor;
  177. }
  178. }
  179. return(0);
  180. }