You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dgemm_thread_safety.cpp 4.1 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <future>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include <omp.h>
#include "../cblas.h"
#include "cpp_thread_safety_common.h"
  8. void launch_cblas_dgemm(double* A, double* B, double* C, const blasint randomMatSize){
  9. cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, randomMatSize, randomMatSize, randomMatSize, 1.0, A, randomMatSize, B, randomMatSize, 0.1, C, randomMatSize);
  10. }
  11. int main(int argc, char* argv[]){
  12. blasint randomMatSize = 1024; //dimension of the random square matrices used
  13. uint32_t numConcurrentThreads = 52; //number of concurrent calls of the functions being tested
  14. uint32_t numTestRounds = 16; //number of testing rounds before success exit
  15. if (argc > 4){
  16. std::cout<<"ERROR: too many arguments for thread safety tester"<<std::endl;
  17. abort();
  18. }
  19. if(argc == 4){
  20. std::vector<std::string> cliArgs;
  21. for (int i = 1; i < argc; i++){
  22. cliArgs.push_back(argv[i]);
  23. std::cout<<argv[i]<<std::endl;
  24. }
  25. randomMatSize = std::stoul(cliArgs[0]);
  26. numConcurrentThreads = std::stoul(cliArgs[1]);
  27. numTestRounds = std::stoul(cliArgs[2]);
  28. }
  29. std::uniform_real_distribution<double> rngdist{-1.0, 1.0};
  30. std::vector<std::vector<double>> matBlock(numConcurrentThreads*3);
  31. std::vector<std::future<void>> futureBlock(numConcurrentThreads);
  32. std::cout<<"*----------------------------*\n";
  33. std::cout<<"| DGEMM thread safety tester |\n";
  34. std::cout<<"*----------------------------*\n";
  35. std::cout<<"Size of random matrices(N=M=K): "<<randomMatSize<<'\n';
  36. std::cout<<"Number of concurrent calls into OpenBLAS : "<<numConcurrentThreads<<'\n';
  37. std::cout<<"Number of testing rounds : "<<numTestRounds<<'\n';
  38. std::cout<<"This test will need "<<(static_cast<uint64_t>(randomMatSize*randomMatSize)*numConcurrentThreads*3*8)/static_cast<double>(1024*1024)<<" MiB of RAM\n"<<std::endl;
  39. std::cout<<"Initializing random number generator..."<<std::flush;
  40. std::mt19937_64 PRNG = InitPRNG();
  41. std::cout<<"done\n";
  42. std::cout<<"Preparing to test CBLAS DGEMM thread safety\n";
  43. std::cout<<"Allocating matrices..."<<std::flush;
  44. for(uint32_t i=0; i<(numConcurrentThreads*3); i++){
  45. matBlock[i].resize(randomMatSize*randomMatSize);
  46. }
  47. std::cout<<"done\n";
  48. //pauser();
  49. std::cout<<"Filling matrices with random numbers..."<<std::flush;
  50. FillMatrices(matBlock, PRNG, rngdist, randomMatSize, numConcurrentThreads, 3);
  51. //PrintMatrices(matBlock, randomMatSize, numConcurrentThreads, 3);
  52. std::cout<<"done\n";
  53. std::cout<<"Testing CBLAS DGEMM thread safety\n";
  54. omp_set_num_threads(numConcurrentThreads);
  55. for(uint32_t R=0; R<numTestRounds; R++){
  56. std::cout<<"DGEMM round #"<<R<<std::endl;
  57. std::cout<<"Launching "<<numConcurrentThreads<<" threads simultaneously using OpenMP..."<<std::flush;
  58. #pragma omp parallel for default(none) shared(futureBlock, matBlock, randomMatSize, numConcurrentThreads)
  59. for(uint32_t i=0; i<numConcurrentThreads; i++){
  60. futureBlock[i] = std::async(std::launch::async, launch_cblas_dgemm, &matBlock[i*3][0], &matBlock[i*3+1][0], &matBlock[i*3+2][0], randomMatSize);
  61. //launch_cblas_dgemm( &matBlock[i][0], &matBlock[i+1][0], &matBlock[i+2][0]);
  62. }
  63. std::cout<<"done\n";
  64. std::cout<<"Waiting for threads to finish..."<<std::flush;
  65. for(uint32_t i=0; i<numConcurrentThreads; i++){
  66. futureBlock[i].get();
  67. }
  68. std::cout<<"done\n";
  69. //PrintMatrices(matBlock, randomMatSize, numConcurrentThreads, 3);
  70. std::cout<<"Comparing results from different threads..."<<std::flush;
  71. for(uint32_t i=3; i<(numConcurrentThreads*3); i+=3){ //i is the index of matrix A, for a given thread
  72. for(uint32_t j = 0; j < static_cast<uint32_t>(randomMatSize*randomMatSize); j++){
  73. if (std::abs(matBlock[i+2][j] - matBlock[2][j]) > 1.0E-13){ //i+2 is the index of matrix C, for a given thread
  74. std::cout<<"ERROR: one of the threads returned a different result! Index : "<<i+2<<std::endl;
  75. std::cout<<"CBLAS DGEMM thread safety test FAILED!"<<std::endl;
  76. return -1;
  77. }
  78. }
  79. }
  80. std::cout<<"OK!\n"<<std::endl;
  81. }
  82. std::cout<<"CBLAS DGEMM thread safety test PASSED!\n"<<std::endl;
  83. return 0;
  84. }