You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

sgemm.f 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. *> \brief \b SGEMM
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. * Definition:
  9. * ===========
  10. *
  11. * SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
  12. *
  13. * .. Scalar Arguments ..
  14. * REAL ALPHA,BETA
  15. * INTEGER K,LDA,LDB,LDC,M,N
  16. * CHARACTER TRANSA,TRANSB
  17. * ..
  18. * .. Array Arguments ..
  19. * REAL A(LDA,*),B(LDB,*),C(LDC,*)
  20. * ..
  21. *
  22. *
  23. *> \par Purpose:
  24. * =============
  25. *>
  26. *> \verbatim
  27. *>
  28. *> SGEMM performs one of the matrix-matrix operations
  29. *>
  30. *> C := alpha*op( A )*op( B ) + beta*C,
  31. *>
  32. *> where op( X ) is one of
  33. *>
  34. *> op( X ) = X or op( X ) = X**T,
  35. *>
  36. *> alpha and beta are scalars, and A, B and C are matrices, with op( A )
  37. *> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
  38. *> \endverbatim
  39. *
  40. * Arguments:
  41. * ==========
  42. *
  43. *> \param[in] TRANSA
  44. *> \verbatim
  45. *> TRANSA is CHARACTER*1
  46. *> On entry, TRANSA specifies the form of op( A ) to be used in
  47. *> the matrix multiplication as follows:
  48. *>
  49. *> TRANSA = 'N' or 'n', op( A ) = A.
  50. *>
  51. *> TRANSA = 'T' or 't', op( A ) = A**T.
  52. *>
  53. *> TRANSA = 'C' or 'c', op( A ) = A**T.
  54. *> \endverbatim
  55. *>
  56. *> \param[in] TRANSB
  57. *> \verbatim
  58. *> TRANSB is CHARACTER*1
  59. *> On entry, TRANSB specifies the form of op( B ) to be used in
  60. *> the matrix multiplication as follows:
  61. *>
  62. *> TRANSB = 'N' or 'n', op( B ) = B.
  63. *>
  64. *> TRANSB = 'T' or 't', op( B ) = B**T.
  65. *>
  66. *> TRANSB = 'C' or 'c', op( B ) = B**T.
  67. *> \endverbatim
  68. *>
  69. *> \param[in] M
  70. *> \verbatim
  71. *> M is INTEGER
  72. *> On entry, M specifies the number of rows of the matrix
  73. *> op( A ) and of the matrix C. M must be at least zero.
  74. *> \endverbatim
  75. *>
  76. *> \param[in] N
  77. *> \verbatim
  78. *> N is INTEGER
  79. *> On entry, N specifies the number of columns of the matrix
  80. *> op( B ) and the number of columns of the matrix C. N must be
  81. *> at least zero.
  82. *> \endverbatim
  83. *>
  84. *> \param[in] K
  85. *> \verbatim
  86. *> K is INTEGER
  87. *> On entry, K specifies the number of columns of the matrix
  88. *> op( A ) and the number of rows of the matrix op( B ). K must
  89. *> be at least zero.
  90. *> \endverbatim
  91. *>
  92. *> \param[in] ALPHA
  93. *> \verbatim
  94. *> ALPHA is REAL
  95. *> On entry, ALPHA specifies the scalar alpha.
  96. *> \endverbatim
  97. *>
  98. *> \param[in] A
  99. *> \verbatim
  100. *> A is REAL array, dimension ( LDA, ka ), where ka is
  101. *> k when TRANSA = 'N' or 'n', and is m otherwise.
  102. *> Before entry with TRANSA = 'N' or 'n', the leading m by k
  103. *> part of the array A must contain the matrix A, otherwise
  104. *> the leading k by m part of the array A must contain the
  105. *> matrix A.
  106. *> \endverbatim
  107. *>
  108. *> \param[in] LDA
  109. *> \verbatim
  110. *> LDA is INTEGER
  111. *> On entry, LDA specifies the first dimension of A as declared
  112. *> in the calling (sub) program. When TRANSA = 'N' or 'n' then
  113. *> LDA must be at least max( 1, m ), otherwise LDA must be at
  114. *> least max( 1, k ).
  115. *> \endverbatim
  116. *>
  117. *> \param[in] B
  118. *> \verbatim
  119. *> B is REAL array, dimension ( LDB, kb ), where kb is
  120. *> n when TRANSB = 'N' or 'n', and is k otherwise.
  121. *> Before entry with TRANSB = 'N' or 'n', the leading k by n
  122. *> part of the array B must contain the matrix B, otherwise
  123. *> the leading n by k part of the array B must contain the
  124. *> matrix B.
  125. *> \endverbatim
  126. *>
  127. *> \param[in] LDB
  128. *> \verbatim
  129. *> LDB is INTEGER
  130. *> On entry, LDB specifies the first dimension of B as declared
  131. *> in the calling (sub) program. When TRANSB = 'N' or 'n' then
  132. *> LDB must be at least max( 1, k ), otherwise LDB must be at
  133. *> least max( 1, n ).
  134. *> \endverbatim
  135. *>
  136. *> \param[in] BETA
  137. *> \verbatim
  138. *> BETA is REAL
  139. *> On entry, BETA specifies the scalar beta. When BETA is
  140. *> supplied as zero then C need not be set on input.
  141. *> \endverbatim
  142. *>
  143. *> \param[in,out] C
  144. *> \verbatim
  145. *> C is REAL array, dimension ( LDC, N )
  146. *> Before entry, the leading m by n part of the array C must
  147. *> contain the matrix C, except when beta is zero, in which
  148. *> case C need not be set on entry.
  149. *> On exit, the array C is overwritten by the m by n matrix
  150. *> ( alpha*op( A )*op( B ) + beta*C ).
  151. *> \endverbatim
  152. *>
  153. *> \param[in] LDC
  154. *> \verbatim
  155. *> LDC is INTEGER
  156. *> On entry, LDC specifies the first dimension of C as declared
  157. *> in the calling (sub) program. LDC must be at least
  158. *> max( 1, m ).
  159. *> \endverbatim
  160. *
  161. * Authors:
  162. * ========
  163. *
  164. *> \author Univ. of Tennessee
  165. *> \author Univ. of California Berkeley
  166. *> \author Univ. of Colorado Denver
  167. *> \author NAG Ltd.
  168. *
  169. *> \date December 2016
  170. *
  171. *> \ingroup single_blas_level3
  172. *
  173. *> \par Further Details:
  174. * =====================
  175. *>
  176. *> \verbatim
  177. *>
  178. *> Level 3 Blas routine.
  179. *>
  180. *> -- Written on 8-February-1989.
  181. *> Jack Dongarra, Argonne National Laboratory.
  182. *> Iain Duff, AERE Harwell.
  183. *> Jeremy Du Croz, Numerical Algorithms Group Ltd.
  184. *> Sven Hammarling, Numerical Algorithms Group Ltd.
  185. *> \endverbatim
  186. *>
  187. * =====================================================================
  188. SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
  189. *
  190. * -- Reference BLAS level3 routine (version 3.7.0) --
  191. * -- Reference BLAS is a software package provided by Univ. of Tennessee, --
  192. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  193. * December 2016
  194. *
  195. * .. Scalar Arguments ..
  196. REAL ALPHA,BETA
  197. INTEGER K,LDA,LDB,LDC,M,N
  198. CHARACTER TRANSA,TRANSB
  199. * ..
  200. * .. Array Arguments ..
  201. REAL A(LDA,*),B(LDB,*),C(LDC,*)
  202. * ..
  203. *
  204. * =====================================================================
  205. *
  206. * .. External Functions ..
  207. LOGICAL LSAME
  208. EXTERNAL LSAME
  209. * ..
  210. * .. External Subroutines ..
  211. EXTERNAL XERBLA
  212. * ..
  213. * .. Intrinsic Functions ..
  214. INTRINSIC MAX
  215. * ..
  216. * .. Local Scalars ..
  217. REAL TEMP
  218. INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
  219. LOGICAL NOTA,NOTB
  220. * ..
  221. * .. Parameters ..
  222. REAL ONE,ZERO
  223. PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
  224. * ..
  225. *
  226. * Set NOTA and NOTB as true if A and B respectively are not
  227. * transposed and set NROWA, NCOLA and NROWB as the number of rows
  228. * and columns of A and the number of rows of B respectively.
  229. *
  230. NOTA = LSAME(TRANSA,'N')
  231. NOTB = LSAME(TRANSB,'N')
  232. IF (NOTA) THEN
  233. NROWA = M
  234. NCOLA = K
  235. ELSE
  236. NROWA = K
  237. NCOLA = M
  238. END IF
  239. IF (NOTB) THEN
  240. NROWB = K
  241. ELSE
  242. NROWB = N
  243. END IF
  244. *
  245. * Test the input parameters.
  246. *
  247. INFO = 0
  248. IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
  249. + (.NOT.LSAME(TRANSA,'T'))) THEN
  250. INFO = 1
  251. ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
  252. + (.NOT.LSAME(TRANSB,'T'))) THEN
  253. INFO = 2
  254. ELSE IF (M.LT.0) THEN
  255. INFO = 3
  256. ELSE IF (N.LT.0) THEN
  257. INFO = 4
  258. ELSE IF (K.LT.0) THEN
  259. INFO = 5
  260. ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
  261. INFO = 8
  262. ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
  263. INFO = 10
  264. ELSE IF (LDC.LT.MAX(1,M)) THEN
  265. INFO = 13
  266. END IF
  267. IF (INFO.NE.0) THEN
  268. CALL XERBLA('SGEMM ',INFO)
  269. RETURN
  270. END IF
  271. *
  272. * Quick return if possible.
  273. *
  274. IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
  275. + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
  276. *
  277. * And if alpha.eq.zero.
  278. *
  279. IF (ALPHA.EQ.ZERO) THEN
  280. IF (BETA.EQ.ZERO) THEN
  281. DO 20 J = 1,N
  282. DO 10 I = 1,M
  283. C(I,J) = ZERO
  284. 10 CONTINUE
  285. 20 CONTINUE
  286. ELSE
  287. DO 40 J = 1,N
  288. DO 30 I = 1,M
  289. C(I,J) = BETA*C(I,J)
  290. 30 CONTINUE
  291. 40 CONTINUE
  292. END IF
  293. RETURN
  294. END IF
  295. *
  296. * Start the operations.
  297. *
  298. IF (NOTB) THEN
  299. IF (NOTA) THEN
  300. *
  301. * Form C := alpha*A*B + beta*C.
  302. *
  303. DO 90 J = 1,N
  304. IF (BETA.EQ.ZERO) THEN
  305. DO 50 I = 1,M
  306. C(I,J) = ZERO
  307. 50 CONTINUE
  308. ELSE IF (BETA.NE.ONE) THEN
  309. DO 60 I = 1,M
  310. C(I,J) = BETA*C(I,J)
  311. 60 CONTINUE
  312. END IF
  313. DO 80 L = 1,K
  314. TEMP = ALPHA*B(L,J)
  315. DO 70 I = 1,M
  316. C(I,J) = C(I,J) + TEMP*A(I,L)
  317. 70 CONTINUE
  318. 80 CONTINUE
  319. 90 CONTINUE
  320. ELSE
  321. *
  322. * Form C := alpha*A**T*B + beta*C
  323. *
  324. DO 120 J = 1,N
  325. DO 110 I = 1,M
  326. TEMP = ZERO
  327. DO 100 L = 1,K
  328. TEMP = TEMP + A(L,I)*B(L,J)
  329. 100 CONTINUE
  330. IF (BETA.EQ.ZERO) THEN
  331. C(I,J) = ALPHA*TEMP
  332. ELSE
  333. C(I,J) = ALPHA*TEMP + BETA*C(I,J)
  334. END IF
  335. 110 CONTINUE
  336. 120 CONTINUE
  337. END IF
  338. ELSE
  339. IF (NOTA) THEN
  340. *
  341. * Form C := alpha*A*B**T + beta*C
  342. *
  343. DO 170 J = 1,N
  344. IF (BETA.EQ.ZERO) THEN
  345. DO 130 I = 1,M
  346. C(I,J) = ZERO
  347. 130 CONTINUE
  348. ELSE IF (BETA.NE.ONE) THEN
  349. DO 140 I = 1,M
  350. C(I,J) = BETA*C(I,J)
  351. 140 CONTINUE
  352. END IF
  353. DO 160 L = 1,K
  354. TEMP = ALPHA*B(J,L)
  355. DO 150 I = 1,M
  356. C(I,J) = C(I,J) + TEMP*A(I,L)
  357. 150 CONTINUE
  358. 160 CONTINUE
  359. 170 CONTINUE
  360. ELSE
  361. *
  362. * Form C := alpha*A**T*B**T + beta*C
  363. *
  364. DO 200 J = 1,N
  365. DO 190 I = 1,M
  366. TEMP = ZERO
  367. DO 180 L = 1,K
  368. TEMP = TEMP + A(L,I)*B(J,L)
  369. 180 CONTINUE
  370. IF (BETA.EQ.ZERO) THEN
  371. C(I,J) = ALPHA*TEMP
  372. ELSE
  373. C(I,J) = ALPHA*TEMP + BETA*C(I,J)
  374. END IF
  375. 190 CONTINUE
  376. 200 CONTINUE
  377. END IF
  378. END IF
  379. *
  380. RETURN
  381. *
  382. * End of SGEMM .
  383. *
  384. END