/* gemv_n_vfpv3.S — OpenBLAS ARM VFPv3 GEMV-N kernel.
 * (Repository web-page artifacts — topic banner, file-size line, and the
 *  line-number gutter — removed from this scraped copy.) */
/***************************************************************************
Copyright (c) 2013, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

/**************************************************************************************
* 2013/11/19 Saar
* BLASTEST : OK
* CTEST : OK
* TEST : OK
*
**************************************************************************************/
  34. #define ASSEMBLER
  35. #include "common.h"
  36. #define STACKSIZE 256
  37. #if !defined(__ARM_PCS_VFP)
  38. #if !defined(DOUBLE)
  39. #define OLD_ALPHA r3
  40. #define OLD_A_SOFTFP [fp, #0 ]
  41. #define OLD_LDA [fp, #4 ]
  42. #define X [fp, #8 ]
  43. #define OLD_INC_X [fp, #12 ]
  44. #define Y [fp, #16 ]
  45. #define OLD_INC_Y [fp, #20 ]
  46. #else
  47. #define OLD_ALPHA [fp, #0 ]
  48. #define OLD_A_SOFTFP [fp, #8 ]
  49. #define OLD_LDA [fp, #12]
  50. #define X [fp, #16]
  51. #define OLD_INC_X [fp, #20]
  52. #define Y [fp, #24]
  53. #define OLD_INC_Y [fp, #28]
  54. #endif
  55. #else
  56. #define OLD_LDA [fp, #0 ]
  57. #define X [fp, #4 ]
  58. #define OLD_INC_X [fp, #8 ]
  59. #define Y [fp, #12 ]
  60. #define OLD_INC_Y [fp, #16 ]
  61. #endif
  62. #define OLD_A r3
  63. #define OLD_M r0
  64. #define AO1 r0
  65. #define N r1
  66. #define J r2
  67. #define AO2 r4
  68. #define XO r5
  69. #define YO r6
  70. #define LDA r7
  71. #define INC_X r8
  72. #define INC_Y r9
  73. #define I r12
  74. #define M [fp, #-252 ]
  75. #define A [fp, #-256 ]
  76. #define FP_ZERO [fp, #-228]
  77. #define FP_ZERO_0 [fp, #-228]
  78. #define FP_ZERO_1 [fp, #-224]
  79. #define X_PRE 64
  80. #define Y_PRE 0
  81. #define A_PRE 0
/**************************************************************************************
* Macro definitions
**************************************************************************************/
  85. #if defined(DOUBLE)
  86. .macro INIT_F8
  87. pld [ YO , #Y_PRE ]
  88. pld [ YO , #Y_PRE+32 ]
  89. fldd d24 , FP_ZERO
  90. vmov.f64 d25 , d24
  91. vmov.f64 d26 , d24
  92. vmov.f64 d27 , d24
  93. vmov.f64 d28 , d24
  94. vmov.f64 d29 , d24
  95. vmov.f64 d30 , d24
  96. vmov.f64 d31 , d24
  97. .endm
  98. .macro KERNEL_F8X8
  99. pld [ XO , #X_PRE ]
  100. KERNEL_F8X1
  101. KERNEL_F8X1
  102. KERNEL_F8X1
  103. KERNEL_F8X1
  104. pld [ XO , #X_PRE ]
  105. KERNEL_F8X1
  106. KERNEL_F8X1
  107. KERNEL_F8X1
  108. KERNEL_F8X1
  109. .endm
  110. .macro KERNEL_F8X1
  111. vldmia.f64 XO! , { d4 }
  112. vldmia.f64 AO1 , { d8 - d15 }
  113. vmla.f64 d24 , d4 , d8
  114. pld [ AO2 , #A_PRE ]
  115. vmla.f64 d25 , d4 , d9
  116. pld [ AO2 , #A_PRE+32 ]
  117. vmla.f64 d26 , d4 , d10
  118. vmla.f64 d27 , d4 , d11
  119. vmla.f64 d28 , d4 , d12
  120. vmla.f64 d29 , d4 , d13
  121. add AO1, AO1, LDA
  122. vmla.f64 d30 , d4 , d14
  123. add AO2, AO2, LDA
  124. vmla.f64 d31 , d4 , d15
  125. .endm
  126. .macro SAVE_F8
  127. vldmia.f64 YO, { d16 - d23 }
  128. vmla.f64 d16, d0, d24
  129. vmla.f64 d17, d0, d25
  130. vmla.f64 d18, d0, d26
  131. vmla.f64 d19, d0, d27
  132. vmla.f64 d20, d0, d28
  133. vmla.f64 d21, d0, d29
  134. vmla.f64 d22, d0, d30
  135. vmla.f64 d23, d0, d31
  136. vstmia.f64 YO!, { d16 - d23 }
  137. .endm
  138. .macro INIT_F1
  139. fldd d24 , FP_ZERO
  140. .endm
  141. .macro KERNEL_F1X1
  142. vldmia.f64 XO! , { d4 }
  143. vldmia.f64 AO1 , { d8 }
  144. vmla.f64 d24 , d4 , d8
  145. add AO1, AO1, LDA
  146. .endm
  147. .macro SAVE_F1
  148. vldmia.f64 YO, { d16 }
  149. vmla.f64 d16, d0, d24
  150. vstmia.f64 YO!, { d16 }
  151. .endm
  152. /*********************************************************************************************/
  153. .macro INIT_S8
  154. fldd d24 , FP_ZERO
  155. vmov.f64 d25 , d24
  156. vmov.f64 d26 , d24
  157. vmov.f64 d27 , d24
  158. vmov.f64 d28 , d24
  159. vmov.f64 d29 , d24
  160. vmov.f64 d30 , d24
  161. vmov.f64 d31 , d24
  162. .endm
  163. .macro KERNEL_S8X8
  164. KERNEL_S8X1
  165. KERNEL_S8X1
  166. KERNEL_S8X1
  167. KERNEL_S8X1
  168. KERNEL_S8X1
  169. KERNEL_S8X1
  170. KERNEL_S8X1
  171. KERNEL_S8X1
  172. .endm
  173. .macro KERNEL_S8X1
  174. pld [ AO2 , #A_PRE ]
  175. pld [ AO2 , #A_PRE+32 ]
  176. vldmia.f64 XO , { d4 }
  177. vldmia.f64 AO1 , { d8 - d15 }
  178. vmla.f64 d24 , d4 , d8
  179. vmla.f64 d25 , d4 , d9
  180. vmla.f64 d26 , d4 , d10
  181. vmla.f64 d27 , d4 , d11
  182. vmla.f64 d28 , d4 , d12
  183. vmla.f64 d29 , d4 , d13
  184. vmla.f64 d30 , d4 , d14
  185. vmla.f64 d31 , d4 , d15
  186. add AO1, AO1, LDA
  187. add AO2, AO2, LDA
  188. add XO, XO, INC_X
  189. .endm
  190. .macro SAVE_S8
  191. vldmia.f64 YO, { d16 }
  192. vmla.f64 d16, d0, d24
  193. vstmia.f64 YO, { d16 }
  194. add YO, YO, INC_Y
  195. vldmia.f64 YO, { d17 }
  196. vmla.f64 d17, d0, d25
  197. vstmia.f64 YO, { d17 }
  198. add YO, YO, INC_Y
  199. vldmia.f64 YO, { d18 }
  200. vmla.f64 d18, d0, d26
  201. vstmia.f64 YO, { d18 }
  202. add YO, YO, INC_Y
  203. vldmia.f64 YO, { d19 }
  204. vmla.f64 d19, d0, d27
  205. vstmia.f64 YO, { d19 }
  206. add YO, YO, INC_Y
  207. vldmia.f64 YO, { d20 }
  208. vmla.f64 d20, d0, d28
  209. vstmia.f64 YO, { d20 }
  210. add YO, YO, INC_Y
  211. vldmia.f64 YO, { d21 }
  212. vmla.f64 d21, d0, d29
  213. vstmia.f64 YO, { d21 }
  214. add YO, YO, INC_Y
  215. vldmia.f64 YO, { d22 }
  216. vmla.f64 d22, d0, d30
  217. vstmia.f64 YO, { d22 }
  218. add YO, YO, INC_Y
  219. vldmia.f64 YO, { d23 }
  220. vmla.f64 d23, d0, d31
  221. vstmia.f64 YO, { d23 }
  222. add YO, YO, INC_Y
  223. .endm
  224. .macro INIT_S1
  225. fldd d24 , FP_ZERO
  226. .endm
  227. .macro KERNEL_S1X1
  228. vldmia.f64 XO , { d4 }
  229. vldmia.f64 AO1 , { d8 }
  230. vmla.f64 d24 , d4 , d8
  231. add AO1, AO1, LDA
  232. add XO, XO, INC_X
  233. .endm
  234. .macro SAVE_S1
  235. vldmia.f64 YO, { d16 }
  236. vmla.f64 d16, d0, d24
  237. vstmia.f64 YO, { d16 }
  238. add YO, YO, INC_Y
  239. .endm
  240. #else /************************* SINGLE PRECISION *****************************************/
  241. .macro INIT_F8
  242. pld [ YO , #Y_PRE ]
  243. flds s24 , FP_ZERO
  244. vmov.f32 s25 , s24
  245. vmov.f32 s26 , s24
  246. vmov.f32 s27 , s24
  247. vmov.f32 s28 , s24
  248. vmov.f32 s29 , s24
  249. vmov.f32 s30 , s24
  250. vmov.f32 s31 , s24
  251. .endm
  252. .macro KERNEL_F8X8
  253. pld [ XO , #X_PRE ]
  254. KERNEL_F8X1
  255. KERNEL_F8X1
  256. KERNEL_F8X1
  257. KERNEL_F8X1
  258. KERNEL_F8X1
  259. KERNEL_F8X1
  260. KERNEL_F8X1
  261. KERNEL_F8X1
  262. .endm
  263. .macro KERNEL_F8X1
  264. pld [ AO2 , #A_PRE ]
  265. vldmia.f32 XO! , { s4 }
  266. vldmia.f32 AO1 , { s8 - s15 }
  267. vmla.f32 s24 , s4 , s8
  268. vmla.f32 s25 , s4 , s9
  269. vmla.f32 s26 , s4 , s10
  270. vmla.f32 s27 , s4 , s11
  271. vmla.f32 s28 , s4 , s12
  272. vmla.f32 s29 , s4 , s13
  273. vmla.f32 s30 , s4 , s14
  274. vmla.f32 s31 , s4 , s15
  275. add AO1, AO1, LDA
  276. add AO2, AO2, LDA
  277. .endm
  278. .macro SAVE_F8
  279. vldmia.f32 YO, { s16 - s23 }
  280. vmla.f32 s16, s0, s24
  281. vmla.f32 s17, s0, s25
  282. vmla.f32 s18, s0, s26
  283. vmla.f32 s19, s0, s27
  284. vmla.f32 s20, s0, s28
  285. vmla.f32 s21, s0, s29
  286. vmla.f32 s22, s0, s30
  287. vmla.f32 s23, s0, s31
  288. vstmia.f32 YO!, { s16 - s23 }
  289. .endm
  290. .macro INIT_F1
  291. flds s24 , FP_ZERO
  292. .endm
  293. .macro KERNEL_F1X1
  294. vldmia.f32 XO! , { s4 }
  295. vldmia.f32 AO1 , { s8 }
  296. vmla.f32 s24 , s4 , s8
  297. add AO1, AO1, LDA
  298. .endm
  299. .macro SAVE_F1
  300. vldmia.f32 YO, { s16 }
  301. vmla.f32 s16, s0, s24
  302. vstmia.f32 YO!, { s16 }
  303. .endm
  304. /*********************************************************************************************/
  305. .macro INIT_S8
  306. flds s24 , FP_ZERO
  307. vmov.f32 s25 , s24
  308. vmov.f32 s26 , s24
  309. vmov.f32 s27 , s24
  310. vmov.f32 s28 , s24
  311. vmov.f32 s29 , s24
  312. vmov.f32 s30 , s24
  313. vmov.f32 s31 , s24
  314. .endm
  315. .macro KERNEL_S8X8
  316. KERNEL_S8X1
  317. KERNEL_S8X1
  318. KERNEL_S8X1
  319. KERNEL_S8X1
  320. KERNEL_S8X1
  321. KERNEL_S8X1
  322. KERNEL_S8X1
  323. KERNEL_S8X1
  324. .endm
  325. .macro KERNEL_S8X1
  326. pld [ AO2 , #A_PRE ]
  327. vldmia.f32 XO , { s4 }
  328. vldmia.f32 AO1 , { s8 - s15 }
  329. vmla.f32 s24 , s4 , s8
  330. vmla.f32 s25 , s4 , s9
  331. vmla.f32 s26 , s4 , s10
  332. vmla.f32 s27 , s4 , s11
  333. vmla.f32 s28 , s4 , s12
  334. vmla.f32 s29 , s4 , s13
  335. vmla.f32 s30 , s4 , s14
  336. vmla.f32 s31 , s4 , s15
  337. add AO1, AO1, LDA
  338. add AO2, AO2, LDA
  339. add XO, XO, INC_X
  340. .endm
  341. .macro SAVE_S8
  342. vldmia.f32 YO, { s16 }
  343. vmla.f32 s16, s0, s24
  344. vstmia.f32 YO, { s16 }
  345. add YO, YO, INC_Y
  346. vldmia.f32 YO, { s17 }
  347. vmla.f32 s17, s0, s25
  348. vstmia.f32 YO, { s17 }
  349. add YO, YO, INC_Y
  350. vldmia.f32 YO, { s18 }
  351. vmla.f32 s18, s0, s26
  352. vstmia.f32 YO, { s18 }
  353. add YO, YO, INC_Y
  354. vldmia.f32 YO, { s19 }
  355. vmla.f32 s19, s0, s27
  356. vstmia.f32 YO, { s19 }
  357. add YO, YO, INC_Y
  358. vldmia.f32 YO, { s20 }
  359. vmla.f32 s20, s0, s28
  360. vstmia.f32 YO, { s20 }
  361. add YO, YO, INC_Y
  362. vldmia.f32 YO, { s21 }
  363. vmla.f32 s21, s0, s29
  364. vstmia.f32 YO, { s21 }
  365. add YO, YO, INC_Y
  366. vldmia.f32 YO, { s22 }
  367. vmla.f32 s22, s0, s30
  368. vstmia.f32 YO, { s22 }
  369. add YO, YO, INC_Y
  370. vldmia.f32 YO, { s23 }
  371. vmla.f32 s23, s0, s31
  372. vstmia.f32 YO, { s23 }
  373. add YO, YO, INC_Y
  374. .endm
  375. .macro INIT_S1
  376. flds s24 , FP_ZERO
  377. .endm
  378. .macro KERNEL_S1X1
  379. vldmia.f32 XO , { s4 }
  380. vldmia.f32 AO1 , { s8 }
  381. vmla.f32 s24 , s4 , s8
  382. add AO1, AO1, LDA
  383. add XO, XO, INC_X
  384. .endm
  385. .macro SAVE_S1
  386. vldmia.f32 YO, { s16 }
  387. vmla.f32 s16, s0, s24
  388. vstmia.f32 YO, { s16 }
  389. add YO, YO, INC_Y
  390. .endm
  391. #endif
/**************************************************************************************
* End of macro definitions
**************************************************************************************/
  395. PROLOGUE
  396. .align 5
  397. push {r4 - r9 , fp}
  398. add fp, sp, #28
  399. sub sp, sp, #STACKSIZE // reserve stack
  400. sub r12, fp, #192
  401. #if defined(DOUBLE)
  402. vstm r12, { d8 - d15 } // store floating point registers
  403. #else
  404. vstm r12, { s8 - s31 } // store floating point registers
  405. #endif
  406. movs r12, #0
  407. str r12, FP_ZERO
  408. str r12, FP_ZERO_1
  409. cmp OLD_M, #0
  410. ble gemvn_kernel_L999
  411. cmp N, #0
  412. ble gemvn_kernel_L999
  413. #if !defined(__ARM_PCS_VFP)
  414. #if !defined(DOUBLE)
  415. vmov s0, OLD_ALPHA
  416. #else
  417. vldr d0, OLD_ALPHA
  418. #endif
  419. ldr OLD_A, OLD_A_SOFTFP
  420. #endif
  421. str OLD_A, A
  422. str OLD_M, M
  423. ldr INC_X , OLD_INC_X
  424. ldr INC_Y , OLD_INC_Y
  425. cmp INC_X, #0
  426. beq gemvn_kernel_L999
  427. cmp INC_Y, #0
  428. beq gemvn_kernel_L999
  429. ldr LDA, OLD_LDA
  430. #if defined(DOUBLE)
  431. lsl LDA, LDA, #3 // LDA * SIZE
  432. #else
  433. lsl LDA, LDA, #2 // LDA * SIZE
  434. #endif
  435. cmp INC_X, #1
  436. bne gemvn_kernel_S8_BEGIN
  437. cmp INC_Y, #1
  438. bne gemvn_kernel_S8_BEGIN
  439. gemvn_kernel_F8_BEGIN:
  440. ldr YO , Y
  441. ldr I, M
  442. asrs I, I, #3 // I = M / 8
  443. ble gemvn_kernel_F1_BEGIN
  444. gemvn_kernel_F8X8:
  445. ldr AO1, A
  446. add AO2, AO1, LDA
  447. add r3 , AO1, #8*SIZE
  448. str r3 , A
  449. ldr XO , X
  450. INIT_F8
  451. asrs J, N, #3 // J = N / 8
  452. ble gemvn_kernel_F8X1
  453. gemvn_kernel_F8X8_10:
  454. KERNEL_F8X8
  455. subs J, J, #1
  456. bne gemvn_kernel_F8X8_10
  457. gemvn_kernel_F8X1:
  458. ands J, N , #7
  459. ble gemvn_kernel_F8_END
  460. gemvn_kernel_F8X1_10:
  461. KERNEL_F8X1
  462. subs J, J, #1
  463. bne gemvn_kernel_F8X1_10
  464. gemvn_kernel_F8_END:
  465. SAVE_F8
  466. subs I , I , #1
  467. bne gemvn_kernel_F8X8
  468. gemvn_kernel_F1_BEGIN:
  469. ldr I, M
  470. ands I, I , #7
  471. ble gemvn_kernel_L999
  472. gemvn_kernel_F1X1:
  473. ldr AO1, A
  474. add r3, AO1, #SIZE
  475. str r3, A
  476. ldr XO , X
  477. INIT_F1
  478. mov J, N
  479. gemvn_kernel_F1X1_10:
  480. KERNEL_F1X1
  481. subs J, J, #1
  482. bne gemvn_kernel_F1X1_10
  483. gemvn_kernel_F1_END:
  484. SAVE_F1
  485. subs I , I , #1
  486. bne gemvn_kernel_F1X1
  487. b gemvn_kernel_L999
  488. /*************************************************************************************************************/
  489. gemvn_kernel_S8_BEGIN:
  490. #if defined(DOUBLE)
  491. lsl INC_X, INC_X, #3 // INC_X * SIZE
  492. lsl INC_Y, INC_Y, #3 // INC_Y * SIZE
  493. #else
  494. lsl INC_X, INC_X, #2 // INC_X * SIZE
  495. lsl INC_Y, INC_Y, #2 // INC_Y * SIZE
  496. #endif
  497. ldr YO , Y
  498. ldr I, M
  499. asrs I, I, #3 // I = M / 8
  500. ble gemvn_kernel_S1_BEGIN
  501. gemvn_kernel_S8X8:
  502. ldr AO1, A
  503. add AO2, AO1, LDA
  504. add r3 , AO1, #8*SIZE
  505. str r3 , A
  506. ldr XO , X
  507. INIT_S8
  508. asrs J, N, #3 // J = N / 8
  509. ble gemvn_kernel_S8X1
  510. gemvn_kernel_S8X8_10:
  511. KERNEL_S8X8
  512. subs J, J, #1
  513. bne gemvn_kernel_S8X8_10
  514. gemvn_kernel_S8X1:
  515. ands J, N , #7
  516. ble gemvn_kernel_S8_END
  517. gemvn_kernel_S8X1_10:
  518. KERNEL_S8X1
  519. subs J, J, #1
  520. bne gemvn_kernel_S8X1_10
  521. gemvn_kernel_S8_END:
  522. SAVE_S8
  523. subs I , I , #1
  524. bne gemvn_kernel_S8X8
  525. gemvn_kernel_S1_BEGIN:
  526. ldr I, M
  527. ands I, I , #7
  528. ble gemvn_kernel_L999
  529. gemvn_kernel_S1X1:
  530. ldr AO1, A
  531. add r3, AO1, #SIZE
  532. str r3, A
  533. ldr XO , X
  534. INIT_S1
  535. mov J, N
  536. gemvn_kernel_S1X1_10:
  537. KERNEL_S1X1
  538. subs J, J, #1
  539. bne gemvn_kernel_S1X1_10
  540. gemvn_kernel_S1_END:
  541. SAVE_S1
  542. subs I , I , #1
  543. bne gemvn_kernel_S1X1
  544. /*************************************************************************************************************/
  545. gemvn_kernel_L999:
  546. sub r3, fp, #192
  547. #if defined(DOUBLE)
  548. vldm r3, { d8 - d15 } // restore floating point registers
  549. #else
  550. vldm r3, { s8 - s31 } // restore floating point registers
  551. #endif
  552. mov r0, #0 // set return value
  553. sub sp, fp, #28
  554. pop {r4 -r9 ,fp}
  555. bx lr
  556. EPILOGUE