You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

sgemm_kernel_8x4_ps.S 137 kB

14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
777787779778077817782778377847785778677877788778977907791779277937794779577967797
  # File-level configuration. ASMNAME and everything in common.h are supplied
  # from outside this file.
  1. #define REALNAME ASMNAME
  2. #define ASSEMBLER
  3. #include "common.h"
  # FETCH is used below with $0 as its destination, so the loaded value is
  # discarded; presumably the load only serves to touch the cache line early.
  4. #define FETCH ld
  # Size in bytes of the frame opened by the prologue
  # (daddiu $sp,$sp,-STACKSIZE).
  5. #define STACKSIZE 160
  # Raw encodings of the Loongson gsLQC1 / gsSQC1 instructions, emitted with
  # .word (presumably so the file assembles without native support for them).
  #   base   - number of the integer register holding the address (R12..R17)
  #   fq, ft - numbers of the two FP registers transferred (F0..F31)
  #   offset - placed in the 9-bit field at bits 14:6; callers in this file
  #            step it by one per register pair
  # Every argument is parenthesised so an expression argument cannot regroup
  # against the shifts, and offset is masked to its field so a negative
  # offset cannot spill into the bits above it. Existing calls (offsets 0..7)
  # encode exactly as before.
  # Example: gsLQC1(R13, F9, F8, 0) assembles to 0xC9A88029.
  6. #define gsLQC1(base,fq,ft,offset) .word((0x32<<26)|((base)<<21)|((ft)<<16)|(0x1<<15)|(((offset)&0x1FF)<<6)|(0x1<<5)|(fq))
  7. #define gsSQC1(base,fq,ft,offset) .word((0x3A<<26)|((base)<<21)|((ft)<<16)|(0x1<<15)|(((offset)&0x1FF)<<6)|(0x1<<5)|(fq))
  8. ##### Parameter registers ####
  # Integer registers carrying the incoming arguments. Below, M is consumed in
  # blocks of 8 (MR), N in blocks of 4 (NR) and K in unrolled steps of 64;
  # LDC is shifted by BASE_SHIFT before it is used as a stride.
  9. #define M $4
  10. #define N $5
  11. #define K $6
  12. #define A $8
  13. #define B $9
  14. #define C $10
  15. #define LDC $11
  16. #### Pointer A, B, C ####
  # Working pointers. AO and BO are also referenced by number (R12, R13) in
  # the gsLQC1 loads. CO1..CO4 are four C pointers spaced LDC apart. PREA and
  # PREB are the addresses handed to FETCH inside the inner loop.
  17. #define AO $12
  18. #define BO $13
  19. #define CO1 $14
  20. #define CO2 $15
  21. #define CO3 $16
  22. #define CO4 $17
  23. #define PREA $18
  24. #define PREB $19
  25. #### Used registers ####
  # A1..A8 ($f0-$f7): values loaded from AO with gsLQC1.
  26. #define A1 $f0
  27. #define A2 $f1
  28. #define A3 $f2
  29. #define A4 $f3
  30. #define A5 $f4
  31. #define A6 $f5
  32. #define A7 $f6
  33. #define A8 $f7
  # B1..B8 ($f8-$f15): B1/B2 and B5/B6 are loaded from BO with gsLQC1;
  # B3/B4 and B7/B8 are produced from them with PLU.
  34. #define B1 $f8
  35. #define B2 $f9
  36. #define B3 $f10
  37. #define B4 $f11
  38. #define B5 $f12
  39. #define B6 $f13
  40. #define B7 $f14
  41. #define B8 $f15
  # C11..C44 ($f16-$f31): the sixteen MADPS accumulators. The names do not
  # follow the register numbers in order (C13 is $f24, not $f18).
  42. #define C11 $f16
  43. #define C12 $f17
  44. #define C21 $f18
  45. #define C22 $f19
  46. #define C31 $f20
  47. #define C32 $f21
  48. #define C41 $f22
  49. #define C42 $f23
  50. #define C13 $f24
  51. #define C14 $f25
  52. #define C23 $f26
  53. #define C24 $f27
  54. #define C33 $f28
  55. #define C34 $f29
  56. #define C43 $f30
  57. #define C44 $f31
  # Loop counters: I is set from M >> 3, J from N >> 2, and L holds the trip
  # count of the unrolled inner loop.
  58. #define I $2
  59. #define J $3
  60. #define L $7
  61. #### Alpha register ####
  # ALPHA is the same register as B8 ($f15); the kernel stores it to
  # 152($sp) at .L4, before the inner loop writes B8.
  62. #define ALPHA $f15
  # Bare FP register numbers for the gsLQC1/gsSQC1 .word encodings, which take
  # numeric fields instead of register names (Fn is the number of $fn).
  63. #define F31 31
  64. #define F30 30
  65. #define F29 29
  66. #define F28 28
  67. #define F27 27
  68. #define F26 26
  69. #define F25 25
  70. #define F24 24
  71. #define F23 23
  72. #define F22 22
  73. #define F21 21
  74. #define F20 20
  75. #define F19 19
  76. #define F18 18
  77. #define F17 17
  78. #define F16 16
  79. #define F15 15
  80. #define F14 14
  81. #define F13 13
  82. #define F12 12
  83. #define F11 11
  84. #define F10 10
  85. #define F9 9
  86. #define F8 8
  87. #define F7 7
  88. #define F6 6
  89. #define F5 5
  90. #define F4 4
  91. #define F3 3
  92. #define F2 2
  93. #define F1 1
  94. #define F0 0
  # Bare integer register numbers for the same encodings: R12 = AO, R13 = BO,
  # R14..R17 = CO1..CO4.
  95. #define R12 12
  96. #define R13 13
  97. #define R14 14
  98. #define R15 15
  99. #define R16 16
  100. #define R17 17
  # Extra integer registers used only when TRMMKERNEL is defined; the prologue
  # saves $23-$25 under the same condition.
  101. #if defined(TRMMKERNEL)
  102. #define OFFSET $23
  103. #define KK $24
  104. #define TEMP $25
  105. #endif
  106. # .text
  107. # .align 2
  108. ## .globl gemm
  109. # .set nomips16
  110. # .ent gemm
  111. # .type gemm, @function
  112. #gemm:
  113. # .frame $sp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0
  114. # .mask 0x40000000,-8
  115. # .fmask 0x00000000,0
  116. # .set noreorder
  117. # .set nomacro
  # Entry: open a STACKSIZE-byte frame and spill the registers listed below.
  # PROLOGUE itself is expanded from common.h.
  118. PROLOGUE
  119. daddiu $sp,$sp,-STACKSIZE
  # Integer registers $16-$22 ($16-$19 are CO3, CO4, PREA and PREB).
  120. sd $16, 0($sp)
  121. sd $17, 8($sp)
  122. sd $18, 16($sp)
  123. sd $19, 24($sp)
  124. sd $20, 32($sp)
  125. sd $21, 40($sp)
  126. sd $22, 48($sp)
  # FP registers $f24-$f28.
  # NOTE(review): the accumulators C34, C43 and C44 live in $f29-$f31 and are
  # overwritten by this kernel, but no store for them appears here. Confirm
  # against the target ABI whether they must be preserved as well.
  127. sdc1 $f24, 56($sp)
  128. sdc1 $f25, 64($sp)
  129. sdc1 $f26, 72($sp)
  130. sdc1 $f27, 80($sp)
  131. sdc1 $f28, 88($sp)
  132. #if defined(TRMMKERNEL)
  # TRMM only: save $23-$25 (OFFSET, KK, TEMP), then read OFFSET from the
  # first slot above this frame. The displacement is written as STACKSIZE so
  # it follows the frame size instead of repeating the literal 160.
  133. sd $23, 96($sp)
  134. sd $24, 104($sp)
  135. sd $25, 112($sp)
  136. LDARG OFFSET, STACKSIZE($sp)
  137. #endif
  138. #ifndef __64BIT__
  # Builds without __64BIT__ additionally save $f20-$f23.
  139. sdc1 $f20,120($sp)
  140. sdc1 $f21,128($sp)
  141. sdc1 $f22,136($sp)
  142. sdc1 $f23,144($sp)
  143. #endif
  144. .align 4
  # Outer setup: J = N >> 2 counts 4-column panels, and LDC is shifted by
  # BASE_SHIFT so it can be added directly to the C pointers.
  145. .L4:
  146. dsra J, N, 2 # NR=4
  147. dsll LDC, LDC, BASE_SHIFT# LDC*SIZE
  148. #if defined(TRMMKERNEL) && !defined(LEFT)
  149. neg KK, OFFSET
  150. #endif
  # No full panel: go to .L2. The ALPHA store follows the branch directly.
  # NOTE(review): presumably it sits in the branch delay slot and so runs on
  # both paths - confirm the reorder mode set up by PROLOGUE.
  151. blez J, .L2
  152. ST ALPHA, 152($sp)
  # Per-panel setup: I = M >> 3 counts 8-row blocks, AO restarts at A, and
  # CO1..CO4 are set to C, C+LDC, C+2*LDC and C+3*LDC.
  153. .L48:
  154. dsra I, M, 3 # MR=8
  155. dsll PREA, K, BASE_SHIFT
  156. move AO, A # Reset A
  157. move CO1, C
  158. daddu CO2, C, LDC
  159. daddu CO3, CO2, LDC
  160. daddu CO4, CO3, LDC
  # PREA = A + (K << BASE_SHIFT): the address FETCH uses for A in the loop.
  161. daddu PREA, A, PREA
  162. #if defined(TRMMKERNEL) && defined(LEFT)
  163. move KK, OFFSET
  164. #endif
  # No 8-row block: go to .L44. The instruction after the branch sets
  # C = CO4 + LDC, i.e. one stride past the four pointers just set up.
  165. blez I, .L44
  166. daddu C, CO4, LDC
  167. .align 4
  # Start of one 8x4 block: choose the starting AO/BO, clear the sixteen
  # accumulators, preload the first A and B registers, touch the C locations
  # with FETCH, and compute the trip count L of the inner loop at .L4810.
  # The TRMM and GEMM variants below share the same instruction interleave.
  168. .L481:
  169. #if defined(TRMMKERNEL)
  170. #if (defined(LEFT) && defined(TRANSA)) ||\
  171. (!defined(LEFT) && !defined(TRANSA))
  172. move BO, B
  173. #else
  174. dsll L, KK, 3 + BASE_SHIFT # kk*8mr*datasize
  # TEMP = KK << (2 + BASE_SHIFT): the matching offset into B (4 = NR).
  175. dsll TEMP, KK, 2 + BASE_SHIFT
  176. daddu AO, AO, L # AO point to the data addr
  177. daddu BO, B, TEMP
  178. #endif
  # C11 is written from $0 and then copied into the other fifteen accumulators.
  179. MTC $0, C11 # CLEAR RESULTS REGISTERS
  180. MOV C12, C11
  181. dsll PREB, K, BASE_SHIFT
  182. MOV C21, C11
  183. MOV C22, C11
  184. MOV C31, C11
  185. MOV C32, C11
  # R13 and R12 are the register numbers of BO and AO.
  186. gsLQC1(R13, F9, F8, 0) # B1 B2
  187. MOV C41, C11
  188. MOV C42, C11
  189. gsLQC1(R12, F1, F0, 0) # A1 A2
  190. MOV C13, C11
  191. MOV C14, C11
  192. gsLQC1(R12, F3, F2, 1) # A3 A4
  193. MOV C23, C11
  # FETCH loads into $0 from offsets 0 and 4 elements at each of CO1..CO4.
  194. FETCH $0, 0 * SIZE(CO1)
  195. MOV C24, C11
  196. FETCH $0, 4 * SIZE(CO1)
  197. MOV C33, C11
  198. FETCH $0, 0 * SIZE(CO2)
  199. MOV C34, C11
  200. FETCH $0, 4 * SIZE(CO2)
  # PREB = B + (K << BASE_SHIFT).
  201. daddu PREB, B, PREB
  202. MOV C43, C11
  203. FETCH $0, 0 * SIZE(CO3)
  204. MOV C44, C11
  205. FETCH $0, 4 * SIZE(CO3)
  # B3 and B4 are derived from B1 and B2. PLU is defined outside this file;
  # NOTE(review): presumably a paired-single lane shuffle - confirm in common.h.
  206. PLU B3, B1, B1
  207. FETCH $0, 0 * SIZE(CO4)
  208. PLU B4, B2, B2
  209. FETCH $0, 4 * SIZE(CO4)
  210. #if (defined(LEFT) && !defined(TRANSA)) ||\
  211. (!defined(LEFT) && defined(TRANSA))
  212. dsubu TEMP, K, KK # TEMP is the length of the data part
  213. #elif defined(LEFT)
  214. daddiu TEMP, KK, 8
  215. #else
  216. daddiu TEMP, KK, 4
  217. #endif
  # L = TEMP >> 6; with no full trip, skip the unrolled loop and go to .L482.
  218. dsra L, TEMP, 6
  219. blez L, .L482
  220. NOP
  221. #else
  222. # GEMM PART
  223. move BO, B # Reset B
  224. dsra L, K, 6 # UnRoll K=64
  # C11 is written from $0 and then copied into the other fifteen accumulators.
  225. MTC $0, C11 # CLEAR RESULTS REGISTERS
  226. MOV C12, C11
  227. dsll PREB, K, BASE_SHIFT
  228. MOV C21, C11
  229. MOV C22, C11
  230. MOV C31, C11
  231. MOV C32, C11
  # R13 and R12 are the register numbers of BO and AO.
  232. gsLQC1(R13, F9, F8, 0) # B1 B2
  233. MOV C41, C11
  234. MOV C42, C11
  235. gsLQC1(R12, F1, F0, 0) # A1 A2
  236. MOV C13, C11
  237. MOV C14, C11
  238. gsLQC1(R12, F3, F2, 1) # A3 A4
  239. MOV C23, C11
  # FETCH loads into $0 from offsets 0 and 4 elements at each of CO1..CO4.
  240. FETCH $0, 0 * SIZE(CO1)
  241. MOV C24, C11
  242. FETCH $0, 4 * SIZE(CO1)
  243. MOV C33, C11
  244. FETCH $0, 0 * SIZE(CO2)
  245. MOV C34, C11
  246. FETCH $0, 4 * SIZE(CO2)
  # PREB = B + (K << BASE_SHIFT).
  247. daddu PREB, B, PREB
  248. MOV C43, C11
  249. FETCH $0, 0 * SIZE(CO3)
  250. MOV C44, C11
  251. FETCH $0, 4 * SIZE(CO3)
  252. PLU B3, B1, B1
  253. FETCH $0, 0 * SIZE(CO4)
  254. PLU B4, B2, B2
  # With no full trip, go to .L482; the last FETCH follows the branch directly
  # (the TRMM variant above has a NOP in this position).
  255. blez L, .L482
  256. FETCH $0, 4 * SIZE(CO4)
  257. #endif
  258. .L4810:
  259. daddiu L, L, -1
  260. MADPS C11, C11, A1, B1
  261. MADPS C21, C21, A2, B1
  262. gsLQC1(R13, F13, F12, 1) # B3 B4
  263. MADPS C12, C12, A1, B2
  264. MADPS C22, C22, A2, B2
  265. gsLQC1(R12, F5, F4, 2) # A5 A6
  266. MADPS C31, C31, A3, B1
  267. MADPS C41, C41, A4, B1
  268. gsLQC1(R12, F7, F6, 3) # A7 A8
  269. MADPS C32, C32, A3, B2
  270. MADPS C42, C42, A4, B2
  271. FETCH $0, 0 * SIZE(PREB)
  272. MADPS C13, C13, A1, B3
  273. MADPS C23, C23, A2, B3
  274. MADPS C33, C33, A3, B3
  275. MADPS C43, C43, A4, B3
  276. MADPS C14, C14, A1, B4
  277. PLU B7, B5, B5
  278. FETCH $0, 0 * SIZE(PREA)
  279. MADPS C24, C24, A2, B4
  280. PLU B8, B6, B6
  281. FETCH $0, 4 * SIZE(PREA)
  282. MADPS C34, C34, A3, B4
  283. MADPS C44, C44, A4, B4
  284. MADPS C11, C11, A5, B5
  285. MADPS C21, C21, A6, B5
  286. gsLQC1(R13, F9, F8, 2) # B1 B2
  287. MADPS C12, C12, A5, B6
  288. MADPS C22, C22, A6, B6
  289. gsLQC1(R12, F1, F0, 4) # A1 A2
  290. MADPS C31, C31, A7, B5
  291. MADPS C41, C41, A8, B5
  292. gsLQC1(R12, F3, F2, 5) # A3 A4
  293. MADPS C32, C32, A7, B6
  294. MADPS C42, C42, A8, B6
  295. FETCH $0, 4 * SIZE(PREB)
  296. MADPS C13, C13, A5, B7
  297. MADPS C23, C23, A6, B7
  298. MADPS C33, C33, A7, B7
  299. MADPS C43, C43, A8, B7
  300. MADPS C14, C14, A5, B8
  301. PLU B3, B1, B1
  302. FETCH $0, 8 * SIZE(PREA)
  303. MADPS C24, C24, A6, B8
  304. PLU B4, B2, B2
  305. FETCH $0, 12 * SIZE(PREA)
  306. MADPS C34, C34, A7, B8
  307. MADPS C44, C44, A8, B8
  308. MADPS C11, C11, A1, B1
  309. MADPS C21, C21, A2, B1
  310. gsLQC1(R13, F13, F12, 3) # B3 B4
  311. MADPS C12, C12, A1, B2
  312. MADPS C22, C22, A2, B2
  313. gsLQC1(R12, F5, F4, 6) # A5 A6
  314. MADPS C31, C31, A3, B1
  315. MADPS C41, C41, A4, B1
  316. gsLQC1(R12, F7, F6, 7) # A7 A8
  317. MADPS C32, C32, A3, B2
  318. MADPS C42, C42, A4, B2
  319. FETCH $0, 8 * SIZE(PREB)
  320. MADPS C13, C13, A1, B3
  321. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  322. MADPS C23, C23, A2, B3
  323. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  324. MADPS C33, C33, A3, B3
  325. MADPS C43, C43, A4, B3
  326. MADPS C14, C14, A1, B4
  327. PLU B7, B5, B5
  328. FETCH $0, 16 * SIZE(PREA)
  329. MADPS C24, C24, A2, B4
  330. PLU B8, B6, B6
  331. FETCH $0, 20 * SIZE(PREA)
  332. MADPS C34, C34, A3, B4
  333. MADPS C44, C44, A4, B4
  334. MADPS C11, C11, A5, B5
  335. MADPS C21, C21, A6, B5
  336. gsLQC1(R13, F9, F8, 0) # B1 B2
  337. MADPS C12, C12, A5, B6
  338. MADPS C22, C22, A6, B6
  339. gsLQC1(R12, F1, F0, 0) # A1 A2
  340. MADPS C31, C31, A7, B5
  341. MADPS C41, C41, A8, B5
  342. gsLQC1(R12, F3, F2, 1) # A3 A4
  343. MADPS C32, C32, A7, B6
  344. MADPS C42, C42, A8, B6
  345. FETCH $0, 12 * SIZE(PREB)
  346. MADPS C13, C13, A5, B7
  347. MADPS C23, C23, A6, B7
  348. daddiu PREB, PREB, 16 * SIZE
  349. MADPS C33, C33, A7, B7
  350. MADPS C43, C43, A8, B7
  351. MADPS C14, C14, A5, B8
  352. PLU B3, B1, B1
  353. FETCH $0, 24 * SIZE(PREA)
  354. MADPS C24, C24, A6, B8
  355. PLU B4, B2, B2
  356. FETCH $0, 28 * SIZE(PREA)
  357. daddiu PREA, PREA, 32 * SIZE
  358. MADPS C34, C34, A7, B8
  359. MADPS C44, C44, A8, B8
  360. MADPS C11, C11, A1, B1
  361. MADPS C21, C21, A2, B1
  362. gsLQC1(R13, F13, F12, 1) # B3 B4
  363. MADPS C12, C12, A1, B2
  364. MADPS C22, C22, A2, B2
  365. gsLQC1(R12, F5, F4, 2) # A5 A6
  366. MADPS C31, C31, A3, B1
  367. MADPS C41, C41, A4, B1
  368. gsLQC1(R12, F7, F6, 3) # A7 A8
  369. MADPS C32, C32, A3, B2
  370. MADPS C42, C42, A4, B2
  371. FETCH $0, 0 * SIZE(PREB)
  372. MADPS C13, C13, A1, B3
  373. MADPS C23, C23, A2, B3
  374. MADPS C33, C33, A3, B3
  375. MADPS C43, C43, A4, B3
  376. MADPS C14, C14, A1, B4
  377. PLU B7, B5, B5
  378. FETCH $0, 0 * SIZE(PREA)
  379. MADPS C24, C24, A2, B4
  380. PLU B8, B6, B6
  381. FETCH $0, 4 * SIZE(PREA)
  382. MADPS C34, C34, A3, B4
  383. MADPS C44, C44, A4, B4
  384. MADPS C11, C11, A5, B5
  385. MADPS C21, C21, A6, B5
  386. gsLQC1(R13, F9, F8, 2) # B1 B2
  387. MADPS C12, C12, A5, B6
  388. MADPS C22, C22, A6, B6
  389. gsLQC1(R12, F1, F0, 4) # A1 A2
  390. MADPS C31, C31, A7, B5
  391. MADPS C41, C41, A8, B5
  392. gsLQC1(R12, F3, F2, 5) # A3 A4
  393. MADPS C32, C32, A7, B6
  394. MADPS C42, C42, A8, B6
  395. FETCH $0, 4 * SIZE(PREB)
  396. MADPS C13, C13, A5, B7
  397. MADPS C23, C23, A6, B7
  398. MADPS C33, C33, A7, B7
  399. MADPS C43, C43, A8, B7
  400. MADPS C14, C14, A5, B8
  401. PLU B3, B1, B1
  402. FETCH $0, 8 * SIZE(PREA)
  403. MADPS C24, C24, A6, B8
  404. PLU B4, B2, B2
  405. FETCH $0, 12 * SIZE(PREA)
  406. MADPS C34, C34, A7, B8
  407. MADPS C44, C44, A8, B8
  408. MADPS C11, C11, A1, B1
  409. MADPS C21, C21, A2, B1
  410. gsLQC1(R13, F13, F12, 3) # B3 B4
  411. MADPS C12, C12, A1, B2
  412. MADPS C22, C22, A2, B2
  413. gsLQC1(R12, F5, F4, 6) # A5 A6
  414. MADPS C31, C31, A3, B1
  415. MADPS C41, C41, A4, B1
  416. gsLQC1(R12, F7, F6, 7) # A7 A8
  417. MADPS C32, C32, A3, B2
  418. MADPS C42, C42, A4, B2
  419. FETCH $0, 8 * SIZE(PREB)
  420. MADPS C13, C13, A1, B3
  421. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  422. MADPS C23, C23, A2, B3
  423. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  424. MADPS C33, C33, A3, B3
  425. MADPS C43, C43, A4, B3
  426. MADPS C14, C14, A1, B4
  427. PLU B7, B5, B5
  428. FETCH $0, 16 * SIZE(PREA)
  429. MADPS C24, C24, A2, B4
  430. PLU B8, B6, B6
  431. FETCH $0, 20 * SIZE(PREA)
  432. MADPS C34, C34, A3, B4
  433. MADPS C44, C44, A4, B4
  434. MADPS C11, C11, A5, B5
  435. MADPS C21, C21, A6, B5
  436. gsLQC1(R13, F9, F8, 0) # B1 B2
  437. MADPS C12, C12, A5, B6
  438. MADPS C22, C22, A6, B6
  439. gsLQC1(R12, F1, F0, 0) # A1 A2
  440. MADPS C31, C31, A7, B5
  441. MADPS C41, C41, A8, B5
  442. gsLQC1(R12, F3, F2, 1) # A3 A4
  443. MADPS C32, C32, A7, B6
  444. MADPS C42, C42, A8, B6
  445. FETCH $0, 12 * SIZE(PREB)
  446. MADPS C13, C13, A5, B7
  447. MADPS C23, C23, A6, B7
  448. daddiu PREB, PREB, 16 * SIZE
  449. MADPS C33, C33, A7, B7
  450. MADPS C43, C43, A8, B7
  451. MADPS C14, C14, A5, B8
  452. PLU B3, B1, B1
  453. FETCH $0, 24 * SIZE(PREA)
  454. MADPS C24, C24, A6, B8
  455. PLU B4, B2, B2
  456. FETCH $0, 28 * SIZE(PREA)
  457. daddiu PREA, PREA, 32 * SIZE
  458. MADPS C34, C34, A7, B8
  459. MADPS C44, C44, A8, B8
  460. MADPS C11, C11, A1, B1
  461. MADPS C21, C21, A2, B1
  462. gsLQC1(R13, F13, F12, 1) # B3 B4
  463. MADPS C12, C12, A1, B2
  464. MADPS C22, C22, A2, B2
  465. gsLQC1(R12, F5, F4, 2) # A5 A6
  466. MADPS C31, C31, A3, B1
  467. MADPS C41, C41, A4, B1
  468. gsLQC1(R12, F7, F6, 3) # A7 A8
  469. MADPS C32, C32, A3, B2
  470. MADPS C42, C42, A4, B2
  471. FETCH $0, 0 * SIZE(PREB)
  472. MADPS C13, C13, A1, B3
  473. MADPS C23, C23, A2, B3
  474. MADPS C33, C33, A3, B3
  475. MADPS C43, C43, A4, B3
  476. MADPS C14, C14, A1, B4
  477. PLU B7, B5, B5
  478. FETCH $0, 0 * SIZE(PREA)
  479. MADPS C24, C24, A2, B4
  480. PLU B8, B6, B6
  481. FETCH $0, 4 * SIZE(PREA)
  482. MADPS C34, C34, A3, B4
  483. MADPS C44, C44, A4, B4
  484. MADPS C11, C11, A5, B5
  485. MADPS C21, C21, A6, B5
  486. gsLQC1(R13, F9, F8, 2) # B1 B2
  487. MADPS C12, C12, A5, B6
  488. MADPS C22, C22, A6, B6
  489. gsLQC1(R12, F1, F0, 4) # A1 A2
  490. MADPS C31, C31, A7, B5
  491. MADPS C41, C41, A8, B5
  492. gsLQC1(R12, F3, F2, 5) # A3 A4
  493. MADPS C32, C32, A7, B6
  494. MADPS C42, C42, A8, B6
  495. FETCH $0, 4 * SIZE(PREB)
  496. MADPS C13, C13, A5, B7
  497. MADPS C23, C23, A6, B7
  498. MADPS C33, C33, A7, B7
  499. MADPS C43, C43, A8, B7
  500. MADPS C14, C14, A5, B8
  501. PLU B3, B1, B1
  502. FETCH $0, 8 * SIZE(PREA)
  503. MADPS C24, C24, A6, B8
  504. PLU B4, B2, B2
  505. FETCH $0, 12 * SIZE(PREA)
  506. MADPS C34, C34, A7, B8
  507. MADPS C44, C44, A8, B8
  508. MADPS C11, C11, A1, B1
  509. MADPS C21, C21, A2, B1
  510. gsLQC1(R13, F13, F12, 3) # B3 B4
  511. MADPS C12, C12, A1, B2
  512. MADPS C22, C22, A2, B2
  513. gsLQC1(R12, F5, F4, 6) # A5 A6
  514. MADPS C31, C31, A3, B1
  515. MADPS C41, C41, A4, B1
  516. gsLQC1(R12, F7, F6, 7) # A7 A8
  517. MADPS C32, C32, A3, B2
  518. MADPS C42, C42, A4, B2
  519. FETCH $0, 8 * SIZE(PREB)
  520. MADPS C13, C13, A1, B3
  521. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  522. MADPS C23, C23, A2, B3
  523. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  524. MADPS C33, C33, A3, B3
  525. MADPS C43, C43, A4, B3
  526. MADPS C14, C14, A1, B4
  527. PLU B7, B5, B5
  528. FETCH $0, 16 * SIZE(PREA)
  529. MADPS C24, C24, A2, B4
  530. PLU B8, B6, B6
  531. FETCH $0, 20 * SIZE(PREA)
  532. MADPS C34, C34, A3, B4
  533. MADPS C44, C44, A4, B4
  534. MADPS C11, C11, A5, B5
  535. MADPS C21, C21, A6, B5
  536. gsLQC1(R13, F9, F8, 0) # B1 B2
  537. MADPS C12, C12, A5, B6
  538. MADPS C22, C22, A6, B6
  539. gsLQC1(R12, F1, F0, 0) # A1 A2
  540. MADPS C31, C31, A7, B5
  541. MADPS C41, C41, A8, B5
  542. gsLQC1(R12, F3, F2, 1) # A3 A4
  543. MADPS C32, C32, A7, B6
  544. MADPS C42, C42, A8, B6
  545. FETCH $0, 12 * SIZE(PREB)
  546. MADPS C13, C13, A5, B7
  547. MADPS C23, C23, A6, B7
  548. daddiu PREB, PREB, 16 * SIZE
  549. MADPS C33, C33, A7, B7
  550. MADPS C43, C43, A8, B7
  551. MADPS C14, C14, A5, B8
  552. PLU B3, B1, B1
  553. FETCH $0, 24 * SIZE(PREA)
  554. MADPS C24, C24, A6, B8
  555. PLU B4, B2, B2
  556. FETCH $0, 28 * SIZE(PREA)
  557. daddiu PREA, PREA, 32 * SIZE
  558. MADPS C34, C34, A7, B8
  559. MADPS C44, C44, A8, B8
  560. MADPS C11, C11, A1, B1
  561. MADPS C21, C21, A2, B1
  562. gsLQC1(R13, F13, F12, 1) # B3 B4
  563. MADPS C12, C12, A1, B2
  564. MADPS C22, C22, A2, B2
  565. gsLQC1(R12, F5, F4, 2) # A5 A6
  566. MADPS C31, C31, A3, B1
  567. MADPS C41, C41, A4, B1
  568. gsLQC1(R12, F7, F6, 3) # A7 A8
  569. MADPS C32, C32, A3, B2
  570. MADPS C42, C42, A4, B2
  571. FETCH $0, 0 * SIZE(PREB)
  572. MADPS C13, C13, A1, B3
  573. MADPS C23, C23, A2, B3
  574. MADPS C33, C33, A3, B3
  575. MADPS C43, C43, A4, B3
  576. MADPS C14, C14, A1, B4
  577. PLU B7, B5, B5
  578. FETCH $0, 0 * SIZE(PREA)
  579. MADPS C24, C24, A2, B4
  580. PLU B8, B6, B6
  581. FETCH $0, 4 * SIZE(PREA)
  582. MADPS C34, C34, A3, B4
  583. MADPS C44, C44, A4, B4
  584. MADPS C11, C11, A5, B5
  585. MADPS C21, C21, A6, B5
  586. gsLQC1(R13, F9, F8, 2) # B1 B2
  587. MADPS C12, C12, A5, B6
  588. MADPS C22, C22, A6, B6
  589. gsLQC1(R12, F1, F0, 4) # A1 A2
  590. MADPS C31, C31, A7, B5
  591. MADPS C41, C41, A8, B5
  592. gsLQC1(R12, F3, F2, 5) # A3 A4
  593. MADPS C32, C32, A7, B6
  594. MADPS C42, C42, A8, B6
  595. FETCH $0, 4 * SIZE(PREB)
  596. MADPS C13, C13, A5, B7
  597. MADPS C23, C23, A6, B7
  598. MADPS C33, C33, A7, B7
  599. MADPS C43, C43, A8, B7
  600. MADPS C14, C14, A5, B8
  601. PLU B3, B1, B1
  602. FETCH $0, 8 * SIZE(PREA)
  603. MADPS C24, C24, A6, B8
  604. PLU B4, B2, B2
  605. FETCH $0, 12 * SIZE(PREA)
  606. MADPS C34, C34, A7, B8
  607. MADPS C44, C44, A8, B8
  608. MADPS C11, C11, A1, B1
  609. MADPS C21, C21, A2, B1
  610. gsLQC1(R13, F13, F12, 3) # B3 B4
  611. MADPS C12, C12, A1, B2
  612. MADPS C22, C22, A2, B2
  613. gsLQC1(R12, F5, F4, 6) # A5 A6
  614. MADPS C31, C31, A3, B1
  615. MADPS C41, C41, A4, B1
  616. gsLQC1(R12, F7, F6, 7) # A7 A8
  617. MADPS C32, C32, A3, B2
  618. MADPS C42, C42, A4, B2
  619. FETCH $0, 8 * SIZE(PREB)
  620. MADPS C13, C13, A1, B3
  621. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  622. MADPS C23, C23, A2, B3
  623. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  624. MADPS C33, C33, A3, B3
  625. MADPS C43, C43, A4, B3
  626. MADPS C14, C14, A1, B4
  627. PLU B7, B5, B5
  628. FETCH $0, 16 * SIZE(PREA)
  629. MADPS C24, C24, A2, B4
  630. PLU B8, B6, B6
  631. FETCH $0, 20 * SIZE(PREA)
  632. MADPS C34, C34, A3, B4
  633. MADPS C44, C44, A4, B4
  634. MADPS C11, C11, A5, B5
  635. MADPS C21, C21, A6, B5
  636. gsLQC1(R13, F9, F8, 0) # B1 B2
  637. MADPS C12, C12, A5, B6
  638. MADPS C22, C22, A6, B6
  639. gsLQC1(R12, F1, F0, 0) # A1 A2
  640. MADPS C31, C31, A7, B5
  641. MADPS C41, C41, A8, B5
  642. gsLQC1(R12, F3, F2, 1) # A3 A4
  643. MADPS C32, C32, A7, B6
  644. MADPS C42, C42, A8, B6
  645. FETCH $0, 12 * SIZE(PREB)
  646. MADPS C13, C13, A5, B7
  647. MADPS C23, C23, A6, B7
  648. daddiu PREB, PREB, 16 * SIZE
  649. MADPS C33, C33, A7, B7
  650. MADPS C43, C43, A8, B7
  651. MADPS C14, C14, A5, B8
  652. PLU B3, B1, B1
  653. FETCH $0, 24 * SIZE(PREA)
  654. MADPS C24, C24, A6, B8
  655. PLU B4, B2, B2
  656. FETCH $0, 28 * SIZE(PREA)
  657. daddiu PREA, PREA, 32 * SIZE
  658. MADPS C34, C34, A7, B8
  659. MADPS C44, C44, A8, B8
  660. MADPS C11, C11, A1, B1
  661. MADPS C21, C21, A2, B1
  662. gsLQC1(R13, F13, F12, 1) # B3 B4
  663. MADPS C12, C12, A1, B2
  664. MADPS C22, C22, A2, B2
  665. gsLQC1(R12, F5, F4, 2) # A5 A6
  666. MADPS C31, C31, A3, B1
  667. MADPS C41, C41, A4, B1
  668. gsLQC1(R12, F7, F6, 3) # A7 A8
  669. MADPS C32, C32, A3, B2
  670. MADPS C42, C42, A4, B2
  671. FETCH $0, 0 * SIZE(PREB)
  672. MADPS C13, C13, A1, B3
  673. MADPS C23, C23, A2, B3
  674. MADPS C33, C33, A3, B3
  675. MADPS C43, C43, A4, B3
  676. MADPS C14, C14, A1, B4
  677. PLU B7, B5, B5
  678. FETCH $0, 0 * SIZE(PREA)
  679. MADPS C24, C24, A2, B4
  680. PLU B8, B6, B6
  681. FETCH $0, 4 * SIZE(PREA)
  682. MADPS C34, C34, A3, B4
  683. MADPS C44, C44, A4, B4
  684. MADPS C11, C11, A5, B5
  685. MADPS C21, C21, A6, B5
  686. gsLQC1(R13, F9, F8, 2) # B1 B2
  687. MADPS C12, C12, A5, B6
  688. MADPS C22, C22, A6, B6
  689. gsLQC1(R12, F1, F0, 4) # A1 A2
  690. MADPS C31, C31, A7, B5
  691. MADPS C41, C41, A8, B5
  692. gsLQC1(R12, F3, F2, 5) # A3 A4
  693. MADPS C32, C32, A7, B6
  694. MADPS C42, C42, A8, B6
  695. FETCH $0, 4 * SIZE(PREB)
  696. MADPS C13, C13, A5, B7
  697. MADPS C23, C23, A6, B7
  698. MADPS C33, C33, A7, B7
  699. MADPS C43, C43, A8, B7
  700. MADPS C14, C14, A5, B8
  701. PLU B3, B1, B1
  702. FETCH $0, 8 * SIZE(PREA)
  703. MADPS C24, C24, A6, B8
  704. PLU B4, B2, B2
  705. FETCH $0, 12 * SIZE(PREA)
  706. MADPS C34, C34, A7, B8
  707. MADPS C44, C44, A8, B8
  708. MADPS C11, C11, A1, B1
  709. MADPS C21, C21, A2, B1
  710. gsLQC1(R13, F13, F12, 3) # B3 B4
  711. MADPS C12, C12, A1, B2
  712. MADPS C22, C22, A2, B2
  713. gsLQC1(R12, F5, F4, 6) # A5 A6
  714. MADPS C31, C31, A3, B1
  715. MADPS C41, C41, A4, B1
  716. gsLQC1(R12, F7, F6, 7) # A7 A8
  717. MADPS C32, C32, A3, B2
  718. MADPS C42, C42, A4, B2
  719. FETCH $0, 8 * SIZE(PREB)
  720. MADPS C13, C13, A1, B3
  721. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  722. MADPS C23, C23, A2, B3
  723. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  724. MADPS C33, C33, A3, B3
  725. MADPS C43, C43, A4, B3
  726. MADPS C14, C14, A1, B4
  727. PLU B7, B5, B5
  728. FETCH $0, 16 * SIZE(PREA)
  729. MADPS C24, C24, A2, B4
  730. PLU B8, B6, B6
  731. FETCH $0, 20 * SIZE(PREA)
  732. MADPS C34, C34, A3, B4
  733. MADPS C44, C44, A4, B4
  734. MADPS C11, C11, A5, B5
  735. MADPS C21, C21, A6, B5
  736. gsLQC1(R13, F9, F8, 0) # B1 B2
  737. MADPS C12, C12, A5, B6
  738. MADPS C22, C22, A6, B6
  739. gsLQC1(R12, F1, F0, 0) # A1 A2
  740. MADPS C31, C31, A7, B5
  741. MADPS C41, C41, A8, B5
  742. gsLQC1(R12, F3, F2, 1) # A3 A4
  743. MADPS C32, C32, A7, B6
  744. MADPS C42, C42, A8, B6
  745. FETCH $0, 12 * SIZE(PREB)
  746. MADPS C13, C13, A5, B7
  747. MADPS C23, C23, A6, B7
  748. daddiu PREB, PREB, 16 * SIZE
  749. MADPS C33, C33, A7, B7
  750. MADPS C43, C43, A8, B7
  751. MADPS C14, C14, A5, B8
  752. PLU B3, B1, B1
  753. FETCH $0, 24 * SIZE(PREA)
  754. MADPS C24, C24, A6, B8
  755. PLU B4, B2, B2
  756. FETCH $0, 28 * SIZE(PREA)
  757. daddiu PREA, PREA, 32 * SIZE
  758. MADPS C34, C34, A7, B8
  759. MADPS C44, C44, A8, B8
  760. MADPS C11, C11, A1, B1
  761. MADPS C21, C21, A2, B1
  762. gsLQC1(R13, F13, F12, 1) # B3 B4
  763. MADPS C12, C12, A1, B2
  764. MADPS C22, C22, A2, B2
  765. gsLQC1(R12, F5, F4, 2) # A5 A6
  766. MADPS C31, C31, A3, B1
  767. MADPS C41, C41, A4, B1
  768. gsLQC1(R12, F7, F6, 3) # A7 A8
  769. MADPS C32, C32, A3, B2
  770. MADPS C42, C42, A4, B2
  771. FETCH $0, 0 * SIZE(PREB)
  772. MADPS C13, C13, A1, B3
  773. MADPS C23, C23, A2, B3
  774. MADPS C33, C33, A3, B3
  775. MADPS C43, C43, A4, B3
  776. MADPS C14, C14, A1, B4
  777. PLU B7, B5, B5
  778. FETCH $0, 0 * SIZE(PREA)
  779. MADPS C24, C24, A2, B4
  780. PLU B8, B6, B6
  781. FETCH $0, 4 * SIZE(PREA)
  782. MADPS C34, C34, A3, B4
  783. MADPS C44, C44, A4, B4
  784. MADPS C11, C11, A5, B5
  785. MADPS C21, C21, A6, B5
  786. gsLQC1(R13, F9, F8, 2) # B1 B2
  787. MADPS C12, C12, A5, B6
  788. MADPS C22, C22, A6, B6
  789. gsLQC1(R12, F1, F0, 4) # A1 A2
  790. MADPS C31, C31, A7, B5
  791. MADPS C41, C41, A8, B5
  792. gsLQC1(R12, F3, F2, 5) # A3 A4
  793. MADPS C32, C32, A7, B6
  794. MADPS C42, C42, A8, B6
  795. FETCH $0, 4 * SIZE(PREB)
  796. MADPS C13, C13, A5, B7
  797. MADPS C23, C23, A6, B7
  798. MADPS C33, C33, A7, B7
  799. MADPS C43, C43, A8, B7
  800. MADPS C14, C14, A5, B8
  801. PLU B3, B1, B1
  802. FETCH $0, 8 * SIZE(PREA)
  803. MADPS C24, C24, A6, B8
  804. PLU B4, B2, B2
  805. FETCH $0, 12 * SIZE(PREA)
  806. MADPS C34, C34, A7, B8
  807. MADPS C44, C44, A8, B8
  808. MADPS C11, C11, A1, B1
  809. MADPS C21, C21, A2, B1
  810. gsLQC1(R13, F13, F12, 3) # B3 B4
  811. MADPS C12, C12, A1, B2
  812. MADPS C22, C22, A2, B2
  813. gsLQC1(R12, F5, F4, 6) # A5 A6
  814. MADPS C31, C31, A3, B1
  815. MADPS C41, C41, A4, B1
  816. gsLQC1(R12, F7, F6, 7) # A7 A8
  817. MADPS C32, C32, A3, B2
  818. MADPS C42, C42, A4, B2
  819. FETCH $0, 8 * SIZE(PREB)
  820. MADPS C13, C13, A1, B3
  821. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  822. MADPS C23, C23, A2, B3
  823. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  824. MADPS C33, C33, A3, B3
  825. MADPS C43, C43, A4, B3
  826. MADPS C14, C14, A1, B4
  827. PLU B7, B5, B5
  828. FETCH $0, 16 * SIZE(PREA)
  829. MADPS C24, C24, A2, B4
  830. PLU B8, B6, B6
  831. FETCH $0, 20 * SIZE(PREA)
  832. MADPS C34, C34, A3, B4
  833. MADPS C44, C44, A4, B4
  834. MADPS C11, C11, A5, B5
  835. MADPS C21, C21, A6, B5
  836. gsLQC1(R13, F9, F8, 0) # B1 B2
  837. MADPS C12, C12, A5, B6
  838. MADPS C22, C22, A6, B6
  839. gsLQC1(R12, F1, F0, 0) # A1 A2
  840. MADPS C31, C31, A7, B5
  841. MADPS C41, C41, A8, B5
  842. gsLQC1(R12, F3, F2, 1) # A3 A4
  843. MADPS C32, C32, A7, B6
  844. MADPS C42, C42, A8, B6
  845. FETCH $0, 12 * SIZE(PREB)
  846. MADPS C13, C13, A5, B7
  847. MADPS C23, C23, A6, B7
  848. daddiu PREB, PREB, 16 * SIZE
  849. MADPS C33, C33, A7, B7
  850. MADPS C43, C43, A8, B7
  851. MADPS C14, C14, A5, B8
  852. PLU B3, B1, B1
  853. FETCH $0, 24 * SIZE(PREA)
  854. MADPS C24, C24, A6, B8
  855. PLU B4, B2, B2
  856. FETCH $0, 28 * SIZE(PREA)
  857. daddiu PREA, PREA, 32 * SIZE
  858. MADPS C34, C34, A7, B8
  859. MADPS C44, C44, A8, B8
  860. MADPS C11, C11, A1, B1
  861. MADPS C21, C21, A2, B1
  862. gsLQC1(R13, F13, F12, 1) # B3 B4
  863. MADPS C12, C12, A1, B2
  864. MADPS C22, C22, A2, B2
  865. gsLQC1(R12, F5, F4, 2) # A5 A6
  866. MADPS C31, C31, A3, B1
  867. MADPS C41, C41, A4, B1
  868. gsLQC1(R12, F7, F6, 3) # A7 A8
  869. MADPS C32, C32, A3, B2
  870. MADPS C42, C42, A4, B2
  871. FETCH $0, 0 * SIZE(PREB)
  872. MADPS C13, C13, A1, B3
  873. MADPS C23, C23, A2, B3
  874. MADPS C33, C33, A3, B3
  875. MADPS C43, C43, A4, B3
  876. MADPS C14, C14, A1, B4
  877. PLU B7, B5, B5
  878. FETCH $0, 0 * SIZE(PREA)
  879. MADPS C24, C24, A2, B4
  880. PLU B8, B6, B6
  881. FETCH $0, 4 * SIZE(PREA)
  882. MADPS C34, C34, A3, B4
  883. MADPS C44, C44, A4, B4
  884. MADPS C11, C11, A5, B5
  885. MADPS C21, C21, A6, B5
  886. gsLQC1(R13, F9, F8, 2) # B1 B2
  887. MADPS C12, C12, A5, B6
  888. MADPS C22, C22, A6, B6
  889. gsLQC1(R12, F1, F0, 4) # A1 A2
  890. MADPS C31, C31, A7, B5
  891. MADPS C41, C41, A8, B5
  892. gsLQC1(R12, F3, F2, 5) # A3 A4
  893. MADPS C32, C32, A7, B6
  894. MADPS C42, C42, A8, B6
  895. FETCH $0, 4 * SIZE(PREB)
  896. MADPS C13, C13, A5, B7
  897. MADPS C23, C23, A6, B7
  898. MADPS C33, C33, A7, B7
  899. MADPS C43, C43, A8, B7
  900. MADPS C14, C14, A5, B8
  901. PLU B3, B1, B1
  902. FETCH $0, 8 * SIZE(PREA)
  903. MADPS C24, C24, A6, B8
  904. PLU B4, B2, B2
  905. FETCH $0, 12 * SIZE(PREA)
  906. MADPS C34, C34, A7, B8
  907. MADPS C44, C44, A8, B8
  908. MADPS C11, C11, A1, B1
  909. MADPS C21, C21, A2, B1
  910. gsLQC1(R13, F13, F12, 3) # B3 B4
  911. MADPS C12, C12, A1, B2
  912. MADPS C22, C22, A2, B2
  913. gsLQC1(R12, F5, F4, 6) # A5 A6
  914. MADPS C31, C31, A3, B1
  915. MADPS C41, C41, A4, B1
  916. gsLQC1(R12, F7, F6, 7) # A7 A8
  917. MADPS C32, C32, A3, B2
  918. MADPS C42, C42, A4, B2
  919. FETCH $0, 8 * SIZE(PREB)
  920. MADPS C13, C13, A1, B3
  921. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  922. MADPS C23, C23, A2, B3
  923. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  924. MADPS C33, C33, A3, B3
  925. MADPS C43, C43, A4, B3
  926. MADPS C14, C14, A1, B4
  927. PLU B7, B5, B5
  928. FETCH $0, 16 * SIZE(PREA)
  929. MADPS C24, C24, A2, B4
  930. PLU B8, B6, B6
  931. FETCH $0, 20 * SIZE(PREA)
  932. MADPS C34, C34, A3, B4
  933. MADPS C44, C44, A4, B4
  934. MADPS C11, C11, A5, B5
  935. MADPS C21, C21, A6, B5
  936. gsLQC1(R13, F9, F8, 0) # B1 B2
  937. MADPS C12, C12, A5, B6
  938. MADPS C22, C22, A6, B6
  939. gsLQC1(R12, F1, F0, 0) # A1 A2
  940. MADPS C31, C31, A7, B5
  941. MADPS C41, C41, A8, B5
  942. gsLQC1(R12, F3, F2, 1) # A3 A4
  943. MADPS C32, C32, A7, B6
  944. MADPS C42, C42, A8, B6
  945. FETCH $0, 12 * SIZE(PREB)
  946. MADPS C13, C13, A5, B7
  947. MADPS C23, C23, A6, B7
  948. daddiu PREB, PREB, 16 * SIZE
  949. MADPS C33, C33, A7, B7
  950. MADPS C43, C43, A8, B7
  951. MADPS C14, C14, A5, B8
  952. PLU B3, B1, B1
  953. FETCH $0, 24 * SIZE(PREA)
  954. MADPS C24, C24, A6, B8
  955. PLU B4, B2, B2
  956. FETCH $0, 28 * SIZE(PREA)
  957. daddiu PREA, PREA, 32 * SIZE
  958. MADPS C34, C34, A7, B8
  959. MADPS C44, C44, A8, B8
  960. MADPS C11, C11, A1, B1
  961. MADPS C21, C21, A2, B1
  962. gsLQC1(R13, F13, F12, 1) # B3 B4
  963. MADPS C12, C12, A1, B2
  964. MADPS C22, C22, A2, B2
  965. gsLQC1(R12, F5, F4, 2) # A5 A6
  966. MADPS C31, C31, A3, B1
  967. MADPS C41, C41, A4, B1
  968. gsLQC1(R12, F7, F6, 3) # A7 A8
  969. MADPS C32, C32, A3, B2
  970. MADPS C42, C42, A4, B2
  971. FETCH $0, 0 * SIZE(PREB)
  972. MADPS C13, C13, A1, B3
  973. MADPS C23, C23, A2, B3
  974. MADPS C33, C33, A3, B3
  975. MADPS C43, C43, A4, B3
  976. MADPS C14, C14, A1, B4
  977. PLU B7, B5, B5
  978. FETCH $0, 0 * SIZE(PREA)
  979. MADPS C24, C24, A2, B4
  980. PLU B8, B6, B6
  981. FETCH $0, 4 * SIZE(PREA)
  982. MADPS C34, C34, A3, B4
  983. MADPS C44, C44, A4, B4
  984. MADPS C11, C11, A5, B5
  985. MADPS C21, C21, A6, B5
  986. gsLQC1(R13, F9, F8, 2) # B1 B2
  987. MADPS C12, C12, A5, B6
  988. MADPS C22, C22, A6, B6
  989. gsLQC1(R12, F1, F0, 4) # A1 A2
  990. MADPS C31, C31, A7, B5
  991. MADPS C41, C41, A8, B5
  992. gsLQC1(R12, F3, F2, 5) # A3 A4
  993. MADPS C32, C32, A7, B6
  994. MADPS C42, C42, A8, B6
  995. FETCH $0, 4 * SIZE(PREB)
  996. MADPS C13, C13, A5, B7
  997. MADPS C23, C23, A6, B7
  998. MADPS C33, C33, A7, B7
  999. MADPS C43, C43, A8, B7
  1000. MADPS C14, C14, A5, B8
  1001. PLU B3, B1, B1
  1002. FETCH $0, 8 * SIZE(PREA)
  1003. MADPS C24, C24, A6, B8
  1004. PLU B4, B2, B2
  1005. FETCH $0, 12 * SIZE(PREA)
  1006. MADPS C34, C34, A7, B8
  1007. MADPS C44, C44, A8, B8
  1008. MADPS C11, C11, A1, B1
  1009. MADPS C21, C21, A2, B1
  1010. gsLQC1(R13, F13, F12, 3) # B3 B4
  1011. MADPS C12, C12, A1, B2
  1012. MADPS C22, C22, A2, B2
  1013. gsLQC1(R12, F5, F4, 6) # A5 A6
  1014. MADPS C31, C31, A3, B1
  1015. MADPS C41, C41, A4, B1
  1016. gsLQC1(R12, F7, F6, 7) # A7 A8
  1017. MADPS C32, C32, A3, B2
  1018. MADPS C42, C42, A4, B2
  1019. FETCH $0, 8 * SIZE(PREB)
  1020. MADPS C13, C13, A1, B3
  1021. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  1022. MADPS C23, C23, A2, B3
  1023. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  1024. MADPS C33, C33, A3, B3
  1025. MADPS C43, C43, A4, B3
  1026. MADPS C14, C14, A1, B4
  1027. PLU B7, B5, B5
  1028. FETCH $0, 16 * SIZE(PREA)
  1029. MADPS C24, C24, A2, B4
  1030. PLU B8, B6, B6
  1031. FETCH $0, 20 * SIZE(PREA)
  1032. MADPS C34, C34, A3, B4
  1033. MADPS C44, C44, A4, B4
  1034. MADPS C11, C11, A5, B5
  1035. MADPS C21, C21, A6, B5
  1036. gsLQC1(R13, F9, F8, 0) # B1 B2
  1037. MADPS C12, C12, A5, B6
  1038. MADPS C22, C22, A6, B6
  1039. gsLQC1(R12, F1, F0, 0) # A1 A2
  1040. MADPS C31, C31, A7, B5
  1041. MADPS C41, C41, A8, B5
  1042. gsLQC1(R12, F3, F2, 1) # A3 A4
  1043. MADPS C32, C32, A7, B6
  1044. MADPS C42, C42, A8, B6
  1045. FETCH $0, 12 * SIZE(PREB)
  1046. MADPS C13, C13, A5, B7
  1047. MADPS C23, C23, A6, B7
  1048. daddiu PREB, PREB, 16 * SIZE
  1049. MADPS C33, C33, A7, B7
  1050. MADPS C43, C43, A8, B7
  1051. MADPS C14, C14, A5, B8
  1052. PLU B3, B1, B1
  1053. FETCH $0, 24 * SIZE(PREA)
  1054. MADPS C24, C24, A6, B8
  1055. PLU B4, B2, B2
  1056. FETCH $0, 28 * SIZE(PREA)
  1057. daddiu PREA, PREA, 32 * SIZE
  1058. MADPS C34, C34, A7, B8
  1059. MADPS C44, C44, A8, B8
  1060. MADPS C11, C11, A1, B1
  1061. MADPS C21, C21, A2, B1
  1062. gsLQC1(R13, F13, F12, 1) # B3 B4
  1063. MADPS C12, C12, A1, B2
  1064. MADPS C22, C22, A2, B2
  1065. gsLQC1(R12, F5, F4, 2) # A5 A6
  1066. MADPS C31, C31, A3, B1
  1067. MADPS C41, C41, A4, B1
  1068. gsLQC1(R12, F7, F6, 3) # A7 A8
  1069. MADPS C32, C32, A3, B2
  1070. MADPS C42, C42, A4, B2
  1071. FETCH $0, 0 * SIZE(PREB)
  1072. MADPS C13, C13, A1, B3
  1073. MADPS C23, C23, A2, B3
  1074. MADPS C33, C33, A3, B3
  1075. MADPS C43, C43, A4, B3
  1076. MADPS C14, C14, A1, B4
  1077. PLU B7, B5, B5
  1078. FETCH $0, 0 * SIZE(PREA)
  1079. MADPS C24, C24, A2, B4
  1080. PLU B8, B6, B6
  1081. FETCH $0, 4 * SIZE(PREA)
  1082. MADPS C34, C34, A3, B4
  1083. MADPS C44, C44, A4, B4
  1084. MADPS C11, C11, A5, B5
  1085. MADPS C21, C21, A6, B5
  1086. gsLQC1(R13, F9, F8, 2) # B1 B2
  1087. MADPS C12, C12, A5, B6
  1088. MADPS C22, C22, A6, B6
  1089. gsLQC1(R12, F1, F0, 4) # A1 A2
  1090. MADPS C31, C31, A7, B5
  1091. MADPS C41, C41, A8, B5
  1092. gsLQC1(R12, F3, F2, 5) # A3 A4
  1093. MADPS C32, C32, A7, B6
  1094. MADPS C42, C42, A8, B6
  1095. FETCH $0, 4 * SIZE(PREB)
  1096. MADPS C13, C13, A5, B7
  1097. MADPS C23, C23, A6, B7
  1098. MADPS C33, C33, A7, B7
  1099. MADPS C43, C43, A8, B7
  1100. MADPS C14, C14, A5, B8
  1101. PLU B3, B1, B1
  1102. FETCH $0, 8 * SIZE(PREA)
  1103. MADPS C24, C24, A6, B8
  1104. PLU B4, B2, B2
  1105. FETCH $0, 12 * SIZE(PREA)
  1106. MADPS C34, C34, A7, B8
  1107. MADPS C44, C44, A8, B8
  1108. MADPS C11, C11, A1, B1
  1109. MADPS C21, C21, A2, B1
  1110. gsLQC1(R13, F13, F12, 3) # B3 B4
  1111. MADPS C12, C12, A1, B2
  1112. MADPS C22, C22, A2, B2
  1113. gsLQC1(R12, F5, F4, 6) # A5 A6
  1114. MADPS C31, C31, A3, B1
  1115. MADPS C41, C41, A4, B1
  1116. gsLQC1(R12, F7, F6, 7) # A7 A8
  1117. MADPS C32, C32, A3, B2
  1118. MADPS C42, C42, A4, B2
  1119. FETCH $0, 8 * SIZE(PREB)
  1120. MADPS C13, C13, A1, B3
  1121. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  1122. MADPS C23, C23, A2, B3
  1123. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  1124. MADPS C33, C33, A3, B3
  1125. MADPS C43, C43, A4, B3
  1126. MADPS C14, C14, A1, B4
  1127. PLU B7, B5, B5
  1128. FETCH $0, 16 * SIZE(PREA)
  1129. MADPS C24, C24, A2, B4
  1130. PLU B8, B6, B6
  1131. FETCH $0, 20 * SIZE(PREA)
  1132. MADPS C34, C34, A3, B4
  1133. MADPS C44, C44, A4, B4
  1134. MADPS C11, C11, A5, B5
  1135. MADPS C21, C21, A6, B5
  1136. gsLQC1(R13, F9, F8, 0) # B1 B2
  1137. MADPS C12, C12, A5, B6
  1138. MADPS C22, C22, A6, B6
  1139. gsLQC1(R12, F1, F0, 0) # A1 A2
  1140. MADPS C31, C31, A7, B5
  1141. MADPS C41, C41, A8, B5
  1142. gsLQC1(R12, F3, F2, 1) # A3 A4
  1143. MADPS C32, C32, A7, B6
  1144. MADPS C42, C42, A8, B6
  1145. FETCH $0, 12 * SIZE(PREB)
  1146. MADPS C13, C13, A5, B7
  1147. MADPS C23, C23, A6, B7
  1148. daddiu PREB, PREB, 16 * SIZE
  1149. MADPS C33, C33, A7, B7
  1150. MADPS C43, C43, A8, B7
  1151. MADPS C14, C14, A5, B8
  1152. PLU B3, B1, B1
  1153. FETCH $0, 24 * SIZE(PREA)
  1154. MADPS C24, C24, A6, B8
  1155. PLU B4, B2, B2
  1156. FETCH $0, 28 * SIZE(PREA)
  1157. daddiu PREA, PREA, 32 * SIZE
  1158. MADPS C34, C34, A7, B8
  1159. MADPS C44, C44, A8, B8
  1160. MADPS C11, C11, A1, B1
  1161. MADPS C21, C21, A2, B1
  1162. gsLQC1(R13, F13, F12, 1) # B3 B4
  1163. MADPS C12, C12, A1, B2
  1164. MADPS C22, C22, A2, B2
  1165. gsLQC1(R12, F5, F4, 2) # A5 A6
  1166. MADPS C31, C31, A3, B1
  1167. MADPS C41, C41, A4, B1
  1168. gsLQC1(R12, F7, F6, 3) # A7 A8
  1169. MADPS C32, C32, A3, B2
  1170. MADPS C42, C42, A4, B2
  1171. FETCH $0, 0 * SIZE(PREB)
  1172. MADPS C13, C13, A1, B3
  1173. MADPS C23, C23, A2, B3
  1174. MADPS C33, C33, A3, B3
  1175. MADPS C43, C43, A4, B3
  1176. MADPS C14, C14, A1, B4
  1177. PLU B7, B5, B5
  1178. FETCH $0, 0 * SIZE(PREA)
  1179. MADPS C24, C24, A2, B4
  1180. PLU B8, B6, B6
  1181. FETCH $0, 4 * SIZE(PREA)
  1182. MADPS C34, C34, A3, B4
  1183. MADPS C44, C44, A4, B4
  1184. MADPS C11, C11, A5, B5
  1185. MADPS C21, C21, A6, B5
  1186. gsLQC1(R13, F9, F8, 2) # B1 B2
  1187. MADPS C12, C12, A5, B6
  1188. MADPS C22, C22, A6, B6
  1189. gsLQC1(R12, F1, F0, 4) # A1 A2
  1190. MADPS C31, C31, A7, B5
  1191. MADPS C41, C41, A8, B5
  1192. gsLQC1(R12, F3, F2, 5) # A3 A4
  1193. MADPS C32, C32, A7, B6
  1194. MADPS C42, C42, A8, B6
  1195. FETCH $0, 4 * SIZE(PREB)
  1196. MADPS C13, C13, A5, B7
  1197. MADPS C23, C23, A6, B7
  1198. MADPS C33, C33, A7, B7
  1199. MADPS C43, C43, A8, B7
  1200. MADPS C14, C14, A5, B8
  1201. PLU B3, B1, B1
  1202. FETCH $0, 8 * SIZE(PREA)
  1203. MADPS C24, C24, A6, B8
  1204. PLU B4, B2, B2
  1205. FETCH $0, 12 * SIZE(PREA)
  1206. MADPS C34, C34, A7, B8
  1207. MADPS C44, C44, A8, B8
  1208. MADPS C11, C11, A1, B1
  1209. MADPS C21, C21, A2, B1
  1210. gsLQC1(R13, F13, F12, 3) # B3 B4
  1211. MADPS C12, C12, A1, B2
  1212. MADPS C22, C22, A2, B2
  1213. gsLQC1(R12, F5, F4, 6) # A5 A6
  1214. MADPS C31, C31, A3, B1
  1215. MADPS C41, C41, A4, B1
  1216. gsLQC1(R12, F7, F6, 7) # A7 A8
  1217. MADPS C32, C32, A3, B2
  1218. MADPS C42, C42, A4, B2
  1219. FETCH $0, 8 * SIZE(PREB)
  1220. MADPS C13, C13, A1, B3
  1221. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  1222. MADPS C23, C23, A2, B3
  1223. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  1224. MADPS C33, C33, A3, B3
  1225. MADPS C43, C43, A4, B3
  1226. MADPS C14, C14, A1, B4
  1227. PLU B7, B5, B5
  1228. FETCH $0, 16 * SIZE(PREA)
  1229. MADPS C24, C24, A2, B4
  1230. PLU B8, B6, B6
  1231. FETCH $0, 20 * SIZE(PREA)
  1232. MADPS C34, C34, A3, B4
  1233. MADPS C44, C44, A4, B4
  1234. MADPS C11, C11, A5, B5
  1235. MADPS C21, C21, A6, B5
  1236. gsLQC1(R13, F9, F8, 0) # B1 B2
  1237. MADPS C12, C12, A5, B6
  1238. MADPS C22, C22, A6, B6
  1239. gsLQC1(R12, F1, F0, 0) # A1 A2
  1240. MADPS C31, C31, A7, B5
  1241. MADPS C41, C41, A8, B5
  1242. gsLQC1(R12, F3, F2, 1) # A3 A4
  1243. MADPS C32, C32, A7, B6
  1244. MADPS C42, C42, A8, B6
  1245. FETCH $0, 12 * SIZE(PREB)
  1246. MADPS C13, C13, A5, B7
  1247. MADPS C23, C23, A6, B7
  1248. daddiu PREB, PREB, 16 * SIZE
  1249. MADPS C33, C33, A7, B7
  1250. MADPS C43, C43, A8, B7
  1251. MADPS C14, C14, A5, B8
  1252. PLU B3, B1, B1
  1253. FETCH $0, 24 * SIZE(PREA)
  1254. MADPS C24, C24, A6, B8
  1255. PLU B4, B2, B2
  1256. FETCH $0, 28 * SIZE(PREA)
  1257. daddiu PREA, PREA, 32 * SIZE
  1258. MADPS C34, C34, A7, B8
  1259. MADPS C44, C44, A8, B8
  1260. MADPS C11, C11, A1, B1
  1261. MADPS C21, C21, A2, B1
  1262. gsLQC1(R13, F13, F12, 1) # B3 B4
  1263. MADPS C12, C12, A1, B2
  1264. MADPS C22, C22, A2, B2
  1265. gsLQC1(R12, F5, F4, 2) # A5 A6
  1266. MADPS C31, C31, A3, B1
  1267. MADPS C41, C41, A4, B1
  1268. gsLQC1(R12, F7, F6, 3) # A7 A8
  1269. MADPS C32, C32, A3, B2
  1270. MADPS C42, C42, A4, B2
  1271. FETCH $0, 0 * SIZE(PREB)
  1272. MADPS C13, C13, A1, B3
  1273. MADPS C23, C23, A2, B3
  1274. MADPS C33, C33, A3, B3
  1275. MADPS C43, C43, A4, B3
  1276. MADPS C14, C14, A1, B4
  1277. PLU B7, B5, B5
  1278. FETCH $0, 0 * SIZE(PREA)
  1279. MADPS C24, C24, A2, B4
  1280. PLU B8, B6, B6
  1281. FETCH $0, 4 * SIZE(PREA)
  1282. MADPS C34, C34, A3, B4
  1283. MADPS C44, C44, A4, B4
  1284. MADPS C11, C11, A5, B5
  1285. MADPS C21, C21, A6, B5
  1286. gsLQC1(R13, F9, F8, 2) # B1 B2
  1287. MADPS C12, C12, A5, B6
  1288. MADPS C22, C22, A6, B6
  1289. gsLQC1(R12, F1, F0, 4) # A1 A2
  1290. MADPS C31, C31, A7, B5
  1291. MADPS C41, C41, A8, B5
  1292. gsLQC1(R12, F3, F2, 5) # A3 A4
  1293. MADPS C32, C32, A7, B6
  1294. MADPS C42, C42, A8, B6
  1295. FETCH $0, 4 * SIZE(PREB)
  1296. MADPS C13, C13, A5, B7
  1297. MADPS C23, C23, A6, B7
  1298. MADPS C33, C33, A7, B7
  1299. MADPS C43, C43, A8, B7
  1300. MADPS C14, C14, A5, B8
  1301. PLU B3, B1, B1
  1302. FETCH $0, 8 * SIZE(PREA)
  1303. MADPS C24, C24, A6, B8
  1304. PLU B4, B2, B2
  1305. FETCH $0, 12 * SIZE(PREA)
  1306. MADPS C34, C34, A7, B8
  1307. MADPS C44, C44, A8, B8
  1308. MADPS C11, C11, A1, B1
  1309. MADPS C21, C21, A2, B1
  1310. gsLQC1(R13, F13, F12, 3) # B3 B4
  1311. MADPS C12, C12, A1, B2
  1312. MADPS C22, C22, A2, B2
  1313. gsLQC1(R12, F5, F4, 6) # A5 A6
  1314. MADPS C31, C31, A3, B1
  1315. MADPS C41, C41, A4, B1
  1316. gsLQC1(R12, F7, F6, 7) # A7 A8
  1317. MADPS C32, C32, A3, B2
  1318. MADPS C42, C42, A4, B2
  1319. FETCH $0, 8 * SIZE(PREB)
  1320. MADPS C13, C13, A1, B3
  1321. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  1322. MADPS C23, C23, A2, B3
  1323. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  1324. MADPS C33, C33, A3, B3
  1325. MADPS C43, C43, A4, B3
  1326. MADPS C14, C14, A1, B4
  1327. PLU B7, B5, B5
  1328. FETCH $0, 16 * SIZE(PREA)
  1329. MADPS C24, C24, A2, B4
  1330. PLU B8, B6, B6
  1331. FETCH $0, 20 * SIZE(PREA)
  1332. MADPS C34, C34, A3, B4
  1333. MADPS C44, C44, A4, B4
  1334. MADPS C11, C11, A5, B5
  1335. MADPS C21, C21, A6, B5
  1336. gsLQC1(R13, F9, F8, 0) # B1 B2
  1337. MADPS C12, C12, A5, B6
  1338. MADPS C22, C22, A6, B6
  1339. gsLQC1(R12, F1, F0, 0) # A1 A2
  1340. MADPS C31, C31, A7, B5
  1341. MADPS C41, C41, A8, B5
  1342. gsLQC1(R12, F3, F2, 1) # A3 A4
  1343. MADPS C32, C32, A7, B6
  1344. MADPS C42, C42, A8, B6
  1345. FETCH $0, 12 * SIZE(PREB)
  1346. MADPS C13, C13, A5, B7
  1347. MADPS C23, C23, A6, B7
  1348. daddiu PREB, PREB, 16 * SIZE
  1349. MADPS C33, C33, A7, B7
  1350. MADPS C43, C43, A8, B7
  1351. MADPS C14, C14, A5, B8
  1352. PLU B3, B1, B1
  1353. FETCH $0, 24 * SIZE(PREA)
  1354. MADPS C24, C24, A6, B8
  1355. PLU B4, B2, B2
  1356. FETCH $0, 28 * SIZE(PREA)
  1357. daddiu PREA, PREA, 32 * SIZE
  1358. MADPS C34, C34, A7, B8
  1359. MADPS C44, C44, A8, B8
  1360. MADPS C11, C11, A1, B1
  1361. MADPS C21, C21, A2, B1
  1362. gsLQC1(R13, F13, F12, 1) # B3 B4
  1363. MADPS C12, C12, A1, B2
  1364. MADPS C22, C22, A2, B2
  1365. gsLQC1(R12, F5, F4, 2) # A5 A6
  1366. MADPS C31, C31, A3, B1
  1367. MADPS C41, C41, A4, B1
  1368. gsLQC1(R12, F7, F6, 3) # A7 A8
  1369. MADPS C32, C32, A3, B2
  1370. MADPS C42, C42, A4, B2
  1371. FETCH $0, 0 * SIZE(PREB)
  1372. MADPS C13, C13, A1, B3
  1373. MADPS C23, C23, A2, B3
  1374. MADPS C33, C33, A3, B3
  1375. MADPS C43, C43, A4, B3
  1376. MADPS C14, C14, A1, B4
  1377. PLU B7, B5, B5
  1378. FETCH $0, 0 * SIZE(PREA)
  1379. MADPS C24, C24, A2, B4
  1380. PLU B8, B6, B6
  1381. FETCH $0, 4 * SIZE(PREA)
  1382. MADPS C34, C34, A3, B4
  1383. MADPS C44, C44, A4, B4
  1384. MADPS C11, C11, A5, B5
  1385. MADPS C21, C21, A6, B5
  1386. gsLQC1(R13, F9, F8, 2) # B1 B2
  1387. MADPS C12, C12, A5, B6
  1388. MADPS C22, C22, A6, B6
  1389. gsLQC1(R12, F1, F0, 4) # A1 A2
  1390. MADPS C31, C31, A7, B5
  1391. MADPS C41, C41, A8, B5
  1392. gsLQC1(R12, F3, F2, 5) # A3 A4
  1393. MADPS C32, C32, A7, B6
  1394. MADPS C42, C42, A8, B6
  1395. FETCH $0, 4 * SIZE(PREB)
  1396. MADPS C13, C13, A5, B7
  1397. MADPS C23, C23, A6, B7
  1398. MADPS C33, C33, A7, B7
  1399. MADPS C43, C43, A8, B7
  1400. MADPS C14, C14, A5, B8
  1401. PLU B3, B1, B1
  1402. FETCH $0, 8 * SIZE(PREA)
  1403. MADPS C24, C24, A6, B8
  1404. PLU B4, B2, B2
  1405. FETCH $0, 12 * SIZE(PREA)
  1406. MADPS C34, C34, A7, B8
  1407. MADPS C44, C44, A8, B8
  1408. MADPS C11, C11, A1, B1
  1409. MADPS C21, C21, A2, B1
  1410. gsLQC1(R13, F13, F12, 3) # B3 B4
  1411. MADPS C12, C12, A1, B2
  1412. MADPS C22, C22, A2, B2
  1413. gsLQC1(R12, F5, F4, 6) # A5 A6
  1414. MADPS C31, C31, A3, B1
  1415. MADPS C41, C41, A4, B1
  1416. gsLQC1(R12, F7, F6, 7) # A7 A8
  1417. MADPS C32, C32, A3, B2
  1418. MADPS C42, C42, A4, B2
  1419. FETCH $0, 8 * SIZE(PREB)
  1420. MADPS C13, C13, A1, B3
  1421. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  1422. MADPS C23, C23, A2, B3
  1423. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  1424. MADPS C33, C33, A3, B3
  1425. MADPS C43, C43, A4, B3
  1426. MADPS C14, C14, A1, B4
  1427. PLU B7, B5, B5
  1428. FETCH $0, 16 * SIZE(PREA)
  1429. MADPS C24, C24, A2, B4
  1430. PLU B8, B6, B6
  1431. FETCH $0, 20 * SIZE(PREA)
  1432. MADPS C34, C34, A3, B4
  1433. MADPS C44, C44, A4, B4
  1434. MADPS C11, C11, A5, B5
  1435. MADPS C21, C21, A6, B5
  1436. gsLQC1(R13, F9, F8, 0) # B1 B2
  1437. MADPS C12, C12, A5, B6
  1438. MADPS C22, C22, A6, B6
  1439. gsLQC1(R12, F1, F0, 0) # A1 A2
  1440. MADPS C31, C31, A7, B5
  1441. MADPS C41, C41, A8, B5
  1442. gsLQC1(R12, F3, F2, 1) # A3 A4
  1443. MADPS C32, C32, A7, B6
  1444. MADPS C42, C42, A8, B6
  1445. FETCH $0, 12 * SIZE(PREB)
  1446. MADPS C13, C13, A5, B7
  1447. MADPS C23, C23, A6, B7
  1448. daddiu PREB, PREB, 16 * SIZE
  1449. MADPS C33, C33, A7, B7
  1450. MADPS C43, C43, A8, B7
  1451. MADPS C14, C14, A5, B8
  1452. PLU B3, B1, B1
  1453. FETCH $0, 24 * SIZE(PREA)
  1454. MADPS C24, C24, A6, B8
  1455. PLU B4, B2, B2
  1456. FETCH $0, 28 * SIZE(PREA)
  1457. daddiu PREA, PREA, 32 * SIZE
  1458. MADPS C34, C34, A7, B8
  1459. MADPS C44, C44, A8, B8
  1460. MADPS C11, C11, A1, B1
  1461. MADPS C21, C21, A2, B1
  1462. gsLQC1(R13, F13, F12, 1) # B3 B4
  1463. MADPS C12, C12, A1, B2
  1464. MADPS C22, C22, A2, B2
  1465. gsLQC1(R12, F5, F4, 2) # A5 A6
  1466. MADPS C31, C31, A3, B1
  1467. MADPS C41, C41, A4, B1
  1468. gsLQC1(R12, F7, F6, 3) # A7 A8
  1469. MADPS C32, C32, A3, B2
  1470. MADPS C42, C42, A4, B2
  1471. FETCH $0, 0 * SIZE(PREB)
  1472. MADPS C13, C13, A1, B3
  1473. MADPS C23, C23, A2, B3
  1474. MADPS C33, C33, A3, B3
  1475. MADPS C43, C43, A4, B3
  1476. MADPS C14, C14, A1, B4
  1477. PLU B7, B5, B5
  1478. FETCH $0, 0 * SIZE(PREA)
  1479. MADPS C24, C24, A2, B4
  1480. PLU B8, B6, B6
  1481. FETCH $0, 4 * SIZE(PREA)
  1482. MADPS C34, C34, A3, B4
  1483. MADPS C44, C44, A4, B4
  1484. MADPS C11, C11, A5, B5
  1485. MADPS C21, C21, A6, B5
  1486. gsLQC1(R13, F9, F8, 2) # B1 B2
  1487. MADPS C12, C12, A5, B6
  1488. MADPS C22, C22, A6, B6
  1489. gsLQC1(R12, F1, F0, 4) # A1 A2
  1490. MADPS C31, C31, A7, B5
  1491. MADPS C41, C41, A8, B5
  1492. gsLQC1(R12, F3, F2, 5) # A3 A4
  1493. MADPS C32, C32, A7, B6
  1494. MADPS C42, C42, A8, B6
  1495. FETCH $0, 4 * SIZE(PREB)
  1496. MADPS C13, C13, A5, B7
  1497. MADPS C23, C23, A6, B7
  1498. MADPS C33, C33, A7, B7
  1499. MADPS C43, C43, A8, B7
  1500. MADPS C14, C14, A5, B8
  1501. PLU B3, B1, B1
  1502. FETCH $0, 8 * SIZE(PREA)
  1503. MADPS C24, C24, A6, B8
  1504. PLU B4, B2, B2
  1505. FETCH $0, 12 * SIZE(PREA)
  1506. MADPS C34, C34, A7, B8
  1507. MADPS C44, C44, A8, B8
  1508. MADPS C11, C11, A1, B1
  1509. MADPS C21, C21, A2, B1
  1510. gsLQC1(R13, F13, F12, 3) # B3 B4
  1511. MADPS C12, C12, A1, B2
  1512. MADPS C22, C22, A2, B2
  1513. gsLQC1(R12, F5, F4, 6) # A5 A6
  1514. MADPS C31, C31, A3, B1
  1515. MADPS C41, C41, A4, B1
  1516. gsLQC1(R12, F7, F6, 7) # A7 A8
  1517. MADPS C32, C32, A3, B2
  1518. MADPS C42, C42, A4, B2
  1519. FETCH $0, 8 * SIZE(PREB)
  1520. MADPS C13, C13, A1, B3
  1521. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  1522. MADPS C23, C23, A2, B3
  1523. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  1524. MADPS C33, C33, A3, B3
  1525. MADPS C43, C43, A4, B3
  1526. MADPS C14, C14, A1, B4
  1527. PLU B7, B5, B5
  1528. FETCH $0, 16 * SIZE(PREA)
  1529. MADPS C24, C24, A2, B4
  1530. PLU B8, B6, B6
  1531. FETCH $0, 20 * SIZE(PREA)
  1532. MADPS C34, C34, A3, B4
  1533. MADPS C44, C44, A4, B4
  1534. MADPS C11, C11, A5, B5
  1535. MADPS C21, C21, A6, B5
  1536. gsLQC1(R13, F9, F8, 0) # B1 B2
  1537. MADPS C12, C12, A5, B6
  1538. MADPS C22, C22, A6, B6
  1539. gsLQC1(R12, F1, F0, 0) # A1 A2
  1540. MADPS C31, C31, A7, B5
  1541. MADPS C41, C41, A8, B5
  1542. gsLQC1(R12, F3, F2, 1) # A3 A4
  1543. MADPS C32, C32, A7, B6
  1544. MADPS C42, C42, A8, B6
  1545. FETCH $0, 12 * SIZE(PREB)
  1546. MADPS C13, C13, A5, B7
  1547. MADPS C23, C23, A6, B7
  1548. daddiu PREB, PREB, 16 * SIZE
  1549. MADPS C33, C33, A7, B7
  1550. MADPS C43, C43, A8, B7
  1551. MADPS C14, C14, A5, B8
  1552. PLU B3, B1, B1
  1553. FETCH $0, 24 * SIZE(PREA)
  1554. MADPS C24, C24, A6, B8
  1555. PLU B4, B2, B2
  1556. FETCH $0, 28 * SIZE(PREA)
  1557. daddiu PREA, PREA, 32 * SIZE
  1558. MADPS C34, C34, A7, B8
  1559. MADPS C44, C44, A8, B8
  1560. MADPS C11, C11, A1, B1
  1561. MADPS C21, C21, A2, B1
  1562. gsLQC1(R13, F13, F12, 1) # B3 B4
  1563. MADPS C12, C12, A1, B2
  1564. MADPS C22, C22, A2, B2
  1565. gsLQC1(R12, F5, F4, 2) # A5 A6
  1566. MADPS C31, C31, A3, B1
  1567. MADPS C41, C41, A4, B1
  1568. gsLQC1(R12, F7, F6, 3) # A7 A8
  1569. MADPS C32, C32, A3, B2
  1570. MADPS C42, C42, A4, B2
  1571. FETCH $0, 0 * SIZE(PREB)
  1572. MADPS C13, C13, A1, B3
  1573. MADPS C23, C23, A2, B3
  1574. MADPS C33, C33, A3, B3
  1575. MADPS C43, C43, A4, B3
  1576. MADPS C14, C14, A1, B4
  1577. PLU B7, B5, B5
  1578. FETCH $0, 0 * SIZE(PREA)
  1579. MADPS C24, C24, A2, B4
  1580. PLU B8, B6, B6
  1581. FETCH $0, 4 * SIZE(PREA)
  1582. MADPS C34, C34, A3, B4
  1583. MADPS C44, C44, A4, B4
  1584. MADPS C11, C11, A5, B5
  1585. MADPS C21, C21, A6, B5
  1586. gsLQC1(R13, F9, F8, 2) # B1 B2
  1587. MADPS C12, C12, A5, B6
  1588. MADPS C22, C22, A6, B6
  1589. gsLQC1(R12, F1, F0, 4) # A1 A2
  1590. MADPS C31, C31, A7, B5
  1591. MADPS C41, C41, A8, B5
  1592. gsLQC1(R12, F3, F2, 5) # A3 A4
  1593. MADPS C32, C32, A7, B6
  1594. MADPS C42, C42, A8, B6
  1595. FETCH $0, 4 * SIZE(PREB)
  1596. MADPS C13, C13, A5, B7
  1597. MADPS C23, C23, A6, B7
  1598. MADPS C33, C33, A7, B7
  1599. MADPS C43, C43, A8, B7
  1600. MADPS C14, C14, A5, B8
  1601. PLU B3, B1, B1
  1602. FETCH $0, 8 * SIZE(PREA)
  1603. MADPS C24, C24, A6, B8
  1604. PLU B4, B2, B2
  1605. FETCH $0, 12 * SIZE(PREA)
  1606. MADPS C34, C34, A7, B8
  1607. MADPS C44, C44, A8, B8
  1608. MADPS C11, C11, A1, B1
  1609. MADPS C21, C21, A2, B1
  1610. gsLQC1(R13, F13, F12, 3) # B3 B4
  1611. MADPS C12, C12, A1, B2
  1612. MADPS C22, C22, A2, B2
  1613. gsLQC1(R12, F5, F4, 6) # A5 A6
  1614. MADPS C31, C31, A3, B1
  1615. MADPS C41, C41, A4, B1
  1616. gsLQC1(R12, F7, F6, 7) # A7 A8
  1617. MADPS C32, C32, A3, B2
  1618. MADPS C42, C42, A4, B2
  1619. FETCH $0, 8 * SIZE(PREB)
  1620. MADPS C13, C13, A1, B3
  1621. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  1622. MADPS C23, C23, A2, B3
  1623. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  1624. MADPS C33, C33, A3, B3
  1625. MADPS C43, C43, A4, B3
  1626. MADPS C14, C14, A1, B4
  1627. PLU B7, B5, B5
  1628. FETCH $0, 16 * SIZE(PREA)
  1629. MADPS C24, C24, A2, B4
  1630. PLU B8, B6, B6
  1631. FETCH $0, 20 * SIZE(PREA)
  1632. MADPS C34, C34, A3, B4
  1633. MADPS C44, C44, A4, B4
  1634. MADPS C11, C11, A5, B5
  1635. MADPS C21, C21, A6, B5
  1636. gsLQC1(R13, F9, F8, 0) # B1 B2
  1637. MADPS C12, C12, A5, B6
  1638. MADPS C22, C22, A6, B6
  1639. gsLQC1(R12, F1, F0, 0) # A1 A2
  1640. MADPS C31, C31, A7, B5
  1641. MADPS C41, C41, A8, B5
  1642. gsLQC1(R12, F3, F2, 1) # A3 A4
  1643. MADPS C32, C32, A7, B6
  1644. MADPS C42, C42, A8, B6
  1645. FETCH $0, 12 * SIZE(PREB)
  1646. MADPS C13, C13, A5, B7
  1647. MADPS C23, C23, A6, B7
  1648. daddiu PREB, PREB, 16 * SIZE
  1649. MADPS C33, C33, A7, B7
  1650. MADPS C43, C43, A8, B7
  1651. MADPS C14, C14, A5, B8
  1652. PLU B3, B1, B1
  1653. FETCH $0, 24 * SIZE(PREA)
  1654. MADPS C24, C24, A6, B8
  1655. PLU B4, B2, B2
  1656. FETCH $0, 28 * SIZE(PREA)
  1657. daddiu PREA, PREA, 32 * SIZE
  1658. MADPS C34, C34, A7, B8
  1659. MADPS C44, C44, A8, B8
  1660. MADPS C11, C11, A1, B1
  1661. MADPS C21, C21, A2, B1
  1662. gsLQC1(R13, F13, F12, 1) # B3 B4
  1663. MADPS C12, C12, A1, B2
  1664. MADPS C22, C22, A2, B2
  1665. gsLQC1(R12, F5, F4, 2) # A5 A6
  1666. MADPS C31, C31, A3, B1
  1667. MADPS C41, C41, A4, B1
  1668. gsLQC1(R12, F7, F6, 3) # A7 A8
  1669. MADPS C32, C32, A3, B2
  1670. MADPS C42, C42, A4, B2
  1671. FETCH $0, 0 * SIZE(PREB)
  1672. MADPS C13, C13, A1, B3
  1673. MADPS C23, C23, A2, B3
  1674. MADPS C33, C33, A3, B3
  1675. MADPS C43, C43, A4, B3
  1676. MADPS C14, C14, A1, B4
  1677. PLU B7, B5, B5
  1678. FETCH $0, 0 * SIZE(PREA)
  1679. MADPS C24, C24, A2, B4
  1680. PLU B8, B6, B6
  1681. FETCH $0, 4 * SIZE(PREA)
  1682. MADPS C34, C34, A3, B4
  1683. MADPS C44, C44, A4, B4
  1684. MADPS C11, C11, A5, B5
  1685. MADPS C21, C21, A6, B5
  1686. gsLQC1(R13, F9, F8, 2) # B1 B2
  1687. MADPS C12, C12, A5, B6
  1688. MADPS C22, C22, A6, B6
  1689. gsLQC1(R12, F1, F0, 4) # A1 A2
  1690. MADPS C31, C31, A7, B5
  1691. MADPS C41, C41, A8, B5
  1692. gsLQC1(R12, F3, F2, 5) # A3 A4
  1693. MADPS C32, C32, A7, B6
  1694. MADPS C42, C42, A8, B6
  1695. FETCH $0, 4 * SIZE(PREB)
  1696. MADPS C13, C13, A5, B7
  1697. MADPS C23, C23, A6, B7
  1698. MADPS C33, C33, A7, B7
  1699. MADPS C43, C43, A8, B7
  1700. MADPS C14, C14, A5, B8
  1701. PLU B3, B1, B1
  1702. FETCH $0, 8 * SIZE(PREA)
  1703. MADPS C24, C24, A6, B8
  1704. PLU B4, B2, B2
  1705. FETCH $0, 12 * SIZE(PREA)
  1706. MADPS C34, C34, A7, B8
  1707. MADPS C44, C44, A8, B8
  1708. MADPS C11, C11, A1, B1
  1709. MADPS C21, C21, A2, B1
  1710. gsLQC1(R13, F13, F12, 3) # B3 B4
  1711. MADPS C12, C12, A1, B2
  1712. MADPS C22, C22, A2, B2
  1713. gsLQC1(R12, F5, F4, 6) # A5 A6
  1714. MADPS C31, C31, A3, B1
  1715. MADPS C41, C41, A4, B1
  1716. gsLQC1(R12, F7, F6, 7) # A7 A8
  1717. MADPS C32, C32, A3, B2
  1718. MADPS C42, C42, A4, B2
  1719. FETCH $0, 8 * SIZE(PREB)
  1720. MADPS C13, C13, A1, B3
  1721. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  1722. MADPS C23, C23, A2, B3
  1723. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  1724. MADPS C33, C33, A3, B3
  1725. MADPS C43, C43, A4, B3
  1726. MADPS C14, C14, A1, B4
  1727. PLU B7, B5, B5
  1728. FETCH $0, 16 * SIZE(PREA)
  1729. MADPS C24, C24, A2, B4
  1730. PLU B8, B6, B6
  1731. FETCH $0, 20 * SIZE(PREA)
  1732. MADPS C34, C34, A3, B4
  1733. MADPS C44, C44, A4, B4
  1734. MADPS C11, C11, A5, B5
  1735. MADPS C21, C21, A6, B5
  1736. gsLQC1(R13, F9, F8, 0) # B1 B2
  1737. MADPS C12, C12, A5, B6
  1738. MADPS C22, C22, A6, B6
  1739. gsLQC1(R12, F1, F0, 0) # A1 A2
  1740. MADPS C31, C31, A7, B5
  1741. MADPS C41, C41, A8, B5
  1742. gsLQC1(R12, F3, F2, 1) # A3 A4
  1743. MADPS C32, C32, A7, B6
  1744. MADPS C42, C42, A8, B6
  1745. FETCH $0, 12 * SIZE(PREB)
  1746. MADPS C13, C13, A5, B7
  1747. MADPS C23, C23, A6, B7
  1748. daddiu PREB, PREB, 16 * SIZE
  1749. MADPS C33, C33, A7, B7
  1750. MADPS C43, C43, A8, B7
  1751. MADPS C14, C14, A5, B8
  1752. PLU B3, B1, B1
  1753. FETCH $0, 24 * SIZE(PREA)
  1754. MADPS C24, C24, A6, B8
  1755. PLU B4, B2, B2
  1756. FETCH $0, 28 * SIZE(PREA)
  1757. daddiu PREA, PREA, 32 * SIZE
  1758. MADPS C34, C34, A7, B8
  1759. MADPS C44, C44, A8, B8
  1760. MADPS C11, C11, A1, B1
  1761. MADPS C21, C21, A2, B1
  1762. gsLQC1(R13, F13, F12, 1) # B3 B4
  1763. MADPS C12, C12, A1, B2
  1764. MADPS C22, C22, A2, B2
  1765. gsLQC1(R12, F5, F4, 2) # A5 A6
  1766. MADPS C31, C31, A3, B1
  1767. MADPS C41, C41, A4, B1
  1768. gsLQC1(R12, F7, F6, 3) # A7 A8
  1769. MADPS C32, C32, A3, B2
  1770. MADPS C42, C42, A4, B2
  1771. FETCH $0, 0 * SIZE(PREB)
  1772. MADPS C13, C13, A1, B3
  1773. MADPS C23, C23, A2, B3
  1774. MADPS C33, C33, A3, B3
  1775. MADPS C43, C43, A4, B3
  1776. MADPS C14, C14, A1, B4
  1777. PLU B7, B5, B5
  1778. FETCH $0, 0 * SIZE(PREA)
  1779. MADPS C24, C24, A2, B4
  1780. PLU B8, B6, B6
  1781. FETCH $0, 4 * SIZE(PREA)
  1782. MADPS C34, C34, A3, B4
  1783. MADPS C44, C44, A4, B4
  1784. MADPS C11, C11, A5, B5
  1785. MADPS C21, C21, A6, B5
  1786. gsLQC1(R13, F9, F8, 2) # B1 B2
  1787. MADPS C12, C12, A5, B6
  1788. MADPS C22, C22, A6, B6
  1789. gsLQC1(R12, F1, F0, 4) # A1 A2
  1790. MADPS C31, C31, A7, B5
  1791. MADPS C41, C41, A8, B5
  1792. gsLQC1(R12, F3, F2, 5) # A3 A4
  1793. MADPS C32, C32, A7, B6
  1794. MADPS C42, C42, A8, B6
  1795. FETCH $0, 4 * SIZE(PREB)
  1796. MADPS C13, C13, A5, B7
  1797. MADPS C23, C23, A6, B7
  1798. MADPS C33, C33, A7, B7
  1799. MADPS C43, C43, A8, B7
  1800. MADPS C14, C14, A5, B8
  1801. PLU B3, B1, B1
  1802. FETCH $0, 8 * SIZE(PREA)
  1803. MADPS C24, C24, A6, B8
  1804. PLU B4, B2, B2
  1805. FETCH $0, 12 * SIZE(PREA)
  1806. MADPS C34, C34, A7, B8
  1807. MADPS C44, C44, A8, B8
  1808. MADPS C11, C11, A1, B1
  1809. MADPS C21, C21, A2, B1
  1810. gsLQC1(R13, F13, F12, 3) # B3 B4
  1811. MADPS C12, C12, A1, B2
  1812. MADPS C22, C22, A2, B2
  1813. gsLQC1(R12, F5, F4, 6) # A5 A6
  1814. MADPS C31, C31, A3, B1
  1815. MADPS C41, C41, A4, B1
  1816. gsLQC1(R12, F7, F6, 7) # A7 A8
  1817. MADPS C32, C32, A3, B2
  1818. MADPS C42, C42, A4, B2
  1819. FETCH $0, 8 * SIZE(PREB)
  1820. MADPS C13, C13, A1, B3
  1821. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  1822. MADPS C23, C23, A2, B3
  1823. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  1824. MADPS C33, C33, A3, B3
  1825. MADPS C43, C43, A4, B3
  1826. MADPS C14, C14, A1, B4
  1827. PLU B7, B5, B5
  1828. FETCH $0, 16 * SIZE(PREA)
  1829. MADPS C24, C24, A2, B4
  1830. PLU B8, B6, B6
  1831. FETCH $0, 20 * SIZE(PREA)
  1832. MADPS C34, C34, A3, B4
  1833. MADPS C44, C44, A4, B4
  1834. MADPS C11, C11, A5, B5
  1835. MADPS C21, C21, A6, B5
  1836. gsLQC1(R13, F9, F8, 0) # B1 B2
  1837. MADPS C12, C12, A5, B6
  1838. MADPS C22, C22, A6, B6
  1839. gsLQC1(R12, F1, F0, 0) # A1 A2
  1840. MADPS C31, C31, A7, B5
  1841. MADPS C41, C41, A8, B5
  1842. gsLQC1(R12, F3, F2, 1) # A3 A4
  1843. MADPS C32, C32, A7, B6
  1844. MADPS C42, C42, A8, B6
  1845. FETCH $0, 12 * SIZE(PREB)
  1846. MADPS C13, C13, A5, B7
  1847. MADPS C23, C23, A6, B7
  1848. daddiu PREB, PREB, 16 * SIZE
  1849. MADPS C33, C33, A7, B7
  1850. MADPS C43, C43, A8, B7
  1851. MADPS C14, C14, A5, B8
  1852. PLU B3, B1, B1
  1853. FETCH $0, 24 * SIZE(PREA)
  1854. MADPS C24, C24, A6, B8
  1855. PLU B4, B2, B2
  1856. FETCH $0, 28 * SIZE(PREA)
  1857. daddiu PREA, PREA, 32 * SIZE
  1858. MADPS C34, C34, A7, B8
  1859. bgtz L, .L4810 # branch back to .L4810 while L > 0
  1860. MADPS C44, C44, A8, B8 # NOTE(review): placed right after the branch, presumably in its MIPS delay slot so this last C44 update runs on both the taken and fall-through paths - confirm
  1861. .align 4
  1862. .L482: # guard: the code after this test runs only when the 32 bit of K (TEMP under TRMMKERNEL) is set
  1863. #ifndef TRMMKERNEL
  1864. andi L, K, 32 # L = K & 32
  1865. #else
  1866. andi L, TEMP, 32 # L = TEMP & 32 (TRMMKERNEL builds test TEMP instead of K)
  1867. #endif
  1868. blez L, .L483 # L is either 0 or 32 here; jump ahead to .L483 when it is 0
  1869. NOP # fills the slot after the branch (presumably the MIPS branch delay slot)
  1870. MADPS C11, C11, A1, B1
  1871. MADPS C21, C21, A2, B1
  1872. gsLQC1(R13, F13, F12, 1) # B3 B4
  1873. MADPS C12, C12, A1, B2
  1874. MADPS C22, C22, A2, B2
  1875. gsLQC1(R12, F5, F4, 2) # A5 A6
  1876. MADPS C31, C31, A3, B1
  1877. MADPS C41, C41, A4, B1
  1878. gsLQC1(R12, F7, F6, 3) # A7 A8
  1879. MADPS C32, C32, A3, B2
  1880. MADPS C42, C42, A4, B2
  1881. FETCH $0, 0 * SIZE(PREB)
  1882. MADPS C13, C13, A1, B3
  1883. MADPS C23, C23, A2, B3
  1884. MADPS C33, C33, A3, B3
  1885. MADPS C43, C43, A4, B3
  1886. MADPS C14, C14, A1, B4
  1887. PLU B7, B5, B5
  1888. FETCH $0, 0 * SIZE(PREA)
  1889. MADPS C24, C24, A2, B4
  1890. PLU B8, B6, B6
  1891. FETCH $0, 4 * SIZE(PREA)
  1892. MADPS C34, C34, A3, B4
  1893. MADPS C44, C44, A4, B4
  1894. MADPS C11, C11, A5, B5
  1895. MADPS C21, C21, A6, B5
  1896. gsLQC1(R13, F9, F8, 2) # B1 B2
  1897. MADPS C12, C12, A5, B6
  1898. MADPS C22, C22, A6, B6
  1899. gsLQC1(R12, F1, F0, 4) # A1 A2
  1900. MADPS C31, C31, A7, B5
  1901. MADPS C41, C41, A8, B5
  1902. gsLQC1(R12, F3, F2, 5) # A3 A4
  1903. MADPS C32, C32, A7, B6
  1904. MADPS C42, C42, A8, B6
  1905. FETCH $0, 4 * SIZE(PREB)
  1906. MADPS C13, C13, A5, B7
  1907. MADPS C23, C23, A6, B7
  1908. MADPS C33, C33, A7, B7
  1909. MADPS C43, C43, A8, B7
  1910. MADPS C14, C14, A5, B8
  1911. PLU B3, B1, B1
  1912. FETCH $0, 8 * SIZE(PREA)
  1913. MADPS C24, C24, A6, B8
  1914. PLU B4, B2, B2
  1915. FETCH $0, 12 * SIZE(PREA)
  1916. MADPS C34, C34, A7, B8
  1917. MADPS C44, C44, A8, B8
  1918. MADPS C11, C11, A1, B1
  1919. MADPS C21, C21, A2, B1
  1920. gsLQC1(R13, F13, F12, 3) # B3 B4
  1921. MADPS C12, C12, A1, B2
  1922. MADPS C22, C22, A2, B2
  1923. gsLQC1(R12, F5, F4, 6) # A5 A6
  1924. MADPS C31, C31, A3, B1
  1925. MADPS C41, C41, A4, B1
  1926. gsLQC1(R12, F7, F6, 7) # A7 A8
  1927. MADPS C32, C32, A3, B2
  1928. MADPS C42, C42, A4, B2
  1929. FETCH $0, 8 * SIZE(PREB)
  1930. MADPS C13, C13, A1, B3
  1931. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  1932. MADPS C23, C23, A2, B3
  1933. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  1934. MADPS C33, C33, A3, B3
  1935. MADPS C43, C43, A4, B3
  1936. MADPS C14, C14, A1, B4
  1937. PLU B7, B5, B5
  1938. FETCH $0, 16 * SIZE(PREA)
  1939. MADPS C24, C24, A2, B4
  1940. PLU B8, B6, B6
  1941. FETCH $0, 20 * SIZE(PREA)
  1942. MADPS C34, C34, A3, B4
  1943. MADPS C44, C44, A4, B4
  1944. MADPS C11, C11, A5, B5
  1945. MADPS C21, C21, A6, B5
  1946. gsLQC1(R13, F9, F8, 0) # B1 B2
  1947. MADPS C12, C12, A5, B6
  1948. MADPS C22, C22, A6, B6
  1949. gsLQC1(R12, F1, F0, 0) # A1 A2
  1950. MADPS C31, C31, A7, B5
  1951. MADPS C41, C41, A8, B5
  1952. gsLQC1(R12, F3, F2, 1) # A3 A4
  1953. MADPS C32, C32, A7, B6
  1954. MADPS C42, C42, A8, B6
  1955. FETCH $0, 12 * SIZE(PREB)
  1956. MADPS C13, C13, A5, B7
  1957. MADPS C23, C23, A6, B7
  1958. daddiu PREB, PREB, 16 * SIZE
  1959. MADPS C33, C33, A7, B7
  1960. MADPS C43, C43, A8, B7
  1961. MADPS C14, C14, A5, B8
  1962. PLU B3, B1, B1
  1963. FETCH $0, 24 * SIZE(PREA)
  1964. MADPS C24, C24, A6, B8
  1965. PLU B4, B2, B2
  1966. FETCH $0, 28 * SIZE(PREA)
  1967. daddiu PREA, PREA, 32 * SIZE
  1968. MADPS C34, C34, A7, B8
  1969. MADPS C44, C44, A8, B8
  1970. MADPS C11, C11, A1, B1
  1971. MADPS C21, C21, A2, B1
  1972. gsLQC1(R13, F13, F12, 1) # B3 B4
  1973. MADPS C12, C12, A1, B2
  1974. MADPS C22, C22, A2, B2
  1975. gsLQC1(R12, F5, F4, 2) # A5 A6
  1976. MADPS C31, C31, A3, B1
  1977. MADPS C41, C41, A4, B1
  1978. gsLQC1(R12, F7, F6, 3) # A7 A8
  1979. MADPS C32, C32, A3, B2
  1980. MADPS C42, C42, A4, B2
  1981. FETCH $0, 0 * SIZE(PREB)
  1982. MADPS C13, C13, A1, B3
  1983. MADPS C23, C23, A2, B3
  1984. MADPS C33, C33, A3, B3
  1985. MADPS C43, C43, A4, B3
  1986. MADPS C14, C14, A1, B4
  1987. PLU B7, B5, B5
  1988. FETCH $0, 0 * SIZE(PREA)
  1989. MADPS C24, C24, A2, B4
  1990. PLU B8, B6, B6
  1991. FETCH $0, 4 * SIZE(PREA)
  1992. MADPS C34, C34, A3, B4
  1993. MADPS C44, C44, A4, B4
  1994. MADPS C11, C11, A5, B5
  1995. MADPS C21, C21, A6, B5
  1996. gsLQC1(R13, F9, F8, 2) # B1 B2
  1997. MADPS C12, C12, A5, B6
  1998. MADPS C22, C22, A6, B6
  1999. gsLQC1(R12, F1, F0, 4) # A1 A2
  2000. MADPS C31, C31, A7, B5
  2001. MADPS C41, C41, A8, B5
  2002. gsLQC1(R12, F3, F2, 5) # A3 A4
  2003. MADPS C32, C32, A7, B6
  2004. MADPS C42, C42, A8, B6
  2005. FETCH $0, 4 * SIZE(PREB)
  2006. MADPS C13, C13, A5, B7
  2007. MADPS C23, C23, A6, B7
  2008. MADPS C33, C33, A7, B7
  2009. MADPS C43, C43, A8, B7
  2010. MADPS C14, C14, A5, B8
  2011. PLU B3, B1, B1
  2012. FETCH $0, 8 * SIZE(PREA)
  2013. MADPS C24, C24, A6, B8
  2014. PLU B4, B2, B2
  2015. FETCH $0, 12 * SIZE(PREA)
  2016. MADPS C34, C34, A7, B8
  2017. MADPS C44, C44, A8, B8
  2018. MADPS C11, C11, A1, B1
  2019. MADPS C21, C21, A2, B1
  2020. gsLQC1(R13, F13, F12, 3) # B3 B4
  2021. MADPS C12, C12, A1, B2
  2022. MADPS C22, C22, A2, B2
  2023. gsLQC1(R12, F5, F4, 6) # A5 A6
  2024. MADPS C31, C31, A3, B1
  2025. MADPS C41, C41, A4, B1
  2026. gsLQC1(R12, F7, F6, 7) # A7 A8
  2027. MADPS C32, C32, A3, B2
  2028. MADPS C42, C42, A4, B2
  2029. FETCH $0, 8 * SIZE(PREB)
  2030. MADPS C13, C13, A1, B3
  2031. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  2032. MADPS C23, C23, A2, B3
  2033. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  2034. MADPS C33, C33, A3, B3
  2035. MADPS C43, C43, A4, B3
  2036. MADPS C14, C14, A1, B4
  2037. PLU B7, B5, B5
  2038. FETCH $0, 16 * SIZE(PREA)
  2039. MADPS C24, C24, A2, B4
  2040. PLU B8, B6, B6
  2041. FETCH $0, 20 * SIZE(PREA)
  2042. MADPS C34, C34, A3, B4
  2043. MADPS C44, C44, A4, B4
  2044. MADPS C11, C11, A5, B5
  2045. MADPS C21, C21, A6, B5
  2046. gsLQC1(R13, F9, F8, 0) # B1 B2
  2047. MADPS C12, C12, A5, B6
  2048. MADPS C22, C22, A6, B6
  2049. gsLQC1(R12, F1, F0, 0) # A1 A2
  2050. MADPS C31, C31, A7, B5
  2051. MADPS C41, C41, A8, B5
  2052. gsLQC1(R12, F3, F2, 1) # A3 A4
  2053. MADPS C32, C32, A7, B6
  2054. MADPS C42, C42, A8, B6
  2055. FETCH $0, 12 * SIZE(PREB)
  2056. MADPS C13, C13, A5, B7
  2057. MADPS C23, C23, A6, B7
  2058. daddiu PREB, PREB, 16 * SIZE
  2059. MADPS C33, C33, A7, B7
  2060. MADPS C43, C43, A8, B7
  2061. MADPS C14, C14, A5, B8
  2062. PLU B3, B1, B1
  2063. FETCH $0, 24 * SIZE(PREA)
  2064. MADPS C24, C24, A6, B8
  2065. PLU B4, B2, B2
  2066. FETCH $0, 28 * SIZE(PREA)
  2067. daddiu PREA, PREA, 32 * SIZE
  2068. MADPS C34, C34, A7, B8
  2069. MADPS C44, C44, A8, B8
  2070. MADPS C11, C11, A1, B1
  2071. MADPS C21, C21, A2, B1
  2072. gsLQC1(R13, F13, F12, 1) # B3 B4
  2073. MADPS C12, C12, A1, B2
  2074. MADPS C22, C22, A2, B2
  2075. gsLQC1(R12, F5, F4, 2) # A5 A6
  2076. MADPS C31, C31, A3, B1
  2077. MADPS C41, C41, A4, B1
  2078. gsLQC1(R12, F7, F6, 3) # A7 A8
  2079. MADPS C32, C32, A3, B2
  2080. MADPS C42, C42, A4, B2
  2081. FETCH $0, 0 * SIZE(PREB)
  2082. MADPS C13, C13, A1, B3
  2083. MADPS C23, C23, A2, B3
  2084. MADPS C33, C33, A3, B3
  2085. MADPS C43, C43, A4, B3
  2086. MADPS C14, C14, A1, B4
  2087. PLU B7, B5, B5
  2088. FETCH $0, 0 * SIZE(PREA)
  2089. MADPS C24, C24, A2, B4
  2090. PLU B8, B6, B6
  2091. FETCH $0, 4 * SIZE(PREA)
  2092. MADPS C34, C34, A3, B4
  2093. MADPS C44, C44, A4, B4
  2094. MADPS C11, C11, A5, B5
  2095. MADPS C21, C21, A6, B5
  2096. gsLQC1(R13, F9, F8, 2) # B1 B2
  2097. MADPS C12, C12, A5, B6
  2098. MADPS C22, C22, A6, B6
  2099. gsLQC1(R12, F1, F0, 4) # A1 A2
  2100. MADPS C31, C31, A7, B5
  2101. MADPS C41, C41, A8, B5
  2102. gsLQC1(R12, F3, F2, 5) # A3 A4
  2103. MADPS C32, C32, A7, B6
  2104. MADPS C42, C42, A8, B6
  2105. FETCH $0, 4 * SIZE(PREB)
  2106. MADPS C13, C13, A5, B7
  2107. MADPS C23, C23, A6, B7
  2108. MADPS C33, C33, A7, B7
  2109. MADPS C43, C43, A8, B7
  2110. MADPS C14, C14, A5, B8
  2111. PLU B3, B1, B1
  2112. FETCH $0, 8 * SIZE(PREA)
  2113. MADPS C24, C24, A6, B8
  2114. PLU B4, B2, B2
  2115. FETCH $0, 12 * SIZE(PREA)
  2116. MADPS C34, C34, A7, B8
  2117. MADPS C44, C44, A8, B8
  2118. MADPS C11, C11, A1, B1
  2119. MADPS C21, C21, A2, B1
  2120. gsLQC1(R13, F13, F12, 3) # B3 B4
  2121. MADPS C12, C12, A1, B2
  2122. MADPS C22, C22, A2, B2
  2123. gsLQC1(R12, F5, F4, 6) # A5 A6
  2124. MADPS C31, C31, A3, B1
  2125. MADPS C41, C41, A4, B1
  2126. gsLQC1(R12, F7, F6, 7) # A7 A8
  2127. MADPS C32, C32, A3, B2
  2128. MADPS C42, C42, A4, B2
  2129. FETCH $0, 8 * SIZE(PREB)
  2130. MADPS C13, C13, A1, B3
  2131. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  2132. MADPS C23, C23, A2, B3
  2133. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  2134. MADPS C33, C33, A3, B3
  2135. MADPS C43, C43, A4, B3
  2136. MADPS C14, C14, A1, B4
  2137. PLU B7, B5, B5
  2138. FETCH $0, 16 * SIZE(PREA)
  2139. MADPS C24, C24, A2, B4
  2140. PLU B8, B6, B6
  2141. FETCH $0, 20 * SIZE(PREA)
  2142. MADPS C34, C34, A3, B4
  2143. MADPS C44, C44, A4, B4
  2144. MADPS C11, C11, A5, B5
  2145. MADPS C21, C21, A6, B5
  2146. gsLQC1(R13, F9, F8, 0) # B1 B2
  2147. MADPS C12, C12, A5, B6
  2148. MADPS C22, C22, A6, B6
  2149. gsLQC1(R12, F1, F0, 0) # A1 A2
  2150. MADPS C31, C31, A7, B5
  2151. MADPS C41, C41, A8, B5
  2152. gsLQC1(R12, F3, F2, 1) # A3 A4
  2153. MADPS C32, C32, A7, B6
  2154. MADPS C42, C42, A8, B6
  2155. FETCH $0, 12 * SIZE(PREB)
  2156. MADPS C13, C13, A5, B7
  2157. MADPS C23, C23, A6, B7
  2158. daddiu PREB, PREB, 16 * SIZE
  2159. MADPS C33, C33, A7, B7
  2160. MADPS C43, C43, A8, B7
  2161. MADPS C14, C14, A5, B8
  2162. PLU B3, B1, B1
  2163. FETCH $0, 24 * SIZE(PREA)
  2164. MADPS C24, C24, A6, B8
  2165. PLU B4, B2, B2
  2166. FETCH $0, 28 * SIZE(PREA)
  2167. daddiu PREA, PREA, 32 * SIZE
  2168. MADPS C34, C34, A7, B8
  2169. MADPS C44, C44, A8, B8
  2170. MADPS C11, C11, A1, B1
  2171. MADPS C21, C21, A2, B1
  2172. gsLQC1(R13, F13, F12, 1) # B3 B4
  2173. MADPS C12, C12, A1, B2
  2174. MADPS C22, C22, A2, B2
  2175. gsLQC1(R12, F5, F4, 2) # A5 A6
  2176. MADPS C31, C31, A3, B1
  2177. MADPS C41, C41, A4, B1
  2178. gsLQC1(R12, F7, F6, 3) # A7 A8
  2179. MADPS C32, C32, A3, B2
  2180. MADPS C42, C42, A4, B2
  2181. FETCH $0, 0 * SIZE(PREB)
  2182. MADPS C13, C13, A1, B3
  2183. MADPS C23, C23, A2, B3
  2184. MADPS C33, C33, A3, B3
  2185. MADPS C43, C43, A4, B3
  2186. MADPS C14, C14, A1, B4
  2187. PLU B7, B5, B5
  2188. FETCH $0, 0 * SIZE(PREA)
  2189. MADPS C24, C24, A2, B4
  2190. PLU B8, B6, B6
  2191. FETCH $0, 4 * SIZE(PREA)
  2192. MADPS C34, C34, A3, B4
  2193. MADPS C44, C44, A4, B4
  2194. MADPS C11, C11, A5, B5
  2195. MADPS C21, C21, A6, B5
  2196. gsLQC1(R13, F9, F8, 2) # B1 B2
  2197. MADPS C12, C12, A5, B6
  2198. MADPS C22, C22, A6, B6
  2199. gsLQC1(R12, F1, F0, 4) # A1 A2
  2200. MADPS C31, C31, A7, B5
  2201. MADPS C41, C41, A8, B5
  2202. gsLQC1(R12, F3, F2, 5) # A3 A4
  2203. MADPS C32, C32, A7, B6
  2204. MADPS C42, C42, A8, B6
  2205. FETCH $0, 4 * SIZE(PREB)
  2206. MADPS C13, C13, A5, B7
  2207. MADPS C23, C23, A6, B7
  2208. MADPS C33, C33, A7, B7
  2209. MADPS C43, C43, A8, B7
  2210. MADPS C14, C14, A5, B8
  2211. PLU B3, B1, B1
  2212. FETCH $0, 8 * SIZE(PREA)
  2213. MADPS C24, C24, A6, B8
  2214. PLU B4, B2, B2
  2215. FETCH $0, 12 * SIZE(PREA)
  2216. MADPS C34, C34, A7, B8
  2217. MADPS C44, C44, A8, B8
  2218. MADPS C11, C11, A1, B1
  2219. MADPS C21, C21, A2, B1
  2220. gsLQC1(R13, F13, F12, 3) # B3 B4
  2221. MADPS C12, C12, A1, B2
  2222. MADPS C22, C22, A2, B2
  2223. gsLQC1(R12, F5, F4, 6) # A5 A6
  2224. MADPS C31, C31, A3, B1
  2225. MADPS C41, C41, A4, B1
  2226. gsLQC1(R12, F7, F6, 7) # A7 A8
  2227. MADPS C32, C32, A3, B2
  2228. MADPS C42, C42, A4, B2
  2229. FETCH $0, 8 * SIZE(PREB)
  2230. MADPS C13, C13, A1, B3
  2231. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  2232. MADPS C23, C23, A2, B3
  2233. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  2234. MADPS C33, C33, A3, B3
  2235. MADPS C43, C43, A4, B3
  2236. MADPS C14, C14, A1, B4
  2237. PLU B7, B5, B5
  2238. FETCH $0, 16 * SIZE(PREA)
  2239. MADPS C24, C24, A2, B4
  2240. PLU B8, B6, B6
  2241. FETCH $0, 20 * SIZE(PREA)
  2242. MADPS C34, C34, A3, B4
  2243. MADPS C44, C44, A4, B4
  2244. MADPS C11, C11, A5, B5
  2245. MADPS C21, C21, A6, B5
  2246. gsLQC1(R13, F9, F8, 0) # B1 B2
  2247. MADPS C12, C12, A5, B6
  2248. MADPS C22, C22, A6, B6
  2249. gsLQC1(R12, F1, F0, 0) # A1 A2
  2250. MADPS C31, C31, A7, B5
  2251. MADPS C41, C41, A8, B5
  2252. gsLQC1(R12, F3, F2, 1) # A3 A4
  2253. MADPS C32, C32, A7, B6
  2254. MADPS C42, C42, A8, B6
  2255. FETCH $0, 12 * SIZE(PREB)
  2256. MADPS C13, C13, A5, B7
  2257. MADPS C23, C23, A6, B7
  2258. daddiu PREB, PREB, 16 * SIZE
  2259. MADPS C33, C33, A7, B7
  2260. MADPS C43, C43, A8, B7
  2261. MADPS C14, C14, A5, B8
  2262. PLU B3, B1, B1
  2263. FETCH $0, 24 * SIZE(PREA)
  2264. MADPS C24, C24, A6, B8
  2265. PLU B4, B2, B2
  2266. FETCH $0, 28 * SIZE(PREA)
  2267. daddiu PREA, PREA, 32 * SIZE
  2268. MADPS C34, C34, A7, B8
  2269. MADPS C44, C44, A8, B8
  2270. MADPS C11, C11, A1, B1
  2271. MADPS C21, C21, A2, B1
  2272. gsLQC1(R13, F13, F12, 1) # B3 B4
  2273. MADPS C12, C12, A1, B2
  2274. MADPS C22, C22, A2, B2
  2275. gsLQC1(R12, F5, F4, 2) # A5 A6
  2276. MADPS C31, C31, A3, B1
  2277. MADPS C41, C41, A4, B1
  2278. gsLQC1(R12, F7, F6, 3) # A7 A8
  2279. MADPS C32, C32, A3, B2
  2280. MADPS C42, C42, A4, B2
  2281. FETCH $0, 0 * SIZE(PREB)
  2282. MADPS C13, C13, A1, B3
  2283. MADPS C23, C23, A2, B3
  2284. MADPS C33, C33, A3, B3
  2285. MADPS C43, C43, A4, B3
  2286. MADPS C14, C14, A1, B4
  2287. PLU B7, B5, B5
  2288. FETCH $0, 0 * SIZE(PREA)
  2289. MADPS C24, C24, A2, B4
  2290. PLU B8, B6, B6
  2291. FETCH $0, 4 * SIZE(PREA)
  2292. MADPS C34, C34, A3, B4
  2293. MADPS C44, C44, A4, B4
  2294. MADPS C11, C11, A5, B5
  2295. MADPS C21, C21, A6, B5
  2296. gsLQC1(R13, F9, F8, 2) # B1 B2
  2297. MADPS C12, C12, A5, B6
  2298. MADPS C22, C22, A6, B6
  2299. gsLQC1(R12, F1, F0, 4) # A1 A2
  2300. MADPS C31, C31, A7, B5
  2301. MADPS C41, C41, A8, B5
  2302. gsLQC1(R12, F3, F2, 5) # A3 A4
  2303. MADPS C32, C32, A7, B6
  2304. MADPS C42, C42, A8, B6
  2305. FETCH $0, 4 * SIZE(PREB)
  2306. MADPS C13, C13, A5, B7
  2307. MADPS C23, C23, A6, B7
  2308. MADPS C33, C33, A7, B7
  2309. MADPS C43, C43, A8, B7
  2310. MADPS C14, C14, A5, B8
  2311. PLU B3, B1, B1
  2312. FETCH $0, 8 * SIZE(PREA)
  2313. MADPS C24, C24, A6, B8
  2314. PLU B4, B2, B2
  2315. FETCH $0, 12 * SIZE(PREA)
  2316. MADPS C34, C34, A7, B8
  2317. MADPS C44, C44, A8, B8
  2318. MADPS C11, C11, A1, B1
  2319. MADPS C21, C21, A2, B1
  2320. gsLQC1(R13, F13, F12, 3) # B3 B4
  2321. MADPS C12, C12, A1, B2
  2322. MADPS C22, C22, A2, B2
  2323. gsLQC1(R12, F5, F4, 6) # A5 A6
  2324. MADPS C31, C31, A3, B1
  2325. MADPS C41, C41, A4, B1
  2326. gsLQC1(R12, F7, F6, 7) # A7 A8
  2327. MADPS C32, C32, A3, B2
  2328. MADPS C42, C42, A4, B2
  2329. FETCH $0, 8 * SIZE(PREB)
  2330. MADPS C13, C13, A1, B3
  2331. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  2332. MADPS C23, C23, A2, B3
  2333. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  2334. MADPS C33, C33, A3, B3
  2335. MADPS C43, C43, A4, B3
  2336. MADPS C14, C14, A1, B4
  2337. PLU B7, B5, B5
  2338. FETCH $0, 16 * SIZE(PREA)
  2339. MADPS C24, C24, A2, B4
  2340. PLU B8, B6, B6
  2341. FETCH $0, 20 * SIZE(PREA)
  2342. MADPS C34, C34, A3, B4
  2343. MADPS C44, C44, A4, B4
  2344. MADPS C11, C11, A5, B5
  2345. MADPS C21, C21, A6, B5
  2346. gsLQC1(R13, F9, F8, 0) # B1 B2
  2347. MADPS C12, C12, A5, B6
  2348. MADPS C22, C22, A6, B6
  2349. gsLQC1(R12, F1, F0, 0) # A1 A2
  2350. MADPS C31, C31, A7, B5
  2351. MADPS C41, C41, A8, B5
  2352. gsLQC1(R12, F3, F2, 1) # A3 A4
  2353. MADPS C32, C32, A7, B6
  2354. MADPS C42, C42, A8, B6
  2355. FETCH $0, 12 * SIZE(PREB)
  2356. MADPS C13, C13, A5, B7
  2357. MADPS C23, C23, A6, B7
  2358. daddiu PREB, PREB, 16 * SIZE
  2359. MADPS C33, C33, A7, B7
  2360. MADPS C43, C43, A8, B7
  2361. MADPS C14, C14, A5, B8
  2362. PLU B3, B1, B1
  2363. FETCH $0, 24 * SIZE(PREA)
  2364. MADPS C24, C24, A6, B8
  2365. PLU B4, B2, B2
  2366. FETCH $0, 28 * SIZE(PREA)
  2367. daddiu PREA, PREA, 32 * SIZE
  2368. MADPS C34, C34, A7, B8
  2369. MADPS C44, C44, A8, B8
  2370. MADPS C11, C11, A1, B1
  2371. MADPS C21, C21, A2, B1
  2372. gsLQC1(R13, F13, F12, 1) # B3 B4
  2373. MADPS C12, C12, A1, B2
  2374. MADPS C22, C22, A2, B2
  2375. gsLQC1(R12, F5, F4, 2) # A5 A6
  2376. MADPS C31, C31, A3, B1
  2377. MADPS C41, C41, A4, B1
  2378. gsLQC1(R12, F7, F6, 3) # A7 A8
  2379. MADPS C32, C32, A3, B2
  2380. MADPS C42, C42, A4, B2
  2381. FETCH $0, 0 * SIZE(PREB)
  2382. MADPS C13, C13, A1, B3
  2383. MADPS C23, C23, A2, B3
  2384. MADPS C33, C33, A3, B3
  2385. MADPS C43, C43, A4, B3
  2386. MADPS C14, C14, A1, B4
  2387. PLU B7, B5, B5
  2388. FETCH $0, 0 * SIZE(PREA)
  2389. MADPS C24, C24, A2, B4
  2390. PLU B8, B6, B6
  2391. FETCH $0, 4 * SIZE(PREA)
  2392. MADPS C34, C34, A3, B4
  2393. MADPS C44, C44, A4, B4
  2394. MADPS C11, C11, A5, B5
  2395. MADPS C21, C21, A6, B5
  2396. gsLQC1(R13, F9, F8, 2) # B1 B2
  2397. MADPS C12, C12, A5, B6
  2398. MADPS C22, C22, A6, B6
  2399. gsLQC1(R12, F1, F0, 4) # A1 A2
  2400. MADPS C31, C31, A7, B5
  2401. MADPS C41, C41, A8, B5
  2402. gsLQC1(R12, F3, F2, 5) # A3 A4
  2403. MADPS C32, C32, A7, B6
  2404. MADPS C42, C42, A8, B6
  2405. FETCH $0, 4 * SIZE(PREB)
  2406. MADPS C13, C13, A5, B7
  2407. MADPS C23, C23, A6, B7
  2408. MADPS C33, C33, A7, B7
  2409. MADPS C43, C43, A8, B7
  2410. MADPS C14, C14, A5, B8
  2411. PLU B3, B1, B1
  2412. FETCH $0, 8 * SIZE(PREA)
  2413. MADPS C24, C24, A6, B8
  2414. PLU B4, B2, B2
  2415. FETCH $0, 12 * SIZE(PREA)
  2416. MADPS C34, C34, A7, B8
  2417. MADPS C44, C44, A8, B8
  2418. MADPS C11, C11, A1, B1
  2419. MADPS C21, C21, A2, B1
  2420. gsLQC1(R13, F13, F12, 3) # B3 B4
  2421. MADPS C12, C12, A1, B2
  2422. MADPS C22, C22, A2, B2
  2423. gsLQC1(R12, F5, F4, 6) # A5 A6
  2424. MADPS C31, C31, A3, B1
  2425. MADPS C41, C41, A4, B1
  2426. gsLQC1(R12, F7, F6, 7) # A7 A8
  2427. MADPS C32, C32, A3, B2
  2428. MADPS C42, C42, A4, B2
  2429. FETCH $0, 8 * SIZE(PREB)
  2430. MADPS C13, C13, A1, B3
  2431. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  2432. MADPS C23, C23, A2, B3
  2433. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  2434. MADPS C33, C33, A3, B3
  2435. MADPS C43, C43, A4, B3
  2436. MADPS C14, C14, A1, B4
  2437. PLU B7, B5, B5
  2438. FETCH $0, 16 * SIZE(PREA)
  2439. MADPS C24, C24, A2, B4
  2440. PLU B8, B6, B6
  2441. FETCH $0, 20 * SIZE(PREA)
  2442. MADPS C34, C34, A3, B4
  2443. MADPS C44, C44, A4, B4
  2444. MADPS C11, C11, A5, B5
  2445. MADPS C21, C21, A6, B5
  2446. gsLQC1(R13, F9, F8, 0) # B1 B2
  2447. MADPS C12, C12, A5, B6
  2448. MADPS C22, C22, A6, B6
  2449. gsLQC1(R12, F1, F0, 0) # A1 A2
  2450. MADPS C31, C31, A7, B5
  2451. MADPS C41, C41, A8, B5
  2452. gsLQC1(R12, F3, F2, 1) # A3 A4
  2453. MADPS C32, C32, A7, B6
  2454. MADPS C42, C42, A8, B6
  2455. FETCH $0, 12 * SIZE(PREB)
  2456. MADPS C13, C13, A5, B7
  2457. MADPS C23, C23, A6, B7
  2458. daddiu PREB, PREB, 16 * SIZE
  2459. MADPS C33, C33, A7, B7
  2460. MADPS C43, C43, A8, B7
  2461. MADPS C14, C14, A5, B8
  2462. PLU B3, B1, B1
  2463. FETCH $0, 24 * SIZE(PREA)
  2464. MADPS C24, C24, A6, B8
  2465. PLU B4, B2, B2
  2466. FETCH $0, 28 * SIZE(PREA)
  2467. daddiu PREA, PREA, 32 * SIZE
  2468. MADPS C34, C34, A7, B8
  2469. MADPS C44, C44, A8, B8
  2470. MADPS C11, C11, A1, B1
  2471. MADPS C21, C21, A2, B1
  2472. gsLQC1(R13, F13, F12, 1) # B3 B4
  2473. MADPS C12, C12, A1, B2
  2474. MADPS C22, C22, A2, B2
  2475. gsLQC1(R12, F5, F4, 2) # A5 A6
  2476. MADPS C31, C31, A3, B1
  2477. MADPS C41, C41, A4, B1
  2478. gsLQC1(R12, F7, F6, 3) # A7 A8
  2479. MADPS C32, C32, A3, B2
  2480. MADPS C42, C42, A4, B2
  2481. FETCH $0, 0 * SIZE(PREB)
  2482. MADPS C13, C13, A1, B3
  2483. MADPS C23, C23, A2, B3
  2484. MADPS C33, C33, A3, B3
  2485. MADPS C43, C43, A4, B3
  2486. MADPS C14, C14, A1, B4
  2487. PLU B7, B5, B5
  2488. FETCH $0, 0 * SIZE(PREA)
  2489. MADPS C24, C24, A2, B4
  2490. PLU B8, B6, B6
  2491. FETCH $0, 4 * SIZE(PREA)
  2492. MADPS C34, C34, A3, B4
  2493. MADPS C44, C44, A4, B4
  2494. MADPS C11, C11, A5, B5
  2495. MADPS C21, C21, A6, B5
  2496. gsLQC1(R13, F9, F8, 2) # B1 B2
  2497. MADPS C12, C12, A5, B6
  2498. MADPS C22, C22, A6, B6
  2499. gsLQC1(R12, F1, F0, 4) # A1 A2
  2500. MADPS C31, C31, A7, B5
  2501. MADPS C41, C41, A8, B5
  2502. gsLQC1(R12, F3, F2, 5) # A3 A4
  2503. MADPS C32, C32, A7, B6
  2504. MADPS C42, C42, A8, B6
  2505. FETCH $0, 4 * SIZE(PREB)
  2506. MADPS C13, C13, A5, B7
  2507. MADPS C23, C23, A6, B7
  2508. MADPS C33, C33, A7, B7
  2509. MADPS C43, C43, A8, B7
  2510. MADPS C14, C14, A5, B8
  2511. PLU B3, B1, B1
  2512. FETCH $0, 8 * SIZE(PREA)
  2513. MADPS C24, C24, A6, B8
  2514. PLU B4, B2, B2
  2515. FETCH $0, 12 * SIZE(PREA)
  2516. MADPS C34, C34, A7, B8
  2517. MADPS C44, C44, A8, B8
  2518. MADPS C11, C11, A1, B1
  2519. MADPS C21, C21, A2, B1
  2520. gsLQC1(R13, F13, F12, 3) # B3 B4
  2521. MADPS C12, C12, A1, B2
  2522. MADPS C22, C22, A2, B2
  2523. gsLQC1(R12, F5, F4, 6) # A5 A6
  2524. MADPS C31, C31, A3, B1
  2525. MADPS C41, C41, A4, B1
  2526. gsLQC1(R12, F7, F6, 7) # A7 A8
  2527. MADPS C32, C32, A3, B2
  2528. MADPS C42, C42, A4, B2
  2529. FETCH $0, 8 * SIZE(PREB)
  2530. MADPS C13, C13, A1, B3
  2531. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  2532. MADPS C23, C23, A2, B3
  2533. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  2534. MADPS C33, C33, A3, B3
  2535. MADPS C43, C43, A4, B3
  2536. MADPS C14, C14, A1, B4
  2537. PLU B7, B5, B5
  2538. FETCH $0, 16 * SIZE(PREA)
  2539. MADPS C24, C24, A2, B4
  2540. PLU B8, B6, B6
  2541. FETCH $0, 20 * SIZE(PREA)
  2542. MADPS C34, C34, A3, B4
  2543. MADPS C44, C44, A4, B4
  2544. MADPS C11, C11, A5, B5
  2545. MADPS C21, C21, A6, B5
  2546. gsLQC1(R13, F9, F8, 0) # B1 B2
  2547. MADPS C12, C12, A5, B6
  2548. MADPS C22, C22, A6, B6
  2549. gsLQC1(R12, F1, F0, 0) # A1 A2
  2550. MADPS C31, C31, A7, B5
  2551. MADPS C41, C41, A8, B5
  2552. gsLQC1(R12, F3, F2, 1) # A3 A4
  2553. MADPS C32, C32, A7, B6
  2554. MADPS C42, C42, A8, B6
  2555. FETCH $0, 12 * SIZE(PREB)
  2556. MADPS C13, C13, A5, B7
  2557. MADPS C23, C23, A6, B7
  2558. daddiu PREB, PREB, 16 * SIZE
  2559. MADPS C33, C33, A7, B7
  2560. MADPS C43, C43, A8, B7
  2561. MADPS C14, C14, A5, B8
  2562. PLU B3, B1, B1
  2563. FETCH $0, 24 * SIZE(PREA)
  2564. MADPS C24, C24, A6, B8
  2565. PLU B4, B2, B2
  2566. FETCH $0, 28 * SIZE(PREA)
  2567. daddiu PREA, PREA, 32 * SIZE
  2568. MADPS C34, C34, A7, B8
  2569. MADPS C44, C44, A8, B8
  2570. MADPS C11, C11, A1, B1
  2571. MADPS C21, C21, A2, B1
  2572. gsLQC1(R13, F13, F12, 1) # B3 B4
  2573. MADPS C12, C12, A1, B2
  2574. MADPS C22, C22, A2, B2
  2575. gsLQC1(R12, F5, F4, 2) # A5 A6
  2576. MADPS C31, C31, A3, B1
  2577. MADPS C41, C41, A4, B1
  2578. gsLQC1(R12, F7, F6, 3) # A7 A8
  2579. MADPS C32, C32, A3, B2
  2580. MADPS C42, C42, A4, B2
  2581. FETCH $0, 0 * SIZE(PREB)
  2582. MADPS C13, C13, A1, B3
  2583. MADPS C23, C23, A2, B3
  2584. MADPS C33, C33, A3, B3
  2585. MADPS C43, C43, A4, B3
  2586. MADPS C14, C14, A1, B4
  2587. PLU B7, B5, B5
  2588. FETCH $0, 0 * SIZE(PREA)
  2589. MADPS C24, C24, A2, B4
  2590. PLU B8, B6, B6
  2591. FETCH $0, 4 * SIZE(PREA)
  2592. MADPS C34, C34, A3, B4
  2593. MADPS C44, C44, A4, B4
  2594. MADPS C11, C11, A5, B5
  2595. MADPS C21, C21, A6, B5
  2596. gsLQC1(R13, F9, F8, 2) # B1 B2
  2597. MADPS C12, C12, A5, B6
  2598. MADPS C22, C22, A6, B6
  2599. gsLQC1(R12, F1, F0, 4) # A1 A2
  2600. MADPS C31, C31, A7, B5
  2601. MADPS C41, C41, A8, B5
  2602. gsLQC1(R12, F3, F2, 5) # A3 A4
  2603. MADPS C32, C32, A7, B6
  2604. MADPS C42, C42, A8, B6
  2605. FETCH $0, 4 * SIZE(PREB)
  2606. MADPS C13, C13, A5, B7
  2607. MADPS C23, C23, A6, B7
  2608. MADPS C33, C33, A7, B7
  2609. MADPS C43, C43, A8, B7
  2610. MADPS C14, C14, A5, B8
  2611. PLU B3, B1, B1
  2612. FETCH $0, 8 * SIZE(PREA)
  2613. MADPS C24, C24, A6, B8
  2614. PLU B4, B2, B2
  2615. FETCH $0, 12 * SIZE(PREA)
  2616. MADPS C34, C34, A7, B8
  2617. MADPS C44, C44, A8, B8
  2618. MADPS C11, C11, A1, B1
  2619. MADPS C21, C21, A2, B1
  2620. gsLQC1(R13, F13, F12, 3) # B3 B4
  2621. MADPS C12, C12, A1, B2
  2622. MADPS C22, C22, A2, B2
  2623. gsLQC1(R12, F5, F4, 6) # A5 A6
  2624. MADPS C31, C31, A3, B1
  2625. MADPS C41, C41, A4, B1
  2626. gsLQC1(R12, F7, F6, 7) # A7 A8
  2627. MADPS C32, C32, A3, B2
  2628. MADPS C42, C42, A4, B2
  2629. FETCH $0, 8 * SIZE(PREB)
  2630. MADPS C13, C13, A1, B3
  2631. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  2632. MADPS C23, C23, A2, B3
  2633. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  2634. MADPS C33, C33, A3, B3
  2635. MADPS C43, C43, A4, B3
  2636. MADPS C14, C14, A1, B4
  2637. PLU B7, B5, B5
  2638. FETCH $0, 16 * SIZE(PREA)
  2639. MADPS C24, C24, A2, B4
  2640. PLU B8, B6, B6
  2641. FETCH $0, 20 * SIZE(PREA)
  2642. MADPS C34, C34, A3, B4
  2643. MADPS C44, C44, A4, B4
  2644. MADPS C11, C11, A5, B5
  2645. MADPS C21, C21, A6, B5
  2646. gsLQC1(R13, F9, F8, 0) # B1 B2
  2647. MADPS C12, C12, A5, B6
  2648. MADPS C22, C22, A6, B6
  2649. gsLQC1(R12, F1, F0, 0) # A1 A2
  2650. MADPS C31, C31, A7, B5
  2651. MADPS C41, C41, A8, B5
  2652. gsLQC1(R12, F3, F2, 1) # A3 A4
  2653. MADPS C32, C32, A7, B6
  2654. MADPS C42, C42, A8, B6
  2655. FETCH $0, 12 * SIZE(PREB)
  2656. MADPS C13, C13, A5, B7
  2657. MADPS C23, C23, A6, B7
  2658. daddiu PREB, PREB, 16 * SIZE
  2659. MADPS C33, C33, A7, B7
  2660. MADPS C43, C43, A8, B7
  2661. MADPS C14, C14, A5, B8
  2662. PLU B3, B1, B1
  2663. FETCH $0, 24 * SIZE(PREA)
  2664. MADPS C24, C24, A6, B8
  2665. PLU B4, B2, B2
  2666. FETCH $0, 28 * SIZE(PREA)
  2667. daddiu PREA, PREA, 32 * SIZE
  2668. MADPS C34, C34, A7, B8
  2669. MADPS C44, C44, A8, B8
  # .L483 -- K-remainder stage for bit 4 of the K count.
  # Runs only when (K & 16) is non-zero (TEMP & 16 under TRMMKERNEL); otherwise
  # control goes straight to .L484.
  # The body is four back-to-back copies of one 100-instruction group.  Each
  # group bumps BO by 16*SIZE and AO by 32*SIZE (tagged 4KR*4NR and 4KR*8MR
  # below) and bumps PREB and PREA by the same strides, so the whole stage
  # covers four 4KR groups, i.e. the 16 K-steps selected by the mask.
  # Live on entry: A1-A4 and B1-B4 are read before anything in this block
  # writes them, so the preceding code must already have produced them.
  # Live on exit: the final gsLQC1 loads and PLU ops of group 4 leave A1-A4 and
  # B1-B4 in place for .L484, which reads them in its first instructions.
  # MADPS, PLU, FETCH and gsLQC1 are macros defined outside this view.
  # Presumably MADPS is a paired-single multiply-accumulate, PLU builds a
  # lane-permuted copy of its source, FETCH is a prefetch hint and gsLQC1 is a
  # 128-bit load into a pair of FP registers -- confirm against the macros.
  # NOTE(review): the existing "B3 B4" tag on the F13/F12 loads looks like it
  # should read "B5 B6": B3/B4 are written by PLU in this block and B5/B6 are
  # the operands consumed right after those loads.  Confirm against the
  # register defines before renaming the tags.
  2670. .align 4
  2671. .L483:
  2672. #ifndef TRMMKERNEL
  2673. andi L, K, 16 # L = K & 16: non-zero only when this 16-step stage is needed
  2674. #else
  2675. andi L, TEMP, 16 # TRMM build tests TEMP instead of K
  2676. #endif
  2677. blez L, .L484 # bit clear -> skip the stage
  2678. NOP # fills the slot after the branch (MIPS branch delay slot)
  # ---- group 1 of 4, step 1: A1-A4 x B1-B4, loads for step 2 interleaved ----
  2679. MADPS C11, C11, A1, B1
  2680. MADPS C21, C21, A2, B1
  2681. gsLQC1(R13, F13, F12, 1) # B3 B4
  2682. MADPS C12, C12, A1, B2
  2683. MADPS C22, C22, A2, B2
  2684. gsLQC1(R12, F5, F4, 2) # A5 A6
  2685. MADPS C31, C31, A3, B1
  2686. MADPS C41, C41, A4, B1
  2687. gsLQC1(R12, F7, F6, 3) # A7 A8
  2688. MADPS C32, C32, A3, B2
  2689. MADPS C42, C42, A4, B2
  2690. FETCH $0, 0 * SIZE(PREB) # touches the PREB stream ahead of use (presumably a prefetch)
  2691. MADPS C13, C13, A1, B3
  2692. MADPS C23, C23, A2, B3
  2693. MADPS C33, C33, A3, B3
  2694. MADPS C43, C43, A4, B3
  2695. MADPS C14, C14, A1, B4
  2696. PLU B7, B5, B5 # B7 built from B5, consumed by the step 2 products
  2697. FETCH $0, 0 * SIZE(PREA) # same idea for the PREA stream
  2698. MADPS C24, C24, A2, B4
  2699. PLU B8, B6, B6 # B8 built from B6, consumed by the step 2 products
  2700. FETCH $0, 4 * SIZE(PREA)
  2701. MADPS C34, C34, A3, B4
  2702. MADPS C44, C44, A4, B4
  # ---- group 1, step 2: A5-A8 x B5-B8, reloads A1-A4 and B1/B2 for step 3 ----
  2703. MADPS C11, C11, A5, B5
  2704. MADPS C21, C21, A6, B5
  2705. gsLQC1(R13, F9, F8, 2) # B1 B2
  2706. MADPS C12, C12, A5, B6
  2707. MADPS C22, C22, A6, B6
  2708. gsLQC1(R12, F1, F0, 4) # A1 A2
  2709. MADPS C31, C31, A7, B5
  2710. MADPS C41, C41, A8, B5
  2711. gsLQC1(R12, F3, F2, 5) # A3 A4
  2712. MADPS C32, C32, A7, B6
  2713. MADPS C42, C42, A8, B6
  2714. FETCH $0, 4 * SIZE(PREB)
  2715. MADPS C13, C13, A5, B7
  2716. MADPS C23, C23, A6, B7
  2717. MADPS C33, C33, A7, B7
  2718. MADPS C43, C43, A8, B7
  2719. MADPS C14, C14, A5, B8
  2720. PLU B3, B1, B1 # B3 built from B1, consumed by the step 3 products
  2721. FETCH $0, 8 * SIZE(PREA)
  2722. MADPS C24, C24, A6, B8
  2723. PLU B4, B2, B2 # B4 built from B2, consumed by the step 3 products
  2724. FETCH $0, 12 * SIZE(PREA)
  2725. MADPS C34, C34, A7, B8
  2726. MADPS C44, C44, A8, B8
  # ---- group 1, step 3: A1-A4 x B1-B4, BO and AO are bumped in this step ----
  2727. MADPS C11, C11, A1, B1
  2728. MADPS C21, C21, A2, B1
  2729. gsLQC1(R13, F13, F12, 3) # B3 B4
  2730. MADPS C12, C12, A1, B2
  2731. MADPS C22, C22, A2, B2
  2732. gsLQC1(R12, F5, F4, 6) # A5 A6
  2733. MADPS C31, C31, A3, B1
  2734. MADPS C41, C41, A4, B1
  2735. gsLQC1(R12, F7, F6, 7) # A7 A8
  2736. MADPS C32, C32, A3, B2
  2737. MADPS C42, C42, A4, B2
  2738. FETCH $0, 8 * SIZE(PREB)
  2739. MADPS C13, C13, A1, B3
  2740. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  2741. MADPS C23, C23, A2, B3
  2742. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  2743. MADPS C33, C33, A3, B3
  2744. MADPS C43, C43, A4, B3
  2745. MADPS C14, C14, A1, B4
  2746. PLU B7, B5, B5
  2747. FETCH $0, 16 * SIZE(PREA)
  2748. MADPS C24, C24, A2, B4
  2749. PLU B8, B6, B6
  2750. FETCH $0, 20 * SIZE(PREA)
  2751. MADPS C34, C34, A3, B4
  2752. MADPS C44, C44, A4, B4
  # ---- group 1, step 4: A5-A8 x B5-B8 ----
  # The gsLQC1 index arguments restart at 0 and 1 after the BO/AO bump, so these
  # loads fetch the operands for the next group (consistent with R13/R12 being
  # BO/AO -- confirm against the register defines).
  2753. MADPS C11, C11, A5, B5
  2754. MADPS C21, C21, A6, B5
  2755. gsLQC1(R13, F9, F8, 0) # B1 B2
  2756. MADPS C12, C12, A5, B6
  2757. MADPS C22, C22, A6, B6
  2758. gsLQC1(R12, F1, F0, 0) # A1 A2
  2759. MADPS C31, C31, A7, B5
  2760. MADPS C41, C41, A8, B5
  2761. gsLQC1(R12, F3, F2, 1) # A3 A4
  2762. MADPS C32, C32, A7, B6
  2763. MADPS C42, C42, A8, B6
  2764. FETCH $0, 12 * SIZE(PREB)
  2765. MADPS C13, C13, A5, B7
  2766. MADPS C23, C23, A6, B7
  2767. daddiu PREB, PREB, 16 * SIZE # PREB advances by the same 16*SIZE stride as BO
  2768. MADPS C33, C33, A7, B7
  2769. MADPS C43, C43, A8, B7
  2770. MADPS C14, C14, A5, B8
  2771. PLU B3, B1, B1
  2772. FETCH $0, 24 * SIZE(PREA)
  2773. MADPS C24, C24, A6, B8
  2774. PLU B4, B2, B2
  2775. FETCH $0, 28 * SIZE(PREA)
  2776. daddiu PREA, PREA, 32 * SIZE # PREA advances by the same 32*SIZE stride as AO
  2777. MADPS C34, C34, A7, B8
  2778. MADPS C44, C44, A8, B8
  # ---- group 2 of 4: repeats the group 1 schedule ----
  2779. MADPS C11, C11, A1, B1
  2780. MADPS C21, C21, A2, B1
  2781. gsLQC1(R13, F13, F12, 1) # B3 B4
  2782. MADPS C12, C12, A1, B2
  2783. MADPS C22, C22, A2, B2
  2784. gsLQC1(R12, F5, F4, 2) # A5 A6
  2785. MADPS C31, C31, A3, B1
  2786. MADPS C41, C41, A4, B1
  2787. gsLQC1(R12, F7, F6, 3) # A7 A8
  2788. MADPS C32, C32, A3, B2
  2789. MADPS C42, C42, A4, B2
  2790. FETCH $0, 0 * SIZE(PREB)
  2791. MADPS C13, C13, A1, B3
  2792. MADPS C23, C23, A2, B3
  2793. MADPS C33, C33, A3, B3
  2794. MADPS C43, C43, A4, B3
  2795. MADPS C14, C14, A1, B4
  2796. PLU B7, B5, B5
  2797. FETCH $0, 0 * SIZE(PREA)
  2798. MADPS C24, C24, A2, B4
  2799. PLU B8, B6, B6
  2800. FETCH $0, 4 * SIZE(PREA)
  2801. MADPS C34, C34, A3, B4
  2802. MADPS C44, C44, A4, B4
  2803. MADPS C11, C11, A5, B5
  2804. MADPS C21, C21, A6, B5
  2805. gsLQC1(R13, F9, F8, 2) # B1 B2
  2806. MADPS C12, C12, A5, B6
  2807. MADPS C22, C22, A6, B6
  2808. gsLQC1(R12, F1, F0, 4) # A1 A2
  2809. MADPS C31, C31, A7, B5
  2810. MADPS C41, C41, A8, B5
  2811. gsLQC1(R12, F3, F2, 5) # A3 A4
  2812. MADPS C32, C32, A7, B6
  2813. MADPS C42, C42, A8, B6
  2814. FETCH $0, 4 * SIZE(PREB)
  2815. MADPS C13, C13, A5, B7
  2816. MADPS C23, C23, A6, B7
  2817. MADPS C33, C33, A7, B7
  2818. MADPS C43, C43, A8, B7
  2819. MADPS C14, C14, A5, B8
  2820. PLU B3, B1, B1
  2821. FETCH $0, 8 * SIZE(PREA)
  2822. MADPS C24, C24, A6, B8
  2823. PLU B4, B2, B2
  2824. FETCH $0, 12 * SIZE(PREA)
  2825. MADPS C34, C34, A7, B8
  2826. MADPS C44, C44, A8, B8
  2827. MADPS C11, C11, A1, B1
  2828. MADPS C21, C21, A2, B1
  2829. gsLQC1(R13, F13, F12, 3) # B3 B4
  2830. MADPS C12, C12, A1, B2
  2831. MADPS C22, C22, A2, B2
  2832. gsLQC1(R12, F5, F4, 6) # A5 A6
  2833. MADPS C31, C31, A3, B1
  2834. MADPS C41, C41, A4, B1
  2835. gsLQC1(R12, F7, F6, 7) # A7 A8
  2836. MADPS C32, C32, A3, B2
  2837. MADPS C42, C42, A4, B2
  2838. FETCH $0, 8 * SIZE(PREB)
  2839. MADPS C13, C13, A1, B3
  2840. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  2841. MADPS C23, C23, A2, B3
  2842. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  2843. MADPS C33, C33, A3, B3
  2844. MADPS C43, C43, A4, B3
  2845. MADPS C14, C14, A1, B4
  2846. PLU B7, B5, B5
  2847. FETCH $0, 16 * SIZE(PREA)
  2848. MADPS C24, C24, A2, B4
  2849. PLU B8, B6, B6
  2850. FETCH $0, 20 * SIZE(PREA)
  2851. MADPS C34, C34, A3, B4
  2852. MADPS C44, C44, A4, B4
  2853. MADPS C11, C11, A5, B5
  2854. MADPS C21, C21, A6, B5
  2855. gsLQC1(R13, F9, F8, 0) # B1 B2
  2856. MADPS C12, C12, A5, B6
  2857. MADPS C22, C22, A6, B6
  2858. gsLQC1(R12, F1, F0, 0) # A1 A2
  2859. MADPS C31, C31, A7, B5
  2860. MADPS C41, C41, A8, B5
  2861. gsLQC1(R12, F3, F2, 1) # A3 A4
  2862. MADPS C32, C32, A7, B6
  2863. MADPS C42, C42, A8, B6
  2864. FETCH $0, 12 * SIZE(PREB)
  2865. MADPS C13, C13, A5, B7
  2866. MADPS C23, C23, A6, B7
  2867. daddiu PREB, PREB, 16 * SIZE
  2868. MADPS C33, C33, A7, B7
  2869. MADPS C43, C43, A8, B7
  2870. MADPS C14, C14, A5, B8
  2871. PLU B3, B1, B1
  2872. FETCH $0, 24 * SIZE(PREA)
  2873. MADPS C24, C24, A6, B8
  2874. PLU B4, B2, B2
  2875. FETCH $0, 28 * SIZE(PREA)
  2876. daddiu PREA, PREA, 32 * SIZE
  2877. MADPS C34, C34, A7, B8
  2878. MADPS C44, C44, A8, B8
  # ---- group 3 of 4: repeats the group 1 schedule ----
  2879. MADPS C11, C11, A1, B1
  2880. MADPS C21, C21, A2, B1
  2881. gsLQC1(R13, F13, F12, 1) # B3 B4
  2882. MADPS C12, C12, A1, B2
  2883. MADPS C22, C22, A2, B2
  2884. gsLQC1(R12, F5, F4, 2) # A5 A6
  2885. MADPS C31, C31, A3, B1
  2886. MADPS C41, C41, A4, B1
  2887. gsLQC1(R12, F7, F6, 3) # A7 A8
  2888. MADPS C32, C32, A3, B2
  2889. MADPS C42, C42, A4, B2
  2890. FETCH $0, 0 * SIZE(PREB)
  2891. MADPS C13, C13, A1, B3
  2892. MADPS C23, C23, A2, B3
  2893. MADPS C33, C33, A3, B3
  2894. MADPS C43, C43, A4, B3
  2895. MADPS C14, C14, A1, B4
  2896. PLU B7, B5, B5
  2897. FETCH $0, 0 * SIZE(PREA)
  2898. MADPS C24, C24, A2, B4
  2899. PLU B8, B6, B6
  2900. FETCH $0, 4 * SIZE(PREA)
  2901. MADPS C34, C34, A3, B4
  2902. MADPS C44, C44, A4, B4
  2903. MADPS C11, C11, A5, B5
  2904. MADPS C21, C21, A6, B5
  2905. gsLQC1(R13, F9, F8, 2) # B1 B2
  2906. MADPS C12, C12, A5, B6
  2907. MADPS C22, C22, A6, B6
  2908. gsLQC1(R12, F1, F0, 4) # A1 A2
  2909. MADPS C31, C31, A7, B5
  2910. MADPS C41, C41, A8, B5
  2911. gsLQC1(R12, F3, F2, 5) # A3 A4
  2912. MADPS C32, C32, A7, B6
  2913. MADPS C42, C42, A8, B6
  2914. FETCH $0, 4 * SIZE(PREB)
  2915. MADPS C13, C13, A5, B7
  2916. MADPS C23, C23, A6, B7
  2917. MADPS C33, C33, A7, B7
  2918. MADPS C43, C43, A8, B7
  2919. MADPS C14, C14, A5, B8
  2920. PLU B3, B1, B1
  2921. FETCH $0, 8 * SIZE(PREA)
  2922. MADPS C24, C24, A6, B8
  2923. PLU B4, B2, B2
  2924. FETCH $0, 12 * SIZE(PREA)
  2925. MADPS C34, C34, A7, B8
  2926. MADPS C44, C44, A8, B8
  2927. MADPS C11, C11, A1, B1
  2928. MADPS C21, C21, A2, B1
  2929. gsLQC1(R13, F13, F12, 3) # B3 B4
  2930. MADPS C12, C12, A1, B2
  2931. MADPS C22, C22, A2, B2
  2932. gsLQC1(R12, F5, F4, 6) # A5 A6
  2933. MADPS C31, C31, A3, B1
  2934. MADPS C41, C41, A4, B1
  2935. gsLQC1(R12, F7, F6, 7) # A7 A8
  2936. MADPS C32, C32, A3, B2
  2937. MADPS C42, C42, A4, B2
  2938. FETCH $0, 8 * SIZE(PREB)
  2939. MADPS C13, C13, A1, B3
  2940. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  2941. MADPS C23, C23, A2, B3
  2942. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  2943. MADPS C33, C33, A3, B3
  2944. MADPS C43, C43, A4, B3
  2945. MADPS C14, C14, A1, B4
  2946. PLU B7, B5, B5
  2947. FETCH $0, 16 * SIZE(PREA)
  2948. MADPS C24, C24, A2, B4
  2949. PLU B8, B6, B6
  2950. FETCH $0, 20 * SIZE(PREA)
  2951. MADPS C34, C34, A3, B4
  2952. MADPS C44, C44, A4, B4
  2953. MADPS C11, C11, A5, B5
  2954. MADPS C21, C21, A6, B5
  2955. gsLQC1(R13, F9, F8, 0) # B1 B2
  2956. MADPS C12, C12, A5, B6
  2957. MADPS C22, C22, A6, B6
  2958. gsLQC1(R12, F1, F0, 0) # A1 A2
  2959. MADPS C31, C31, A7, B5
  2960. MADPS C41, C41, A8, B5
  2961. gsLQC1(R12, F3, F2, 1) # A3 A4
  2962. MADPS C32, C32, A7, B6
  2963. MADPS C42, C42, A8, B6
  2964. FETCH $0, 12 * SIZE(PREB)
  2965. MADPS C13, C13, A5, B7
  2966. MADPS C23, C23, A6, B7
  2967. daddiu PREB, PREB, 16 * SIZE
  2968. MADPS C33, C33, A7, B7
  2969. MADPS C43, C43, A8, B7
  2970. MADPS C14, C14, A5, B8
  2971. PLU B3, B1, B1
  2972. FETCH $0, 24 * SIZE(PREA)
  2973. MADPS C24, C24, A6, B8
  2974. PLU B4, B2, B2
  2975. FETCH $0, 28 * SIZE(PREA)
  2976. daddiu PREA, PREA, 32 * SIZE
  2977. MADPS C34, C34, A7, B8
  2978. MADPS C44, C44, A8, B8
  # ---- group 4 of 4: repeats the group 1 schedule; its final loads feed .L484 ----
  2979. MADPS C11, C11, A1, B1
  2980. MADPS C21, C21, A2, B1
  2981. gsLQC1(R13, F13, F12, 1) # B3 B4
  2982. MADPS C12, C12, A1, B2
  2983. MADPS C22, C22, A2, B2
  2984. gsLQC1(R12, F5, F4, 2) # A5 A6
  2985. MADPS C31, C31, A3, B1
  2986. MADPS C41, C41, A4, B1
  2987. gsLQC1(R12, F7, F6, 3) # A7 A8
  2988. MADPS C32, C32, A3, B2
  2989. MADPS C42, C42, A4, B2
  2990. FETCH $0, 0 * SIZE(PREB)
  2991. MADPS C13, C13, A1, B3
  2992. MADPS C23, C23, A2, B3
  2993. MADPS C33, C33, A3, B3
  2994. MADPS C43, C43, A4, B3
  2995. MADPS C14, C14, A1, B4
  2996. PLU B7, B5, B5
  2997. FETCH $0, 0 * SIZE(PREA)
  2998. MADPS C24, C24, A2, B4
  2999. PLU B8, B6, B6
  3000. FETCH $0, 4 * SIZE(PREA)
  3001. MADPS C34, C34, A3, B4
  3002. MADPS C44, C44, A4, B4
  3003. MADPS C11, C11, A5, B5
  3004. MADPS C21, C21, A6, B5
  3005. gsLQC1(R13, F9, F8, 2) # B1 B2
  3006. MADPS C12, C12, A5, B6
  3007. MADPS C22, C22, A6, B6
  3008. gsLQC1(R12, F1, F0, 4) # A1 A2
  3009. MADPS C31, C31, A7, B5
  3010. MADPS C41, C41, A8, B5
  3011. gsLQC1(R12, F3, F2, 5) # A3 A4
  3012. MADPS C32, C32, A7, B6
  3013. MADPS C42, C42, A8, B6
  3014. FETCH $0, 4 * SIZE(PREB)
  3015. MADPS C13, C13, A5, B7
  3016. MADPS C23, C23, A6, B7
  3017. MADPS C33, C33, A7, B7
  3018. MADPS C43, C43, A8, B7
  3019. MADPS C14, C14, A5, B8
  3020. PLU B3, B1, B1
  3021. FETCH $0, 8 * SIZE(PREA)
  3022. MADPS C24, C24, A6, B8
  3023. PLU B4, B2, B2
  3024. FETCH $0, 12 * SIZE(PREA)
  3025. MADPS C34, C34, A7, B8
  3026. MADPS C44, C44, A8, B8
  3027. MADPS C11, C11, A1, B1
  3028. MADPS C21, C21, A2, B1
  3029. gsLQC1(R13, F13, F12, 3) # B3 B4
  3030. MADPS C12, C12, A1, B2
  3031. MADPS C22, C22, A2, B2
  3032. gsLQC1(R12, F5, F4, 6) # A5 A6
  3033. MADPS C31, C31, A3, B1
  3034. MADPS C41, C41, A4, B1
  3035. gsLQC1(R12, F7, F6, 7) # A7 A8
  3036. MADPS C32, C32, A3, B2
  3037. MADPS C42, C42, A4, B2
  3038. FETCH $0, 8 * SIZE(PREB)
  3039. MADPS C13, C13, A1, B3
  3040. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  3041. MADPS C23, C23, A2, B3
  3042. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  3043. MADPS C33, C33, A3, B3
  3044. MADPS C43, C43, A4, B3
  3045. MADPS C14, C14, A1, B4
  3046. PLU B7, B5, B5
  3047. FETCH $0, 16 * SIZE(PREA)
  3048. MADPS C24, C24, A2, B4
  3049. PLU B8, B6, B6
  3050. FETCH $0, 20 * SIZE(PREA)
  3051. MADPS C34, C34, A3, B4
  3052. MADPS C44, C44, A4, B4
  3053. MADPS C11, C11, A5, B5
  3054. MADPS C21, C21, A6, B5
  3055. gsLQC1(R13, F9, F8, 0) # B1 B2
  3056. MADPS C12, C12, A5, B6
  3057. MADPS C22, C22, A6, B6
  3058. gsLQC1(R12, F1, F0, 0) # A1 A2
  3059. MADPS C31, C31, A7, B5
  3060. MADPS C41, C41, A8, B5
  3061. gsLQC1(R12, F3, F2, 1) # A3 A4
  3062. MADPS C32, C32, A7, B6
  3063. MADPS C42, C42, A8, B6
  3064. FETCH $0, 12 * SIZE(PREB)
  3065. MADPS C13, C13, A5, B7
  3066. MADPS C23, C23, A6, B7
  3067. daddiu PREB, PREB, 16 * SIZE
  3068. MADPS C33, C33, A7, B7
  3069. MADPS C43, C43, A8, B7
  3070. MADPS C14, C14, A5, B8
  3071. PLU B3, B1, B1
  3072. FETCH $0, 24 * SIZE(PREA)
  3073. MADPS C24, C24, A6, B8
  3074. PLU B4, B2, B2
  3075. FETCH $0, 28 * SIZE(PREA)
  3076. daddiu PREA, PREA, 32 * SIZE
  3077. MADPS C34, C34, A7, B8
  3078. MADPS C44, C44, A8, B8
  3079. .align 4
  3080. .L484: # K tail for the 8x4 tile: if bit 3 of the K count is set, run 8 more k-steps (two unrolled groups of 4)
  3081. #ifndef TRMMKERNEL
  3082. andi L, K, 8
  3083. #else
  3084. andi L, TEMP, 8 # TRMM build tests TEMP instead of K
  3085. #endif
  3086. blez L, .L485 # bit clear: go to the 4-step tail
  3087. NOP # fills the branch delay slot
  3088. MADPS C11, C11, A1, B1 # group 1, k-step 1: A1..A4 against B1..B4
  3089. MADPS C21, C21, A2, B1
  3090. gsLQC1(R13, F13, F12, 1) # B3 B4
  3091. MADPS C12, C12, A1, B2
  3092. MADPS C22, C22, A2, B2
  3093. gsLQC1(R12, F5, F4, 2) # A5 A6
  3094. MADPS C31, C31, A3, B1
  3095. MADPS C41, C41, A4, B1
  3096. gsLQC1(R12, F7, F6, 3) # A7 A8
  3097. MADPS C32, C32, A3, B2
  3098. MADPS C42, C42, A4, B2
  3099. FETCH $0, 0 * SIZE(PREB)
  3100. MADPS C13, C13, A1, B3
  3101. MADPS C23, C23, A2, B3
  3102. MADPS C33, C33, A3, B3
  3103. MADPS C43, C43, A4, B3
  3104. MADPS C14, C14, A1, B4
  3105. PLU B7, B5, B5 # derive B7 from B5 for the next k-step
  3106. FETCH $0, 0 * SIZE(PREA)
  3107. MADPS C24, C24, A2, B4
  3108. PLU B8, B6, B6 # derive B8 from B6 for the next k-step
  3109. FETCH $0, 4 * SIZE(PREA)
  3110. MADPS C34, C34, A3, B4
  3111. MADPS C44, C44, A4, B4
  3112. MADPS C11, C11, A5, B5 # group 1, k-step 2: A5..A8 against B5..B8
  3113. MADPS C21, C21, A6, B5
  3114. gsLQC1(R13, F9, F8, 2) # B1 B2
  3115. MADPS C12, C12, A5, B6
  3116. MADPS C22, C22, A6, B6
  3117. gsLQC1(R12, F1, F0, 4) # A1 A2
  3118. MADPS C31, C31, A7, B5
  3119. MADPS C41, C41, A8, B5
  3120. gsLQC1(R12, F3, F2, 5) # A3 A4
  3121. MADPS C32, C32, A7, B6
  3122. MADPS C42, C42, A8, B6
  3123. FETCH $0, 4 * SIZE(PREB)
  3124. MADPS C13, C13, A5, B7
  3125. MADPS C23, C23, A6, B7
  3126. MADPS C33, C33, A7, B7
  3127. MADPS C43, C43, A8, B7
  3128. MADPS C14, C14, A5, B8
  3129. PLU B3, B1, B1
  3130. FETCH $0, 8 * SIZE(PREA)
  3131. MADPS C24, C24, A6, B8
  3132. PLU B4, B2, B2
  3133. FETCH $0, 12 * SIZE(PREA)
  3134. MADPS C34, C34, A7, B8
  3135. MADPS C44, C44, A8, B8
  3136. MADPS C11, C11, A1, B1 # group 1, k-step 3
  3137. MADPS C21, C21, A2, B1
  3138. gsLQC1(R13, F13, F12, 3) # B3 B4
  3139. MADPS C12, C12, A1, B2
  3140. MADPS C22, C22, A2, B2
  3141. gsLQC1(R12, F5, F4, 6) # A5 A6
  3142. MADPS C31, C31, A3, B1
  3143. MADPS C41, C41, A4, B1
  3144. gsLQC1(R12, F7, F6, 7) # A7 A8
  3145. MADPS C32, C32, A3, B2
  3146. MADPS C42, C42, A4, B2
  3147. FETCH $0, 8 * SIZE(PREB)
  3148. MADPS C13, C13, A1, B3
  3149. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  3150. MADPS C23, C23, A2, B3
  3151. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  3152. MADPS C33, C33, A3, B3
  3153. MADPS C43, C43, A4, B3
  3154. MADPS C14, C14, A1, B4
  3155. PLU B7, B5, B5
  3156. FETCH $0, 16 * SIZE(PREA)
  3157. MADPS C24, C24, A2, B4
  3158. PLU B8, B6, B6
  3159. FETCH $0, 20 * SIZE(PREA)
  3160. MADPS C34, C34, A3, B4
  3161. MADPS C44, C44, A4, B4
  3162. MADPS C11, C11, A5, B5 # group 1, k-step 4; reloads below use the already advanced AO/BO at offset 0
  3163. MADPS C21, C21, A6, B5
  3164. gsLQC1(R13, F9, F8, 0) # B1 B2
  3165. MADPS C12, C12, A5, B6
  3166. MADPS C22, C22, A6, B6
  3167. gsLQC1(R12, F1, F0, 0) # A1 A2
  3168. MADPS C31, C31, A7, B5
  3169. MADPS C41, C41, A8, B5
  3170. gsLQC1(R12, F3, F2, 1) # A3 A4
  3171. MADPS C32, C32, A7, B6
  3172. MADPS C42, C42, A8, B6
  3173. FETCH $0, 12 * SIZE(PREB)
  3174. MADPS C13, C13, A5, B7
  3175. MADPS C23, C23, A6, B7
  3176. daddiu PREB, PREB, 16 * SIZE # prefetch cursor for B follows BO
  3177. MADPS C33, C33, A7, B7
  3178. MADPS C43, C43, A8, B7
  3179. MADPS C14, C14, A5, B8
  3180. PLU B3, B1, B1
  3181. FETCH $0, 24 * SIZE(PREA)
  3182. MADPS C24, C24, A6, B8
  3183. PLU B4, B2, B2
  3184. FETCH $0, 28 * SIZE(PREA)
  3185. daddiu PREA, PREA, 32 * SIZE # prefetch cursor for A follows AO
  3186. MADPS C34, C34, A7, B8
  3187. MADPS C44, C44, A8, B8
  3188. MADPS C11, C11, A1, B1 # group 2, k-step 1 (same schedule as group 1)
  3189. MADPS C21, C21, A2, B1
  3190. gsLQC1(R13, F13, F12, 1) # B3 B4
  3191. MADPS C12, C12, A1, B2
  3192. MADPS C22, C22, A2, B2
  3193. gsLQC1(R12, F5, F4, 2) # A5 A6
  3194. MADPS C31, C31, A3, B1
  3195. MADPS C41, C41, A4, B1
  3196. gsLQC1(R12, F7, F6, 3) # A7 A8
  3197. MADPS C32, C32, A3, B2
  3198. MADPS C42, C42, A4, B2
  3199. FETCH $0, 0 * SIZE(PREB)
  3200. MADPS C13, C13, A1, B3
  3201. MADPS C23, C23, A2, B3
  3202. MADPS C33, C33, A3, B3
  3203. MADPS C43, C43, A4, B3
  3204. MADPS C14, C14, A1, B4
  3205. PLU B7, B5, B5
  3206. FETCH $0, 0 * SIZE(PREA)
  3207. MADPS C24, C24, A2, B4
  3208. PLU B8, B6, B6
  3209. FETCH $0, 4 * SIZE(PREA)
  3210. MADPS C34, C34, A3, B4
  3211. MADPS C44, C44, A4, B4
  3212. MADPS C11, C11, A5, B5 # group 2, k-step 2
  3213. MADPS C21, C21, A6, B5
  3214. gsLQC1(R13, F9, F8, 2) # B1 B2
  3215. MADPS C12, C12, A5, B6
  3216. MADPS C22, C22, A6, B6
  3217. gsLQC1(R12, F1, F0, 4) # A1 A2
  3218. MADPS C31, C31, A7, B5
  3219. MADPS C41, C41, A8, B5
  3220. gsLQC1(R12, F3, F2, 5) # A3 A4
  3221. MADPS C32, C32, A7, B6
  3222. MADPS C42, C42, A8, B6
  3223. FETCH $0, 4 * SIZE(PREB)
  3224. MADPS C13, C13, A5, B7
  3225. MADPS C23, C23, A6, B7
  3226. MADPS C33, C33, A7, B7
  3227. MADPS C43, C43, A8, B7
  3228. MADPS C14, C14, A5, B8
  3229. PLU B3, B1, B1
  3230. FETCH $0, 8 * SIZE(PREA)
  3231. MADPS C24, C24, A6, B8
  3232. PLU B4, B2, B2
  3233. FETCH $0, 12 * SIZE(PREA)
  3234. MADPS C34, C34, A7, B8
  3235. MADPS C44, C44, A8, B8
  3236. MADPS C11, C11, A1, B1 # group 2, k-step 3
  3237. MADPS C21, C21, A2, B1
  3238. gsLQC1(R13, F13, F12, 3) # B3 B4
  3239. MADPS C12, C12, A1, B2
  3240. MADPS C22, C22, A2, B2
  3241. gsLQC1(R12, F5, F4, 6) # A5 A6
  3242. MADPS C31, C31, A3, B1
  3243. MADPS C41, C41, A4, B1
  3244. gsLQC1(R12, F7, F6, 7) # A7 A8
  3245. MADPS C32, C32, A3, B2
  3246. MADPS C42, C42, A4, B2
  3247. FETCH $0, 8 * SIZE(PREB)
  3248. MADPS C13, C13, A1, B3
  3249. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  3250. MADPS C23, C23, A2, B3
  3251. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  3252. MADPS C33, C33, A3, B3
  3253. MADPS C43, C43, A4, B3
  3254. MADPS C14, C14, A1, B4
  3255. PLU B7, B5, B5
  3256. FETCH $0, 16 * SIZE(PREA)
  3257. MADPS C24, C24, A2, B4
  3258. PLU B8, B6, B6
  3259. FETCH $0, 20 * SIZE(PREA)
  3260. MADPS C34, C34, A3, B4
  3261. MADPS C44, C44, A4, B4
  3262. MADPS C11, C11, A5, B5 # group 2, k-step 4; operands for the next tail are preloaded below
  3263. MADPS C21, C21, A6, B5
  3264. gsLQC1(R13, F9, F8, 0) # B1 B2
  3265. MADPS C12, C12, A5, B6
  3266. MADPS C22, C22, A6, B6
  3267. gsLQC1(R12, F1, F0, 0) # A1 A2
  3268. MADPS C31, C31, A7, B5
  3269. MADPS C41, C41, A8, B5
  3270. gsLQC1(R12, F3, F2, 1) # A3 A4
  3271. MADPS C32, C32, A7, B6
  3272. MADPS C42, C42, A8, B6
  3273. FETCH $0, 12 * SIZE(PREB)
  3274. MADPS C13, C13, A5, B7
  3275. MADPS C23, C23, A6, B7
  3276. daddiu PREB, PREB, 16 * SIZE
  3277. MADPS C33, C33, A7, B7
  3278. MADPS C43, C43, A8, B7
  3279. MADPS C14, C14, A5, B8
  3280. PLU B3, B1, B1
  3281. FETCH $0, 24 * SIZE(PREA)
  3282. MADPS C24, C24, A6, B8
  3283. PLU B4, B2, B2
  3284. FETCH $0, 28 * SIZE(PREA)
  3285. daddiu PREA, PREA, 32 * SIZE
  3286. MADPS C34, C34, A7, B8
  3287. MADPS C44, C44, A8, B8
  3288. .align 4
  3289. .L485: # K tail for the 8x4 tile: if bit 2 of the K count is set, run one unrolled group of 4 k-steps
  3290. #ifndef TRMMKERNEL
  3291. andi L, K, 4
  3292. #else
  3293. andi L, TEMP, 4 # TRMM build tests TEMP instead of K
  3294. #endif
  3295. blez L, .L486 # bit clear: go to the 2-step tail
  3296. NOP # fills the branch delay slot
  3297. MADPS C11, C11, A1, B1 # k-step 1: A1..A4 against B1..B4
  3298. MADPS C21, C21, A2, B1
  3299. gsLQC1(R13, F13, F12, 1) # B3 B4
  3300. MADPS C12, C12, A1, B2
  3301. MADPS C22, C22, A2, B2
  3302. gsLQC1(R12, F5, F4, 2) # A5 A6
  3303. MADPS C31, C31, A3, B1
  3304. MADPS C41, C41, A4, B1
  3305. gsLQC1(R12, F7, F6, 3) # A7 A8
  3306. MADPS C32, C32, A3, B2
  3307. MADPS C42, C42, A4, B2
  3308. FETCH $0, 0 * SIZE(PREB)
  3309. MADPS C13, C13, A1, B3
  3310. MADPS C23, C23, A2, B3
  3311. MADPS C33, C33, A3, B3
  3312. MADPS C43, C43, A4, B3
  3313. MADPS C14, C14, A1, B4
  3314. PLU B7, B5, B5 # derive B7 from B5 for the next k-step
  3315. FETCH $0, 0 * SIZE(PREA)
  3316. MADPS C24, C24, A2, B4
  3317. PLU B8, B6, B6 # derive B8 from B6 for the next k-step
  3318. FETCH $0, 4 * SIZE(PREA)
  3319. MADPS C34, C34, A3, B4
  3320. MADPS C44, C44, A4, B4
  3321. MADPS C11, C11, A5, B5 # k-step 2: A5..A8 against B5..B8
  3322. MADPS C21, C21, A6, B5
  3323. gsLQC1(R13, F9, F8, 2) # B1 B2
  3324. MADPS C12, C12, A5, B6
  3325. MADPS C22, C22, A6, B6
  3326. gsLQC1(R12, F1, F0, 4) # A1 A2
  3327. MADPS C31, C31, A7, B5
  3328. MADPS C41, C41, A8, B5
  3329. gsLQC1(R12, F3, F2, 5) # A3 A4
  3330. MADPS C32, C32, A7, B6
  3331. MADPS C42, C42, A8, B6
  3332. FETCH $0, 4 * SIZE(PREB)
  3333. MADPS C13, C13, A5, B7
  3334. MADPS C23, C23, A6, B7
  3335. MADPS C33, C33, A7, B7
  3336. MADPS C43, C43, A8, B7
  3337. MADPS C14, C14, A5, B8
  3338. PLU B3, B1, B1
  3339. FETCH $0, 8 * SIZE(PREA)
  3340. MADPS C24, C24, A6, B8
  3341. PLU B4, B2, B2
  3342. FETCH $0, 12 * SIZE(PREA)
  3343. MADPS C34, C34, A7, B8
  3344. MADPS C44, C44, A8, B8
  3345. MADPS C11, C11, A1, B1 # k-step 3
  3346. MADPS C21, C21, A2, B1
  3347. gsLQC1(R13, F13, F12, 3) # B3 B4
  3348. MADPS C12, C12, A1, B2
  3349. MADPS C22, C22, A2, B2
  3350. gsLQC1(R12, F5, F4, 6) # A5 A6
  3351. MADPS C31, C31, A3, B1
  3352. MADPS C41, C41, A4, B1
  3353. gsLQC1(R12, F7, F6, 7) # A7 A8
  3354. MADPS C32, C32, A3, B2
  3355. MADPS C42, C42, A4, B2
  3356. FETCH $0, 8 * SIZE(PREB)
  3357. MADPS C13, C13, A1, B3
  3358. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  3359. MADPS C23, C23, A2, B3
  3360. daddiu AO, AO, 32 * SIZE # 4KR*8MR
  3361. MADPS C33, C33, A3, B3
  3362. MADPS C43, C43, A4, B3
  3363. MADPS C14, C14, A1, B4
  3364. PLU B7, B5, B5
  3365. FETCH $0, 16 * SIZE(PREA)
  3366. MADPS C24, C24, A2, B4
  3367. PLU B8, B6, B6
  3368. FETCH $0, 20 * SIZE(PREA)
  3369. MADPS C34, C34, A3, B4
  3370. MADPS C44, C44, A4, B4
  3371. MADPS C11, C11, A5, B5 # k-step 4; reloads below use the already advanced AO/BO at offset 0
  3372. MADPS C21, C21, A6, B5
  3373. gsLQC1(R13, F9, F8, 0) # B1 B2
  3374. MADPS C12, C12, A5, B6
  3375. MADPS C22, C22, A6, B6
  3376. gsLQC1(R12, F1, F0, 0) # A1 A2
  3377. MADPS C31, C31, A7, B5
  3378. MADPS C41, C41, A8, B5
  3379. gsLQC1(R12, F3, F2, 1) # A3 A4
  3380. MADPS C32, C32, A7, B6
  3381. MADPS C42, C42, A8, B6
  3382. FETCH $0, 12 * SIZE(PREB)
  3383. MADPS C13, C13, A5, B7
  3384. MADPS C23, C23, A6, B7
  3385. daddiu PREB, PREB, 16 * SIZE # prefetch cursor for B follows BO
  3386. MADPS C33, C33, A7, B7
  3387. MADPS C43, C43, A8, B7
  3388. MADPS C14, C14, A5, B8
  3389. PLU B3, B1, B1
  3390. FETCH $0, 24 * SIZE(PREA)
  3391. MADPS C24, C24, A6, B8
  3392. PLU B4, B2, B2
  3393. FETCH $0, 28 * SIZE(PREA)
  3394. daddiu PREA, PREA, 32 * SIZE # prefetch cursor for A follows AO
  3395. MADPS C34, C34, A7, B8
  3396. MADPS C44, C44, A8, B8
  3397. .align 4
  3398. .L486: # K tail for the 8x4 tile: if bit 1 of the K count is set, run 2 more k-steps
  3399. #ifndef TRMMKERNEL
  3400. andi L, K, 2
  3401. #else
  3402. andi L, TEMP, 2 # TRMM build tests TEMP instead of K
  3403. #endif
  3404. blez L, .L487 # bit clear: go to the 1-step tail
  3405. NOP # fills the branch delay slot
  3406. MADPS C11, C11, A1, B1 # k-step 1: A1..A4 against B1..B4
  3407. MADPS C21, C21, A2, B1
  3408. gsLQC1(R13, F13, F12, 1) # B3 B4
  3409. MADPS C12, C12, A1, B2
  3410. MADPS C22, C22, A2, B2
  3411. gsLQC1(R12, F5, F4, 2) # A5 A6
  3412. MADPS C31, C31, A3, B1
  3413. MADPS C41, C41, A4, B1
  3414. gsLQC1(R12, F7, F6, 3) # A7 A8
  3415. MADPS C32, C32, A3, B2
  3416. MADPS C42, C42, A4, B2
  3417. FETCH $0, 0 * SIZE(PREB)
  3418. MADPS C13, C13, A1, B3
  3419. daddiu BO, BO, 8 * SIZE # 2KR*4NR
  3420. MADPS C23, C23, A2, B3
  3421. daddiu AO, AO, 16 * SIZE # 2KR*8MR
  3422. MADPS C33, C33, A3, B3
  3423. MADPS C43, C43, A4, B3
  3424. MADPS C14, C14, A1, B4
  3425. PLU B7, B5, B5 # derive B7 from B5 for the next k-step
  3426. FETCH $0, 0 * SIZE(PREA)
  3427. MADPS C24, C24, A2, B4
  3428. PLU B8, B6, B6 # derive B8 from B6 for the next k-step
  3429. FETCH $0, 4 * SIZE(PREA)
  3430. MADPS C34, C34, A3, B4
  3431. MADPS C44, C44, A4, B4
  3432. MADPS C11, C11, A5, B5 # k-step 2; reloads below use the already advanced AO/BO at offset 0
  3433. MADPS C21, C21, A6, B5
  3434. gsLQC1(R13, F9, F8, 0) # B1 B2
  3435. MADPS C12, C12, A5, B6
  3436. MADPS C22, C22, A6, B6
  3437. gsLQC1(R12, F1, F0, 0) # A1 A2
  3438. MADPS C31, C31, A7, B5
  3439. MADPS C41, C41, A8, B5
  3440. gsLQC1(R12, F3, F2, 1) # A3 A4
  3441. MADPS C32, C32, A7, B6
  3442. MADPS C42, C42, A8, B6
  3443. FETCH $0, 4 * SIZE(PREB)
  3444. MADPS C13, C13, A5, B7
  3445. MADPS C23, C23, A6, B7
  3446. daddiu PREB, PREB, 8 * SIZE # prefetch cursor for B follows BO
  3447. MADPS C33, C33, A7, B7
  3448. MADPS C43, C43, A8, B7
  3449. MADPS C14, C14, A5, B8
  3450. PLU B3, B1, B1
  3451. FETCH $0, 8 * SIZE(PREA)
  3452. MADPS C24, C24, A6, B8
  3453. PLU B4, B2, B2
  3454. FETCH $0, 12 * SIZE(PREA)
  3455. MADPS C34, C34, A7, B8
  3456. MADPS C44, C44, A8, B8
  3457. daddiu PREA, PREA, 16 * SIZE # prefetch cursor for A follows AO
  3458. .align 4
  3459. .L487: # K tail for the 8x4 tile: if bit 0 of the K count is set, run the final single k-step
  3460. #ifndef TRMMKERNEL
  3461. andi L, K, 1
  3462. #else
  3463. andi L, TEMP, 1 # TRMM build tests TEMP instead of K
  3464. #endif
  3465. blez L, .L480 # bit clear: go straight to write-back
  3466. LD ALPHA, 152($sp) # delay slot: ALPHA is reloaded from the stack on both paths
  3467. MADPS C11, C11, A1, B1 # last k-step uses the operands preloaded by the previous stage; no further loads
  3468. MADPS C21, C21, A2, B1
  3469. MADPS C12, C12, A1, B2
  3470. MADPS C22, C22, A2, B2
  3471. MADPS C31, C31, A3, B1
  3472. MADPS C41, C41, A4, B1
  3473. MADPS C32, C32, A3, B2
  3474. MADPS C42, C42, A4, B2
  3475. MADPS C13, C13, A1, B3
  3476. daddiu BO, BO, 4 * SIZE # 1KR*4NR
  3477. MADPS C23, C23, A2, B3
  3478. daddiu AO, AO, 8 * SIZE # 1KR*8MR
  3479. MADPS C33, C33, A3, B3
  3480. MADPS C43, C43, A4, B3
  3481. MADPS C14, C14, A1, B4
  3482. MADPS C24, C24, A2, B4
  3483. MADPS C34, C34, A3, B4
  3484. MADPS C44, C44, A4, B4
  3485. .align 4
  3486. .L480: # Write Back: store the 8x4 tile through CO1..CO4, advance the C pointers, loop to .L481 while I > 0
  3487. #ifndef TRMMKERNEL
  3488. daddiu I, I, -1 # one 8-row tile done
  3489. CVTU A1, C13 # A1 = C13.upper -> CO1[1]
  3490. CVTU A2, C11 # A2 = C11.upper -> CO2[1]
  3491. CVTU A3, C23 # A3 = C23.upper -> CO1[3]
  3492. LD B1, 1 * SIZE(CO1)
  3493. CVTU A4, C21 # A4 = C21.upper -> CO2[3]
  3494. LD B2, 1 * SIZE(CO2)
  3495. CVTU A5, C33 # A5 = C33.upper -> CO1[5]
  3496. LD B3, 3 * SIZE(CO1)
  3497. CVTU A6, C31 # A6 = C31.upper -> CO2[5]
  3498. LD B4, 3 * SIZE(CO2)
  3499. CVTU A7, C43 # A7 = C43.upper -> CO1[7]
  3500. LD B5, 5 * SIZE(CO1)
  3501. CVTU A8, C41 # A8 = C41.upper -> CO2[7]
  3502. LD B6, 5 * SIZE(CO2)
  3503. MADD A1, B1, A1, ALPHA # CO1[1]: old value B1 combined with A1 and ALPHA (presumably B1 + A1*ALPHA; MADD is defined outside this chunk)
  3504. LD B7, 7 * SIZE(CO1)
  3505. MADD A2, B2, A2, ALPHA # CO2[1]
  3506. LD B1, 7 * SIZE(CO2) # B1 is free again after the MADD above
  3507. MADD A3, B3, A3, ALPHA # CO1[3]
  3508. LD B2, 0 * SIZE(CO1)
  3509. MADD A4, B4, A4, ALPHA # CO2[3]
  3510. LD B3, 0 * SIZE(CO2)
  3511. MADD A5, B5, A5, ALPHA # CO1[5]
  3512. LD B4, 2 * SIZE(CO1)
  3513. MADD A6, B6, A6, ALPHA # CO2[5]
  3514. LD B5, 2 * SIZE(CO2)
  3515. MADD A7, B7, A7, ALPHA # CO1[7]
  3516. LD B6, 4 * SIZE(CO1)
  3517. MADD A8, B1, A8, ALPHA # CO2[7]
  3518. ST A1, 1 * SIZE(CO1)
  3519. MADD C11, B2, C11, ALPHA # CO1[0]
  3520. LD B7, 4 * SIZE(CO2)
  3521. MADD C13, B3, C13, ALPHA # CO2[0]
  3522. ST A2, 1 * SIZE(CO2)
  3523. MADD C21, B4, C21, ALPHA # CO1[2]
  3524. LD A1, 6 * SIZE(CO1) # A1 reused as a load temporary after its store
  3525. MADD C23, B5, C23, ALPHA # CO2[2]
  3526. ST A3, 3 * SIZE(CO1)
  3527. MADD C31, B6, C31, ALPHA # CO1[4]
  3528. LD A2, 6 * SIZE(CO2) # A2 reused as a load temporary after its store
  3529. MADD C33, B7, C33, ALPHA # CO2[4]
  3530. ST A4, 3 * SIZE(CO2)
  3531. ST A5, 5 * SIZE(CO1)
  3532. ST A6, 5 * SIZE(CO2)
  3533. ST A7, 7 * SIZE(CO1)
  3534. ST A8, 7 * SIZE(CO2)
  3535. MADD C41, A1, C41, ALPHA # CO1[6]
  3536. ST C11, 0 * SIZE(CO1)
  3537. MADD C43, A2, C43, ALPHA # CO2[6]
  3538. ST C13, 0 * SIZE(CO2)
  3539. ST C21, 2 * SIZE(CO1)
  3540. ST C23, 2 * SIZE(CO2)
  3541. ST C31, 4 * SIZE(CO1)
  3542. ST C33, 4 * SIZE(CO2)
  3543. ST C41, 6 * SIZE(CO1)
  3544. CVTU A1, C14 # A1 = C14.upper -> CO3[1]
  3545. ST C43, 6 * SIZE(CO2)
  3546. CVTU A2, C12 # A2 = C12.upper -> CO4[1]
  3547. LD B1, 1 * SIZE(CO3)
  3548. CVTU A3, C24 # A3 = C24.upper -> CO3[3]
  3549. LD B2, 1 * SIZE(CO4)
  3550. CVTU A4, C22 # A4 = C22.upper -> CO4[3]
  3551. LD B3, 3 * SIZE(CO3)
  3552. CVTU A5, C34 # A5 = C34.upper -> CO3[5]
  3553. LD B4, 3 * SIZE(CO4)
  3554. CVTU A6, C32 # A6 = C32.upper -> CO4[5]
  3555. LD B5, 5 * SIZE(CO3)
  3556. CVTU A7, C44 # A7 = C44.upper -> CO3[7]
  3557. LD B6, 5 * SIZE(CO4)
  3558. CVTU A8, C42 # A8 = C42.upper -> CO4[7]
  3559. LD B7, 7 * SIZE(CO3)
  3560. MADD A1, B1, A1, ALPHA # CO3[1]
  3561. LD C11, 7 * SIZE(CO4) # C11..C43 were stored above and now serve as load temporaries
  3562. MADD A2, B2, A2, ALPHA # CO4[1]
  3563. LD C13, 0 * SIZE(CO3)
  3564. MADD A3, B3, A3, ALPHA # CO3[3]
  3565. LD C21, 0 * SIZE(CO4)
  3566. MADD A4, B4, A4, ALPHA # CO4[3]
  3567. LD C23, 2 * SIZE(CO3)
  3568. MADD A5, B5, A5, ALPHA # CO3[5]
  3569. LD C31, 2 * SIZE(CO4)
  3570. MADD A6, B6, A6, ALPHA # CO4[5]
  3571. LD C33, 4 * SIZE(CO3)
  3572. MADD A7, B7, A7, ALPHA # CO3[7]
  3573. LD C41, 4 * SIZE(CO4)
  3574. MADD A8, C11, A8, ALPHA # CO4[7]
  3575. ST A1, 1 * SIZE(CO3)
  3576. MADD C12, C13, C12, ALPHA # CO3[0]
  3577. LD C43, 6 * SIZE(CO3)
  3578. MADD C14, C21, C14, ALPHA # CO4[0]
  3579. ST A2, 1 * SIZE(CO4)
  3580. MADD C22, C23, C22, ALPHA # CO3[2]
  3581. LD B1, 6 * SIZE(CO4)
  3582. MADD C24, C31, C24, ALPHA # CO4[2]
  3583. ST A3, 3 * SIZE(CO3)
  3584. MADD C32, C33, C32, ALPHA # CO3[4]
  3585. ST A4, 3 * SIZE(CO4)
  3586. MADD C34, C41, C34, ALPHA # CO4[4]
  3587. ST A5, 5 * SIZE(CO3)
  3588. MADD C42, C43, C42, ALPHA # CO3[6]
  3589. ST A6, 5 * SIZE(CO4)
  3590. ST A7, 7 * SIZE(CO3)
  3591. NOP
  3592. MADD C44, B1, C44, ALPHA # CO4[6]
  3593. ST A8, 7 * SIZE(CO4)
  3594. ST C12, 0 * SIZE(CO3)
  3595. ST C14, 0 * SIZE(CO4)
  3596. ST C22, 2 * SIZE(CO3)
  3597. ST C24, 2 * SIZE(CO4)
  3598. ST C32, 4 * SIZE(CO3)
  3599. ST C34, 4 * SIZE(CO4)
  3600. ST C42, 6 * SIZE(CO3)
  3601. ST C44, 6 * SIZE(CO4)
  3602. daddiu CO1, CO1, 8 * SIZE # step all four C pointers past the 8 elements just written
  3603. daddiu CO2, CO2, 8 * SIZE
  3604. daddiu CO3, CO3, 8 * SIZE
  3605. bgtz I, .L481 # more 8-row tiles remain
  3606. daddiu CO4, CO4, 8 * SIZE # executed in the branch delay slot
  3607. #else
  3608. daddiu I, I, -1 # one 8-row tile done
  3609. CVTU A1, C13 # A1 = C13.upper -> CO1[1]
  3610. CVTU A2, C11 # A2 = C11.upper -> CO2[1]
  3611. CVTU A3, C23 # A3 = C23.upper -> CO1[3]
  3612. CVTU A4, C21 # A4 = C21.upper -> CO2[3]
  3613. CVTU A5, C33 # A5 = C33.upper -> CO1[5]
  3614. CVTU A6, C31 # A6 = C31.upper -> CO2[5]
  3615. CVTU A7, C43 # A7 = C43.upper -> CO1[7]
  3616. CVTU A8, C41 # A8 = C41.upper -> CO2[7]
  3617. MUL A1, A1, ALPHA # CO1[1]; the TRMM path scales by ALPHA without reading C
  3618. MUL A2, A2, ALPHA # CO2[1]
  3619. MUL A3, A3, ALPHA # CO1[3]
  3620. MUL A4, A4, ALPHA # CO2[3]
  3621. MUL A5, A5, ALPHA # CO1[5]
  3622. MUL A6, A6, ALPHA # CO2[5]
  3623. MUL A7, A7, ALPHA # CO1[7]
  3624. MUL A8, A8, ALPHA # CO2[7]
  3625. MUL C11, C11, ALPHA # CO1[0]
  3626. ST A1, 1 * SIZE(CO1)
  3627. MUL C13, C13, ALPHA # CO2[0]
  3628. ST A2, 1 * SIZE(CO2)
  3629. MUL C21, C21, ALPHA # CO1[2]
  3630. ST A3, 3 * SIZE(CO1)
  3631. MUL C23, C23, ALPHA # CO2[2]
  3632. ST A4, 3 * SIZE(CO2)
  3633. MUL C31, C31, ALPHA # CO1[4]
  3634. ST A5, 5 * SIZE(CO1)
  3635. MUL C33, C33, ALPHA # CO2[4]
  3636. ST A6, 5 * SIZE(CO2)
  3637. MUL C41, C41, ALPHA # CO1[6]
  3638. ST A7, 7 * SIZE(CO1)
  3639. MUL C43, C43, ALPHA # CO2[6]
  3640. ST A8, 7 * SIZE(CO2)
  3641. CVTU A1, C14 # A1 = C14.upper -> CO3[1]
  3642. ST C11, 0 * SIZE(CO1)
  3643. CVTU A2, C12 # A2 = C12.upper -> CO4[1]
  3644. ST C13, 0 * SIZE(CO2)
  3645. CVTU A3, C24 # A3 = C24.upper -> CO3[3]
  3646. ST C21, 2 * SIZE(CO1)
  3647. CVTU A4, C22 # A4 = C22.upper -> CO4[3]
  3648. ST C23, 2 * SIZE(CO2)
  3649. CVTU A5, C34 # A5 = C34.upper -> CO3[5]
  3650. ST C31, 4 * SIZE(CO1)
  3651. CVTU A6, C32 # A6 = C32.upper -> CO4[5]
  3652. ST C33, 4 * SIZE(CO2)
  3653. CVTU A7, C44 # A7 = C44.upper -> CO3[7]
  3654. ST C41, 6 * SIZE(CO1)
  3655. CVTU A8, C42 # A8 = C42.upper -> CO4[7]
  3656. ST C43, 6 * SIZE(CO2)
  3657. MUL A1, A1, ALPHA # CO3[1]
  3658. MUL A2, A2, ALPHA # CO4[1]
  3659. MUL A3, A3, ALPHA # CO3[3]
  3660. MUL A4, A4, ALPHA # CO4[3]
  3661. MUL A5, A5, ALPHA # CO3[5]
  3662. MUL A6, A6, ALPHA # CO4[5]
  3663. MUL A7, A7, ALPHA # CO3[7]
  3664. MUL A8, A8, ALPHA # CO4[7]
  3665. MUL C12, C12, ALPHA # CO3[0]
  3666. ST A1, 1 * SIZE(CO3)
  3667. MUL C14, C14, ALPHA # CO4[0]
  3668. ST A2, 1 * SIZE(CO4)
  3669. MUL C22, C22, ALPHA # CO3[2]
  3670. ST A3, 3 * SIZE(CO3)
  3671. MUL C24, C24, ALPHA # CO4[2]
  3672. ST A4, 3 * SIZE(CO4)
  3673. MUL C32, C32, ALPHA # CO3[4]
  3674. ST A5, 5 * SIZE(CO3)
  3675. MUL C34, C34, ALPHA # CO4[4]
  3676. ST A6, 5 * SIZE(CO4)
  3677. MUL C42, C42, ALPHA # CO3[6]
  3678. ST A7, 7 * SIZE(CO3)
  3679. MUL C44, C44, ALPHA # CO4[6]
  3680. ST A8, 7 * SIZE(CO4)
  3681. ST C12, 0 * SIZE(CO3)
  3682. ST C14, 0 * SIZE(CO4)
  3683. ST C22, 2 * SIZE(CO3)
  3684. ST C24, 2 * SIZE(CO4)
  3685. ST C32, 4 * SIZE(CO3)
  3686. ST C34, 4 * SIZE(CO4)
  3687. ST C42, 6 * SIZE(CO3)
  3688. ST C44, 6 * SIZE(CO4)
  3689. daddiu CO1, CO1, 8 * SIZE # step all four C pointers past the 8 elements just written
  3690. daddiu CO2, CO2, 8 * SIZE
  3691. daddiu CO3, CO3, 8 * SIZE
  3692. daddiu CO4, CO4, 8 * SIZE
  3693. #if ( defined(LEFT) && defined(TRANSA)) ||\
  3694. (!defined(LEFT) && !defined(TRANSA))
  3695. dsubu TEMP, K, KK # TEMP = K - KK
  3696. #ifdef LEFT
  3697. daddiu TEMP, TEMP, -8 # minus the 8 rows of this tile
  3698. #else
  3699. daddiu TEMP, TEMP, -4 # minus the 4 columns of this tile
  3700. #endif
  3701. dsll L, TEMP, 3 + BASE_SHIFT # TEMP * 8 elements, in bytes
  3702. dsll TEMP, TEMP, 2 + BASE_SHIFT # TEMP * 4 elements, in bytes
  3703. daddu AO, AO, L # move AO and BO forward by those byte counts
  3704. daddu BO, BO, TEMP
  3705. #endif
  3706. #ifdef LEFT
  3707. daddiu KK, KK, 8 # KK advances by the tile height
  3708. #endif
  3709. bgtz I, .L481 # more 8-row tiles remain
  3710. NOP # fills the branch delay slot
  3711. #endif
  3712. .align 4
  3713. .L44: # Handle a remaining block of 4 rows of M, if bit 2 of M is set
  3714. andi I, M, 4 # MR=4
  3715. blez I, .L42 # no 4-row block: try the 2-row block
  3716. NOP # fills the branch delay slot
  3717. .align 4
  3718. .L441: # Set up the 4x4 tile: position BO (and AO for TRMM), clear accumulators, preload A1 A2 / B1 B2, compute L = count / 4
  3719. #if defined(TRMMKERNEL)
  3720. #if (defined(LEFT) && defined(TRANSA)) ||\
  3721. (!defined(LEFT) && !defined(TRANSA))
  3722. move BO, B # Reset B
  3723. #else
  3724. dsll L, KK, 2 + BASE_SHIFT # KK * 4 elements (MR=4), in bytes
  3725. dsll TEMP, KK, 2 + BASE_SHIFT # KK * 4 elements (NR=4), in bytes
  3726. daddu AO, AO, L # skip the first KK k-steps of A
  3727. daddu BO, B, TEMP # and of B
  3728. #endif
  3729. MTC $0, C11 # CLEAR RESULTS REGISTERS
  3730. MOV C12, C11
  3731. dsll PREB, K, BASE_SHIFT # K elements in bytes; added to B below to form the B prefetch cursor
  3732. MOV C21, C11
  3733. MOV C22, C11
  3734. MOV C31, C11
  3735. MOV C32, C11
  3736. gsLQC1(R13, F9, F8, 0) # B1 B2
  3737. MOV C41, C11
  3738. MOV C42, C11
  3739. gsLQC1(R12, F1, F0, 0) # A1 A2
  3740. MOV C13, C11
  3741. MOV C14, C11
  3742. MOV C23, C11
  3743. FETCH $0, 0 * SIZE(CO1)
  3744. MOV C24, C11
  3745. MOV C33, C11
  3746. FETCH $0, 0 * SIZE(CO2)
  3747. MOV C34, C11
  3748. daddu PREB, B, PREB # PREB = B + K elements
  3749. MOV C43, C11
  3750. FETCH $0, 0 * SIZE(CO3)
  3751. MOV C44, C11
  3752. PLU B3, B1, B1 # derive B3 from B1 for the first k-step
  3753. FETCH $0, 0 * SIZE(CO4)
  3754. PLU B4, B2, B2 # derive B4 from B2 for the first k-step
  3755. #if (defined(LEFT) && !defined(TRANSA)) ||\
  3756. (!defined(LEFT) && defined(TRANSA))
  3757. dsubu TEMP, K, KK # TEMP = K - KK
  3758. #elif defined(LEFT)
  3759. daddiu TEMP, KK, 4 # TEMP = KK + MR; immediate form, matching the 2-row tile setup
  3760. #else
  3761. daddiu TEMP, KK, 4 # TEMP = KK + NR; immediate form, matching the 2-row tile setup
  3762. #endif
  3763. dsra L, TEMP, 2 # L = TEMP / 4 iterations of the unrolled loop
  3764. blez L, .L442 # fewer than 4 k-steps: go to the tails
  3765. NOP # fills the branch delay slot
  3766. #else
  3767. move BO, B # Reset B
  3768. dsra L, K, 2 # UnRoll K=4
  3769. MTC $0, C11 # CLEAR RESULTS REGISTERS
  3770. MOV C12, C11
  3771. dsll PREB, K, BASE_SHIFT # K elements in bytes; added to B below to form the B prefetch cursor
  3772. MOV C21, C11
  3773. MOV C22, C11
  3774. MOV C31, C11
  3775. MOV C32, C11
  3776. gsLQC1(R13, F9, F8, 0) # B1 B2
  3777. MOV C41, C11
  3778. MOV C42, C11
  3779. gsLQC1(R12, F1, F0, 0) # A1 A2
  3780. MOV C13, C11
  3781. MOV C14, C11
  3782. MOV C23, C11
  3783. FETCH $0, 0 * SIZE(CO1)
  3784. MOV C24, C11
  3785. MOV C33, C11
  3786. FETCH $0, 0 * SIZE(CO2)
  3787. MOV C34, C11
  3788. daddu PREB, B, PREB # PREB = B + K elements
  3789. MOV C43, C11
  3790. FETCH $0, 0 * SIZE(CO3)
  3791. MOV C44, C11
  3792. PLU B3, B1, B1 # derive B3 from B1 for the first k-step
  3793. FETCH $0, 0 * SIZE(CO4)
  3794. blez L, .L442 # fewer than 4 k-steps: go to the tails
  3795. PLU B4, B2, B2 # executed in the branch delay slot
  3796. #endif
  3797. .L4410: # Main K loop for the 4x4 tile: 4 k-steps per iteration, L iterations
  3798. daddiu L, L, -1
  3799. MADPS C11, C11, A1, B1 # k-step 1: A1 A2 against B1..B4
  3800. gsLQC1(R13, F13, F12, 1) # B3 B4
  3801. MADPS C21, C21, A2, B1
  3802. gsLQC1(R12, F3, F2, 1) # A3 A4
  3803. MADPS C12, C12, A1, B2
  3804. FETCH $0, 0 * SIZE(PREB)
  3805. MADPS C22, C22, A2, B2
  3806. FETCH $0, 0 * SIZE(PREA)
  3807. MADPS C13, C13, A1, B3
  3808. MADPS C23, C23, A2, B3
  3809. MADPS C14, C14, A1, B4
  3810. MADPS C24, C24, A2, B4
  3811. PLU B7, B5, B5 # derive B7 from B5 for the next k-step
  3812. PLU B8, B6, B6 # derive B8 from B6 for the next k-step
  3813. MADPS C11, C11, A3, B5 # k-step 2: A3 A4 against B5..B8
  3814. gsLQC1(R13, F9, F8, 2) # B1 B2
  3815. MADPS C21, C21, A4, B5
  3816. gsLQC1(R12, F5, F4, 2) # A5 A6
  3817. MADPS C12, C12, A3, B6
  3818. FETCH $0, 4 * SIZE(PREB)
  3819. MADPS C22, C22, A4, B6
  3820. FETCH $0, 4 * SIZE(PREA)
  3821. MADPS C13, C13, A3, B7
  3822. MADPS C23, C23, A4, B7
  3823. MADPS C14, C14, A3, B8
  3824. MADPS C24, C24, A4, B8
  3825. PLU B3, B1, B1
  3826. PLU B4, B2, B2
  3827. MADPS C11, C11, A5, B1 # k-step 3: A5 A6 against B1..B4
  3828. gsLQC1(R13, F13, F12, 3) # B3 B4
  3829. MADPS C21, C21, A6, B1
  3830. gsLQC1(R12, F7, F6, 3) # A7 A8
  3831. MADPS C12, C12, A5, B2
  3832. FETCH $0, 8 * SIZE(PREB)
  3833. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  3834. MADPS C22, C22, A6, B2
  3835. FETCH $0, 8 * SIZE(PREA)
  3836. daddiu AO, AO, 16 * SIZE # 4KR*4MR
  3837. MADPS C13, C13, A5, B3
  3838. MADPS C23, C23, A6, B3
  3839. MADPS C14, C14, A5, B4
  3840. MADPS C24, C24, A6, B4
  3841. PLU B7, B5, B5
  3842. PLU B8, B6, B6
  3843. MADPS C11, C11, A7, B5 # k-step 4; reloads below use the already advanced AO/BO at offset 0
  3844. gsLQC1(R13, F9, F8, 0) # B1 B2
  3845. MADPS C21, C21, A8, B5
  3846. gsLQC1(R12, F1, F0, 0) # A1 A2
  3847. MADPS C12, C12, A7, B6
  3848. FETCH $0, 12 * SIZE(PREB)
  3849. MADPS C22, C22, A8, B6
  3850. FETCH $0, 12 * SIZE(PREA)
  3851. MADPS C13, C13, A7, B7
  3852. daddiu PREA, PREA, 16 * SIZE # prefetch cursors advance by the same amount as AO and BO
  3853. MADPS C23, C23, A8, B7
  3854. daddiu PREB, PREB, 16 * SIZE
  3855. MADPS C14, C14, A7, B8
  3856. MADPS C24, C24, A8, B8
  3857. PLU B3, B1, B1
  3858. bgtz L, .L4410 # repeat while iterations remain
  3859. PLU B4, B2, B2 # executed in the branch delay slot
  3860. .align 4
  3861. .L442: # K tail for the 4x4 tile: if bit 1 of the K count is set, run 2 more k-steps
  3862. #ifndef TRMMKERNEL
  3863. andi L, K, 2
  3864. #else
  3865. andi L, TEMP, 2 # TRMM build tests TEMP instead of K
  3866. #endif
  3867. blez L, .L443 # bit clear: go to the 1-step tail
  3868. NOP # fills the branch delay slot
  3869. MADPS C11, C11, A1, B1 # k-step 1: A1 A2 against B1..B4
  3870. gsLQC1(R13, F13, F12, 1) # B3 B4
  3871. MADPS C21, C21, A2, B1
  3872. gsLQC1(R12, F3, F2, 1) # A3 A4
  3873. MADPS C12, C12, A1, B2
  3874. FETCH $0, 0 * SIZE(PREB)
  3875. daddiu BO, BO, 8 * SIZE # 2KR*4NR
  3876. MADPS C22, C22, A2, B2
  3877. FETCH $0, 0 * SIZE(PREA)
  3878. daddiu AO, AO, 8 * SIZE # 2KR*4MR
  3879. MADPS C13, C13, A1, B3
  3880. MADPS C23, C23, A2, B3
  3881. MADPS C14, C14, A1, B4
  3882. MADPS C24, C24, A2, B4
  3883. PLU B7, B5, B5 # derive B7 from B5 for the next k-step
  3884. PLU B8, B6, B6 # derive B8 from B6 for the next k-step
  3885. MADPS C11, C11, A3, B5 # k-step 2; reloads below use the already advanced AO/BO at offset 0
  3886. gsLQC1(R13, F9, F8, 0) # B1 B2
  3887. MADPS C21, C21, A4, B5
  3888. gsLQC1(R12, F1, F0, 0) # A1 A2
  3889. MADPS C12, C12, A3, B6
  3890. FETCH $0, 4 * SIZE(PREB)
  3891. MADPS C22, C22, A4, B6
  3892. FETCH $0, 4 * SIZE(PREA)
  3893. MADPS C13, C13, A3, B7
  3894. daddiu PREB, PREB, 8 * SIZE # advance by 8 elements like BO (was 8 bytes: the element size factor was missing)
  3895. MADPS C23, C23, A4, B7
  3896. daddiu PREA, PREA, 8 * SIZE # advance by 8 elements like AO (was 8 bytes: the element size factor was missing)
  3897. MADPS C14, C14, A3, B8
  3898. MADPS C24, C24, A4, B8
  3899. PLU B3, B1, B1
  3900. PLU B4, B2, B2
  3901. .align 4
  3902. .L443: # K tail for the 4x4 tile: if bit 0 of the K count is set, run the final single k-step
  3903. #ifndef TRMMKERNEL
  3904. andi L, K, 1
  3905. #else
  3906. andi L, TEMP, 1 # TRMM build tests TEMP instead of K
  3907. #endif
  3908. blez L, .L440 # bit clear: go straight to write-back
  3909. LD ALPHA, 152($sp) # delay slot: ALPHA is reloaded from the stack on both paths
  3910. MADPS C11, C11, A1, B1 # last k-step uses the operands preloaded by the previous stage; no further loads
  3911. MADPS C21, C21, A2, B1
  3912. MADPS C12, C12, A1, B2
  3913. daddiu BO, BO, 4 * SIZE # 1KR*4NR
  3914. MADPS C22, C22, A2, B2
  3915. daddiu AO, AO, 4 * SIZE # 1KR*4MR
  3916. MADPS C13, C13, A1, B3
  3917. MADPS C23, C23, A2, B3
  3918. MADPS C14, C14, A1, B4
  3919. MADPS C24, C24, A2, B4
  3920. .align 4
  3921. .L440: # Write back the 4x4 tile through CO1..CO4 and advance the C pointers by 4 elements
  3922. #ifndef TRMMKERNEL
  3923. CVTU A1, C13 # A1 = C13.upper -> CO1[1]
  3924. LD B1, 1 * SIZE(CO1)
  3925. CVTU A2, C11 # A2 = C11.upper -> CO2[1]
  3926. LD B2, 1 * SIZE(CO2)
  3927. CVTU A3, C23 # A3 = C23.upper -> CO1[3]
  3928. LD B3, 3 * SIZE(CO1)
  3929. CVTU A4, C21 # A4 = C21.upper -> CO2[3]
  3930. LD B4, 3 * SIZE(CO2)
  3931. MADD A1, B1, A1, ALPHA # CO1[1]: old value B1 combined with A1 and ALPHA (presumably B1 + A1*ALPHA; MADD is defined outside this chunk)
  3932. LD B5, 0 * SIZE(CO1)
  3933. MADD A2, B2, A2, ALPHA # CO2[1]
  3934. LD B6, 0 * SIZE(CO2)
  3935. MADD A3, B3, A3, ALPHA # CO1[3]
  3936. LD B7, 2 * SIZE(CO1)
  3937. MADD A4, B4, A4, ALPHA # CO2[3]
  3938. LD B1, 2 * SIZE(CO2) # B1 is free again after the first MADD
  3939. MADD C11, B5, C11, ALPHA # CO1[0]
  3940. ST A1, 1 * SIZE(CO1)
  3941. MADD C13, B6, C13, ALPHA # CO2[0]
  3942. ST A2, 1 * SIZE(CO2)
  3943. MADD C21, B7, C21, ALPHA # CO1[2]
  3944. ST A3, 3 * SIZE(CO1)
  3945. MADD C23, B1, C23, ALPHA # CO2[2]
  3946. ST A4, 3 * SIZE(CO2)
  3947. ST C11, 0 * SIZE(CO1)
  3948. ST C13, 0 * SIZE(CO2)
  3949. ST C21, 2 * SIZE(CO1)
  3950. ST C23, 2 * SIZE(CO2)
  3951. CVTU A1, C14 # A1 = C14.upper -> CO3[1]
  3952. LD B1, 1 * SIZE(CO3)
  3953. CVTU A2, C12 # A2 = C12.upper -> CO4[1]
  3954. LD B2, 1 * SIZE(CO4)
  3955. CVTU A3, C24 # A3 = C24.upper -> CO3[3]
  3956. LD B3, 3 * SIZE(CO3)
  3957. CVTU A4, C22 # A4 = C22.upper -> CO4[3]
  3958. LD B4, 3 * SIZE(CO4)
  3959. MADD A1, B1, A1, ALPHA # CO3[1]
  3960. LD A5, 0 * SIZE(CO3) # A5..A8 serve as load temporaries here
  3961. MADD A2, B2, A2, ALPHA # CO4[1]
  3962. LD A6, 0 * SIZE(CO4)
  3963. MADD A3, B3, A3, ALPHA # CO3[3]
  3964. LD A7, 2 * SIZE(CO3)
  3965. MADD A4, B4, A4, ALPHA # CO4[3]
  3966. LD A8, 2 * SIZE(CO4)
  3967. MADD C12, A5, C12, ALPHA # CO3[0]
  3968. ST A1, 1 * SIZE(CO3)
  3969. MADD C14, A6, C14, ALPHA # CO4[0]
  3970. ST A2, 1 * SIZE(CO4)
  3971. MADD C22, A7, C22, ALPHA # CO3[2]
  3972. ST A3, 3 * SIZE(CO3)
  3973. MADD C24, A8, C24, ALPHA # CO4[2]
  3974. ST A4, 3 * SIZE(CO4)
  3975. ST C12, 0 * SIZE(CO3)
  3976. ST C14, 0 * SIZE(CO4)
  3977. ST C22, 2 * SIZE(CO3)
  3978. ST C24, 2 * SIZE(CO4)
  3979. daddiu CO1, CO1, 4 * SIZE # step all four C pointers past the 4 elements just written
  3980. daddiu CO2, CO2, 4 * SIZE
  3981. daddiu CO3, CO3, 4 * SIZE
  3982. daddiu CO4, CO4, 4 * SIZE
  3983. #else
  3984. CVTU A1, C13 # A1 = C13.upper -> CO1[1]
  3985. CVTU A2, C11 # A2 = C11.upper -> CO2[1]
  3986. CVTU A3, C23 # A3 = C23.upper -> CO1[3]
  3987. CVTU A4, C21 # A4 = C21.upper -> CO2[3]
  3988. MUL A1, A1, ALPHA # CO1[1]; the TRMM path scales by ALPHA without reading C
  3989. MUL A2, A2, ALPHA # CO2[1]
  3990. MUL A3, A3, ALPHA # CO1[3]
  3991. MUL A4, A4, ALPHA # CO2[3]
  3992. MUL C11, C11, ALPHA # CO1[0]
  3993. ST A1, 1 * SIZE(CO1)
  3994. MUL C13, C13, ALPHA # CO2[0]
  3995. ST A2, 1 * SIZE(CO2)
  3996. MUL C21, C21, ALPHA # CO1[2]
  3997. ST A3, 3 * SIZE(CO1)
  3998. MUL C23, C23, ALPHA # CO2[2]
  3999. ST A4, 3 * SIZE(CO2)
  4000. CVTU A5, C14 # A5 = C14.upper -> CO3[1]
  4001. ST C11, 0 * SIZE(CO1)
  4002. CVTU A6, C12 # A6 = C12.upper -> CO4[1]
  4003. ST C13, 0 * SIZE(CO2)
  4004. CVTU A7, C24 # A7 = C24.upper -> CO3[3]
  4005. ST C21, 2 * SIZE(CO1)
  4006. CVTU A8, C22 # A8 = C22.upper -> CO4[3]
  4007. ST C23, 2 * SIZE(CO2)
  4008. MUL A5, A5, ALPHA # CO3[1]
  4009. MUL A6, A6, ALPHA # CO4[1]
  4010. MUL A7, A7, ALPHA # CO3[3]
  4011. MUL A8, A8, ALPHA # CO4[3]
  4012. MUL C12, C12, ALPHA # CO3[0]
  4013. ST A5, 1 * SIZE(CO3)
  4014. MUL C14, C14, ALPHA # CO4[0]
  4015. ST A6, 1 * SIZE(CO4)
  4016. MUL C22, C22, ALPHA # CO3[2]
  4017. ST A7, 3 * SIZE(CO3)
  4018. MUL C24, C24, ALPHA # CO4[2]
  4019. ST A8, 3 * SIZE(CO4)
  4020. ST C12, 0 * SIZE(CO3)
  4021. ST C14, 0 * SIZE(CO4)
  4022. ST C22, 2 * SIZE(CO3)
  4023. ST C24, 2 * SIZE(CO4)
  4024. daddiu CO1, CO1, 4 * SIZE # step all four C pointers past the 4 elements just written
  4025. daddiu CO2, CO2, 4 * SIZE
  4026. daddiu CO3, CO3, 4 * SIZE
  4027. daddiu CO4, CO4, 4 * SIZE
  4028. #if ( defined(LEFT) && defined(TRANSA))||\
  4029. (!defined(LEFT) && !defined(TRANSA))
  4030. dsubu TEMP, K, KK # TEMP = K - KK
  4031. #ifdef LEFT
  4032. daddiu TEMP, TEMP, -4 # minus the 4 rows of this tile
  4033. #else
  4034. daddiu TEMP, TEMP, -4 # minus the 4 columns of this tile
  4035. #endif
  4036. dsll L, TEMP, 2 + BASE_SHIFT # TEMP * 4 elements, in bytes
  4037. dsll TEMP, TEMP, 2 + BASE_SHIFT # TEMP * 4 elements, in bytes
  4038. daddu AO, AO, L # move AO and BO forward by those byte counts
  4039. daddu BO, BO, TEMP
  4040. #endif
  4041. #ifdef LEFT
  4042. daddiu KK, KK, 4 # KK advances by the tile height
  4043. #endif
  4044. #endif
  4045. .align 4
  4046. .L42: # Handle a remaining block of 2 rows of M, if bit 1 of M is set
  4047. andi I, M, 2
  4048. blez I, .L41 # no 2-row block: continue at .L41
  4049. NOP # fills the branch delay slot
  4050. .align 4
  4051. .L421: # Set up the 2x4 tile: position BO (and AO for TRMM), clear accumulators, preload A1 A2 / B1 B2, compute L = count / 4
  4052. #if defined(TRMMKERNEL)
  4053. #if (defined(LEFT) && defined(TRANSA)) ||\
  4054. (!defined(LEFT) && !defined(TRANSA))
  4055. move BO, B
  4056. #else
  4057. dsll L, KK, 1 + BASE_SHIFT # KK * 2 elements (MR=2), in bytes
  4058. dsll TEMP, KK, 2 + BASE_SHIFT # KK * 4 elements (NR=4), in bytes
  4059. daddu AO, AO, L # skip the first KK k-steps of A
  4060. daddu BO, B, TEMP # and of B
  4061. #endif
  4062. MTC $0, C11 # CLEAR RESULTS REGISTERS
  4063. MOV C12, C11
  4064. MOV C21, C11
  4065. MOV C22, C11
  4066. MOV C31, C11
  4067. MOV C32, C11
  4068. gsLQC1(R13, F9, F8, 0) # B1 B2
  4069. MOV C41, C11
  4070. MOV C42, C11
  4071. gsLQC1(R12, F1, F0, 0) # A1 A2
  4072. MOV C13, C11
  4073. MOV C14, C11
  4074. MOV C23, C11
  4075. FETCH $0, 0 * SIZE(CO1)
  4076. MOV C24, C11
  4077. MOV C33, C11
  4078. FETCH $0, 0 * SIZE(CO2)
  4079. MOV C34, C11
  4080. MOV C43, C11
  4081. FETCH $0, 0 * SIZE(CO3)
  4082. MOV C44, C11
  4083. PLU B3, B1, B1 # derive B3 from B1 for the first k-step
  4084. FETCH $0, 0 * SIZE(CO4)
  4085. PLU B4, B2, B2 # derive B4 from B2 for the first k-step
  4086. #if (defined(LEFT) && !defined(TRANSA)) ||\
  4087. (!defined(LEFT) && defined(TRANSA))
  4088. dsubu TEMP, K, KK # TEMP = K - KK
  4089. #elif defined(LEFT)
  4090. daddiu TEMP, KK, 2 # TEMP = KK + MR
  4091. #else
  4092. daddiu TEMP, KK, 4 # TEMP = KK + NR
  4093. #endif
  4094. dsra L, TEMP, 2 # L = TEMP / 4 iterations of the unrolled loop
  4095. blez L, .L422 # fewer than 4 k-steps: go to the tails
  4096. NOP # fills the branch delay slot
  4097. #else
  4098. move BO, B # Reset B
  4099. dsra L, K, 2 # UnRoll K=4
  4100. MTC $0, C11 # CLEAR RESULTS REGISTERS
  4101. MOV C12, C11
  4102. MOV C21, C11
  4103. MOV C22, C11
  4104. MOV C31, C11
  4105. MOV C32, C11
  4106. gsLQC1(R13, F9, F8, 0) # B1 B2
  4107. MOV C41, C11
  4108. MOV C42, C11
  4109. gsLQC1(R12, F1, F0, 0) # A1 A2
  4110. MOV C13, C11
  4111. MOV C14, C11
  4112. MOV C23, C11
  4113. FETCH $0, 0 * SIZE(CO1)
  4114. MOV C24, C11
  4115. MOV C33, C11
  4116. FETCH $0, 0 * SIZE(CO2)
  4117. MOV C34, C11
  4118. MOV C43, C11
  4119. FETCH $0, 0 * SIZE(CO3)
  4120. MOV C44, C11
  4121. PLU B3, B1, B1 # derive B3 from B1 for the first k-step
  4122. FETCH $0, 0 * SIZE(CO4)
  4123. blez L, .L422 # fewer than 4 k-steps: go to the tails
  4124. PLU B4, B2, B2 # executed in the branch delay slot
  4125. #endif
  4126. .L4210: # Main K loop for the 2x4 tile: 4 k-steps per iteration, one A register per k-step
  4127. daddiu L, L, -1
  4128. MADPS C11, C11, A1, B1 # k-step 1: A1 against B1..B4
  4129. MADPS C12, C12, A1, B2
  4130. gsLQC1(R13, F13, F12, 1) # B3 B4
  4131. MADPS C13, C13, A1, B3
  4132. MADPS C14, C14, A1, B4
  4133. gsLQC1(R12, F3, F2, 1) # A3 A4
  4134. PLU B7, B5, B5 # derive B7 from B5 for the next k-step
  4135. PLU B8, B6, B6 # derive B8 from B6 for the next k-step
  4136. MADPS C11, C11, A2, B5 # k-step 2: A2 against B5..B8
  4137. MADPS C12, C12, A2, B6
  4138. daddiu AO, AO, 8 * SIZE # 4KR*2MR
  4139. gsLQC1(R13, F9, F8, 2) # B1 B2
  4140. MADPS C13, C13, A2, B7
  4141. MADPS C14, C14, A2, B8
  4142. PLU B3, B1, B1
  4143. PLU B4, B2, B2
  4144. MADPS C11, C11, A3, B1 # k-step 3: A3 against B1..B4
  4145. gsLQC1(R12, F1, F0, 0) # A1 A2 for the next iteration, read from the already advanced AO
  4146. MADPS C12, C12, A3, B2
  4147. gsLQC1(R13, F13, F12, 3) # B3 B4
  4148. daddiu BO, BO, 16 * SIZE # 4KR*4NR
  4149. MADPS C13, C13, A3, B3
  4150. MADPS C14, C14, A3, B4
  4151. PLU B7, B5, B5
  4152. PLU B8, B6, B6
  4153. MADPS C11, C11, A4, B5 # k-step 4: A4 against B5..B8
  4154. MADPS C12, C12, A4, B6
  4155. gsLQC1(R13, F9, F8, 0) # B1 B2 for the next iteration, read from the already advanced BO
  4156. MADPS C13, C13, A4, B7
  4157. MADPS C14, C14, A4, B8
  4158. PLU B3, B1, B1
  4159. bgtz L, .L4210 # repeat while iterations remain
  4160. PLU B4, B2, B2 # executed in the branch delay slot
  # K & 2 tail of the 2x4 tile: two more K steps using the A1/A2 and B1..B4
  # values that are already loaded, then reload A1/A2 and B1..B4 for the K & 1 tail.
  4161. .align 4
  4162. .L422:
  4163. #ifndef TRMMKERNEL
  4164. andi L, K, 2
  4165. #else
  4166. andi L, TEMP, 2
  4167. #endif
  4168. blez L, .L423
  4169. NOP
  4170. daddiu AO, AO, 4 * SIZE # 2KR*2MR
  4171. MADPS C11, C11, A1, B1
  4172. MADPS C12, C12, A1, B2
  4173. gsLQC1(R13, F13, F12, 1) # B5 B6 (second K step), presumably F12/F13
  4174. MADPS C13, C13, A1, B3
  4175. MADPS C14, C14, A1, B4
  4176. daddiu BO, BO, 8 * SIZE # 2KR*4NR
  4177. PLU B7, B5, B5
  4178. PLU B8, B6, B6
  4179. MADPS C11, C11, A2, B5
  4180. MADPS C12, C12, A2, B6
  4181. gsLQC1(R13, F9, F8, 0) # B1 B2 for the K & 1 tail
  4182. MADPS C13, C13, A2, B7
  4183. MADPS C14, C14, A2, B8
  4184. gsLQC1(R12, F1, F0, 0) # A1 A2 reloaded from the advanced AO
  4185. PLU B3, B1, B1
  4186. PLU B4, B2, B2
  # K & 1 tail of the 2x4 tile: one last K step with the preloaded A1 and B1..B4.
  4187. .L423:
  4188. #ifndef TRMMKERNEL
  4189. andi L, K, 1
  4190. #else
  4191. andi L, TEMP, 1
  4192. #endif
  4193. blez L, .L420
  4194. LD ALPHA, 152($sp) # follows the branch and is consumed at .L420, so it must run on both paths (delay slot)
  4195. MADPS C11, C11, A1, B1
  4196. MADPS C12, C12, A1, B2
  4197. daddiu BO, BO, 4 * SIZE # 1KR*4NR
  4198. daddiu AO, AO, 2 * SIZE # 1KR*2MR
  4199. MADPS C13, C13, A1, B3
  4200. MADPS C14, C14, A1, B4
  # Write-back of the 2x4 tile. Each of C11..C14 holds two results; CVTU
  # presumably extracts the upper half and plain ST stores the lower half -
  # TODO confirm against the macro definitions. Without TRMMKERNEL the old C
  # values are loaded and combined via MADD with ALPHA; with TRMMKERNEL the
  # results are only multiplied by ALPHA and AO/BO/KK are adjusted afterwards.
  4201. .align 4
  4202. .L420:
  4203. #ifndef TRMMKERNEL
  4204. CVTU A1, C13 # A1 = upper half of C13, ends up in CO1[1]
  4205. LD B1, 1 * SIZE(CO1)
  4206. CVTU A2, C11 # A2 = upper half of C11, ends up in CO2[1]
  4207. LD B2, 1 * SIZE(CO2)
  4208. MADD A1, B1, A1, ALPHA # value for CO1[1]
  4209. LD B5, 0 * SIZE(CO1)
  4210. MADD A2, B2, A2, ALPHA # value for CO2[1]
  4211. LD B6, 0 * SIZE(CO2)
  4212. MADD C11, B5, C11, ALPHA # value for CO1[0]
  4213. ST A1, 1 * SIZE(CO1)
  4214. MADD C13, B6, C13, ALPHA # value for CO2[0]
  4215. ST A2, 1 * SIZE(CO2)
  4216. ST C11, 0 * SIZE(CO1)
  4217. ST C13, 0 * SIZE(CO2)
  4218. CVTU A1, C14 # A1 = upper half of C14, ends up in CO3[1]
  4219. LD B1, 1 * SIZE(CO3)
  4220. CVTU A2, C12 # A2 = upper half of C12, ends up in CO4[1]
  4221. LD B2, 1 * SIZE(CO4)
  4222. MADD A1, B1, A1, ALPHA # value for CO3[1]
  4223. LD A5, 0 * SIZE(CO3)
  4224. MADD A2, B2, A2, ALPHA
  4225. LD A6, 0 * SIZE(CO4)
  4226. MADD C12, A5, C12, ALPHA
  4227. ST A1, 1 * SIZE(CO3)
  4228. MADD C14, A6, C14, ALPHA
  4229. ST A2, 1 * SIZE(CO4)
  4230. ST C12, 0 * SIZE(CO3)
  4231. ST C14, 0 * SIZE(CO4)
  4232. daddiu CO1, CO1, 2 * SIZE
  4233. daddiu CO2, CO2, 2 * SIZE
  4234. daddiu CO3, CO3, 2 * SIZE
  4235. daddiu CO4, CO4, 2 * SIZE
  4236. #else
  4237. CVTU A1, C13 # A1 = upper half of C13, ends up in CO1[1]
  4238. CVTU A2, C11 # A2 = upper half of C11, ends up in CO2[1]
  4239. MUL A1, A1, ALPHA # value for CO1[1]
  4240. MUL A2, A2, ALPHA # value for CO2[1]
  4241. MUL C11, C11, ALPHA # value for CO1[0]
  4242. MUL C13, C13, ALPHA # value for CO2[0]
  4243. CVTU A3, C14 # A3 = upper half of C14, ends up in CO3[1]
  4244. CVTU A4, C12 # A4 = upper half of C12, ends up in CO4[1]
  4245. MUL A3, A3, ALPHA # value for CO3[1]
  4246. ST A1, 1 * SIZE(CO1)
  4247. MUL A4, A4, ALPHA
  4248. ST A2, 1 * SIZE(CO2)
  4249. MUL C12, C12, ALPHA
  4250. ST C11, 0 * SIZE(CO1)
  4251. MUL C14, C14, ALPHA
  4252. ST C13, 0 * SIZE(CO2)
  4253. ST A3, 1 * SIZE(CO3)
  4254. ST A4, 1 * SIZE(CO4)
  4255. ST C12, 0 * SIZE(CO3)
  4256. ST C14, 0 * SIZE(CO4)
  4257. daddiu CO1, CO1, 2 * SIZE
  4258. daddiu CO2, CO2, 2 * SIZE
  4259. daddiu CO3, CO3, 2 * SIZE
  4260. daddiu CO4, CO4, 2 * SIZE
  4261. #if ( defined(LEFT) && defined(TRANSA))||\
  4262. (!defined(LEFT) && !defined(TRANSA))
  4263. dsubu TEMP, K, KK # TEMP = K - KK, then minus the tile extent (2 rows or 4 columns)
  4264. #ifdef LEFT
  4265. daddiu TEMP, TEMP, -2
  4266. #else
  4267. daddiu TEMP, TEMP, -4
  4268. #endif
  4269. dsll L, TEMP, 1 + BASE_SHIFT # TEMP * 2 elements of A
  4270. dsll TEMP, TEMP, 2 + BASE_SHIFT # TEMP * 4 elements of B
  4271. daddu AO, AO, L
  4272. daddu BO, BO, TEMP
  4273. #endif
  4274. #ifdef LEFT
  4275. daddiu KK, KK, 2
  4276. #endif
  4277. #endif
  # Leftover single row (M & 1) of the current four-column panel: position
  # AO/BO, clear the accumulators, preload A1 and B1..B4 and compute the number
  # of four-step passes in L. Skips straight to .L412 when L is not positive.
  4278. .align 4
  4279. .L41:
  4280. andi I, M, 1
  4281. blez I, .L40
  4282. NOP
  4283. .align 4
  4284. .L411:
  4285. #if defined(TRMMKERNEL)
  4286. #if (defined(LEFT) && defined(TRANSA)) ||\
  4287. (!defined(LEFT) && !defined(TRANSA))
  4288. move BO, B
  4289. #else
  4290. dsll L, KK, BASE_SHIFT # KK * 1 element of A
  4291. dsll TEMP, KK, 2 + BASE_SHIFT # KK * 4 elements of B
  4292. daddu AO, AO, L
  4293. daddu BO, B, TEMP
  4294. #endif
  4295. MTC $0, C11 # CLEAR RESULTS REGISTERS
  4296. MOV C12, C11
  4297. LD B1, 0 * SIZE(BO)
  4298. MOV C21, C11
  4299. MOV C22, C11
  4300. LD A1, 0 * SIZE(AO)
  4301. MOV C31, C11
  4302. MOV C32, C11
  4303. LD B2, 1 * SIZE(BO)
  4304. MOV C41, C11
  4305. MOV C42, C11
  4306. LD B3, 2 * SIZE(BO)
  4307. MOV C13, C11
  4308. MOV C14, C11
  4309. LD B4, 3 * SIZE(BO)
  4310. MOV C23, C11
  4311. MOV C24, C11
  4312. MOV C33, C11
  4313. MOV C34, C11
  4314. MOV C43, C11
  4315. MOV C44, C11
  4316. #if (defined(LEFT) && !defined(TRANSA))||\
  4317. (!defined(LEFT) && defined(TRANSA))
  4318. dsubu TEMP, K, KK
  4319. #elif defined(LEFT)
  4320. daddiu TEMP, KK, 1
  4321. #else
  4322. daddiu TEMP, KK, 4
  4323. #endif
  4324. dsra L, TEMP, 2
  4325. blez L, .L412
  NOP # explicit delay-slot filler; without it the slot holds the first instruction of .L4110
  4326. #else
  4327. move BO, B # Reset B
  4328. dsra L, K, 2 # UnRoll K=4
  4329. MTC $0, C11 # CLEAR RESULTS REGISTERS
  4330. MOV C12, C11
  4331. LD B1, 0 * SIZE(BO)
  4332. MOV C21, C11
  4333. MOV C22, C11
  4334. LD A1, 0 * SIZE(AO)
  4335. MOV C31, C11
  4336. MOV C32, C11
  4337. LD B2, 1 * SIZE(BO)
  4338. MOV C41, C11
  4339. MOV C42, C11
  4340. LD B3, 2 * SIZE(BO)
  4341. MOV C13, C11
  4342. MOV C14, C11
  4343. LD B4, 3 * SIZE(BO)
  4344. MOV C23, C11
  4345. MOV C24, C11
  4346. MOV C33, C11
  4347. MOV C34, C11
  4348. MOV C43, C11
  4349. blez L, .L412
  4350. MOV C44, C11 # last clear sits in the delay slot, so it runs whether or not the branch is taken
  4351. #endif
  # Main K loop of the 1x4 tile, four K steps per pass (L passes). A1..A4 carry
  # the A value of each step; B1..B4 and B5..B8 alternate as the four B values.
  4352. .L4110:
  4353. daddiu L, L, -1
  4354. LD A2, 1 * SIZE(AO)
  4355. MADD C11, C11, A1, B1
  4356. LD B5, 4 * SIZE(BO)
  4357. MADD C12, C12, A1, B2
  4358. LD B6, 5 * SIZE(BO)
  4359. MADD C13, C13, A1, B3
  4360. LD B7, 6 * SIZE(BO)
  4361. MADD C14, C14, A1, B4
  4362. LD B8, 7 * SIZE(BO)
  4363. LD A3, 2 * SIZE(AO)
  4364. NOP
  4365. MADD C11, C11, A2, B5
  4366. LD B1, 8 * SIZE(BO)
  4367. MADD C12, C12, A2, B6
  4368. LD B2, 9 * SIZE(BO)
  4369. MADD C13, C13, A2, B7
  4370. LD B3, 10 * SIZE(BO)
  4371. MADD C14, C14, A2, B8
  4372. LD B4, 11 * SIZE(BO)
  4373. LD A4, 3 * SIZE(AO)
  4374. daddiu AO, AO, 4 * SIZE # 4 K steps * 1 row
  4375. MADD C11, C11, A3, B1
  4376. LD B5, 12 * SIZE(BO)
  4377. MADD C12, C12, A3, B2
  4378. LD B6, 13 * SIZE(BO)
  4379. MADD C13, C13, A3, B3
  4380. LD B7, 14 * SIZE(BO)
  4381. MADD C14, C14, A3, B4
  4382. LD B8, 15 * SIZE(BO)
  4383. LD A1, 0 * SIZE(AO) # A1 for the next pass
  4384. daddiu BO, BO, 16 * SIZE # 4 K steps * 4 columns
  4385. MADD C11, C11, A4, B5
  4386. LD B1, 0 * SIZE(BO)
  4387. MADD C12, C12, A4, B6
  4388. LD B2, 1 * SIZE(BO)
  4389. MADD C13, C13, A4, B7
  4390. LD B3, 2 * SIZE(BO)
  4391. MADD C14, C14, A4, B8
  4392. bgtz L, .L4110
  4393. LD B4, 3 * SIZE(BO) # after the branch but needed on both paths (delay slot, assuming noreorder)
  # K & 2 tail of the 1x4 tile: two more K steps, then reload A1 and B1..B4.
  4394. .L412:
  4395. #ifndef TRMMKERNEL
  4396. andi L, K, 2
  4397. #else
  4398. andi L, TEMP, 2
  4399. #endif
  4400. blez L, .L413
  4401. NOP
  4402. LD A2, 1 * SIZE(AO)
  4403. daddiu AO, AO, 2 * SIZE # 2 K steps * 1 row
  4404. MADD C11, C11, A1, B1
  4405. LD B5, 4 * SIZE(BO)
  4406. MADD C12, C12, A1, B2
  4407. LD B6, 5 * SIZE(BO)
  4408. MADD C13, C13, A1, B3
  4409. LD B7, 6 * SIZE(BO)
  4410. MADD C14, C14, A1, B4
  4411. LD B8, 7 * SIZE(BO)
  4412. LD A1, 0 * SIZE(AO)
  4413. daddiu BO, BO, 8 * SIZE # 2 K steps * 4 columns
  4414. MADD C11, C11, A2, B5
  4415. LD B1, 0 * SIZE(BO)
  4416. MADD C12, C12, A2, B6
  4417. LD B2, 1 * SIZE(BO)
  4418. MADD C13, C13, A2, B7
  4419. LD B3, 2 * SIZE(BO)
  4420. MADD C14, C14, A2, B8
  4421. LD B4, 3 * SIZE(BO)
  # K & 1 tail of the 1x4 tile: one last K step with the preloaded A1 and B1..B4.
  4422. .L413:
  4423. #ifndef TRMMKERNEL
  4424. andi L, K, 1
  4425. #else
  4426. andi L, TEMP, 1
  4427. #endif
  4428. blez L, .L410
  4429. LD ALPHA, 152($sp) # follows the branch and is consumed at .L410, so it must run on both paths (delay slot)
  4430. MADD C11, C11, A1, B1
  4431. MADD C12, C12, A1, B2
  4432. daddiu AO, AO, 1 * SIZE
  4433. MADD C13, C13, A1, B3
  4434. MADD C14, C14, A1, B4
  4435. daddiu BO, BO, 4 * SIZE
  # Write-back of the 1x4 tile: C11..C14 go to CO1..CO4. Without TRMMKERNEL the
  # old C values are loaded and combined via MADD with ALPHA; with TRMMKERNEL
  # the results are only multiplied by ALPHA and AO/BO/KK are adjusted afterwards.
  4436. .align 4
  4437. .L410:
  4438. #ifndef TRMMKERNEL
  4439. LD A5, 0 * SIZE(CO1)
  4440. LD A6, 0 * SIZE(CO2)
  4441. LD A7, 0 * SIZE(CO3)
  4442. LD A8, 0 * SIZE(CO4)
  4443. MADD A5, A5, C11, ALPHA
  4444. MADD A6, A6, C12, ALPHA
  4445. MADD A7, A7, C13, ALPHA
  4446. MADD A8, A8, C14, ALPHA
  4447. ST A5, 0 * SIZE(CO1)
  4448. ST A6, 0 * SIZE(CO2)
  4449. ST A7, 0 * SIZE(CO3)
  4450. ST A8, 0 * SIZE(CO4)
  4451. daddiu CO1, CO1, 1 * SIZE
  4452. daddiu CO2, CO2, 1 * SIZE
  4453. daddiu CO3, CO3, 1 * SIZE
  4454. daddiu CO4, CO4, 1 * SIZE
  4455. #else
  4456. MUL A5, C11, ALPHA
  4457. MUL A6, C12, ALPHA
  4458. MUL A7, C13, ALPHA
  4459. MUL A8, C14, ALPHA
  4460. ST A5, 0 * SIZE(CO1)
  4461. ST A6, 0 * SIZE(CO2)
  4462. ST A7, 0 * SIZE(CO3)
  4463. ST A8, 0 * SIZE(CO4)
  4464. daddiu CO1, CO1, 1 * SIZE
  4465. daddiu CO2, CO2, 1 * SIZE
  4466. daddiu CO3, CO3, 1 * SIZE
  4467. daddiu CO4, CO4, 1 * SIZE
  4468. #if ( defined(LEFT) && defined(TRANSA))||\
  4469. (!defined(LEFT) && !defined(TRANSA))
  4470. dsubu TEMP, K, KK # TEMP = K - KK, then minus the tile extent (1 row or 4 columns)
  4471. #ifdef LEFT
  4472. daddiu TEMP, TEMP, -1
  4473. #else
  4474. daddiu TEMP, TEMP, -4
  4475. #endif
  4476. dsll L, TEMP, BASE_SHIFT # TEMP * 1 element of A
  4477. dsll TEMP, TEMP, 2 + BASE_SHIFT # TEMP * 4 elements of B
  4478. daddu AO, AO, L
  4479. daddu BO, BO, TEMP
  4480. #endif
  4481. #ifdef LEFT
  4482. daddiu KK, KK, 1
  4483. #endif
  4484. #endif
  # End of one four-column panel: KK advances by 4 when TRMMKERNEL is built
  # without LEFT, B takes the current BO, and the panel loop repeats while J > 0.
  4485. .align 4
  4486. .L40:
  4487. #if defined(TRMMKERNEL) && !defined(LEFT)
  4488. daddiu KK, KK, 4
  4489. #endif
  4490. daddiu J, J, -1
  4491. move B, BO
  4492. bgtz J, .L48
  4493. NOP
  # Two-column panel (N & 2): skip to .L1 when the bit is clear, otherwise
  # reset AO, set CO1/CO2 to the two output columns and count 8-row tiles in I.
  4494. .align 4
  4495. .L2: # Nr=2
  4496. andi J, N, 2
  4497. blez J, .L1
  4498. NOP
  4499. .L28:
  4500. dsra I, M, 3 # MR=8
  4501. move AO, A # Reset A
  4502. move CO1, C
  4503. #if defined(TRMMKERNEL) && defined(LEFT)
  4504. move KK, OFFSET
  4505. #endif
  4506. daddu CO2, C, LDC
  4507. blez I, .L24
  4508. daddu C, CO2, LDC # delay slot: C moves past both columns whether or not the branch is taken
  # Setup for one 8x2 tile: position AO/BO, clear the accumulators, preload
  # A1..A8 and B1/B2, and compute the number of two-step passes in L.
  4509. .align 4
  4510. .L281:
  4511. #if defined(TRMMKERNEL)
  4512. #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  4513. move BO, B
  4514. #else
  4515. dsll L, KK, 3 + BASE_SHIFT # KK * 8 elements of A
  4516. dsll TEMP, KK, 1 + BASE_SHIFT # KK * 2 elements of B
  4517. daddu AO, AO, L
  4518. daddu BO, B, TEMP
  4519. #endif
  4520. MTC $0, C11 # CLEAR RESULTS REGISTERS
  4521. LD A1, 0 * SIZE(AO)
  4522. MOV C12, C11
  4523. LD A2, 1 * SIZE(AO)
  4524. MOV C21, C11
  4525. LD A3, 2 * SIZE(AO)
  4526. MOV C22, C11
  4527. LD A4, 3 * SIZE(AO)
  4528. MOV C31, C11
  4529. LD A5, 4 * SIZE(AO)
  4530. MOV C32, C11
  4531. LD A6, 5 * SIZE(AO)
  4532. MOV C41, C11
  4533. LD B1, 0 * SIZE(BO)
  4534. MOV C42, C11
  4535. LD B2, 1 * SIZE(BO)
  4536. MOV C13, C11
  4537. LD A7, 6 * SIZE(AO)
  4538. MOV C14, C11
  4539. LD A8, 7 * SIZE(AO)
  4540. MOV C23, C11
  4541. MOV C24, C11
  4542. MOV C33, C11
  4543. MOV C34, C11
  4544. MOV C43, C11
  4545. MOV C44, C11
  4546. #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
  4547. dsubu TEMP, K, KK
  4548. #elif defined(LEFT)
  4549. daddiu TEMP, KK, 8
  4550. #else
  4551. daddiu TEMP, KK, 2
  4552. #endif
  4553. dsra L, TEMP, 1
  4554. blez L, .L282
  4555. NOP
  4556. #else
  4557. move BO, B # Reset B
  4558. dsra L, K, 1 # UnRoll K=2
  4559. MTC $0, C11 # CLEAR RESULTS REGISTERS
  4560. LD A1, 0 * SIZE(AO)
  4561. MOV C12, C11
  4562. LD A2, 1 * SIZE(AO)
  4563. MOV C21, C11
  4564. LD A3, 2 * SIZE(AO)
  4565. MOV C22, C11
  4566. LD A4, 3 * SIZE(AO)
  4567. MOV C31, C11
  4568. LD A5, 4 * SIZE(AO)
  4569. MOV C32, C11
  4570. LD A6, 5 * SIZE(AO)
  4571. MOV C41, C11
  4572. LD B1, 0 * SIZE(BO)
  4573. MOV C42, C11
  4574. LD B2, 1 * SIZE(BO)
  4575. MOV C13, C11
  4576. LD A7, 6 * SIZE(AO)
  4577. MOV C14, C11
  4578. LD A8, 7 * SIZE(AO)
  4579. MOV C23, C11
  4580. MOV C24, C11
  4581. MOV C33, C11
  4582. MOV C34, C11
  4583. MOV C43, C11
  4584. blez L, .L282
  4585. MOV C44, C11 # last clear sits in the delay slot, so it runs whether or not the branch is taken
  4586. #endif
  # Main K loop of the 8x2 tile, two K steps per pass (L passes).
  # Accumulator layout as used below: C11..C41 = rows 0-3 of column 0,
  # C12..C42 = rows 0-3 of column 1, C13..C43 = rows 4-7 of column 0,
  # C14..C44 = rows 4-7 of column 1. B5..B8 are borrowed to hold A values
  # (rows 0-3 of the second K step), not B values.
  4587. .align 4
  4588. .L2810:
  4589. daddiu L, L, -1
  4590. MADD C11, C11, A1, B1
  4591. LD B5, 8 * SIZE(AO) # A rows 0-3 of the second K step go into B5..B8
  4592. MADD C21, C21, A2, B1
  4593. LD B6, 9 * SIZE(AO)
  4594. MADD C31, C31, A3, B1
  4595. LD B7, 10 * SIZE(AO)
  4596. MADD C41, C41, A4, B1
  4597. LD B8, 11 * SIZE(AO)
  4598. MADD C12, C12, A1, B2
  4599. MADD C22, C22, A2, B2
  4600. LD B3, 2 * SIZE(BO)
  4601. MADD C32, C32, A3, B2
  4602. MADD C42, C42, A4, B2
  4603. LD B4, 3 * SIZE(BO)
  4604. daddiu BO, BO, 4 * SIZE # 2 K steps * 2 columns
  4605. MADD C13, C13, A5, B1
  4606. MADD C23, C23, A6, B1
  4607. LD A1, 12 * SIZE(AO) # A rows 4-7 of the second K step reuse A1..A4
  4608. MADD C33, C33, A7, B1
  4609. MADD C43, C43, A8, B1
  4610. LD A2, 13 * SIZE(AO)
  4611. MADD C14, C14, A5, B2
  4612. MADD C24, C24, A6, B2
  4613. LD A3, 14 * SIZE(AO)
  4614. MADD C34, C34, A7, B2
  4615. MADD C44, C44, A8, B2
  4616. LD A4, 15 * SIZE(AO)
  4617. daddiu AO, AO, 16 * SIZE # 2 K steps * 8 rows
  4618. MADD C11, C11, B5, B3
  4619. LD A5, 4 * SIZE(AO)
  4620. MADD C21, C21, B6, B3
  4621. LD A6, 5 * SIZE(AO)
  4622. MADD C13, C13, A1, B3
  4623. MADD C23, C23, A2, B3
  4624. LD A7, 6 * SIZE(AO)
  4625. MADD C33, C33, A3, B3
  4626. MADD C43, C43, A4, B3
  4627. LD A8, 7 * SIZE(AO)
  4628. MADD C14, C14, A1, B4
  4629. MADD C24, C24, A2, B4
  4630. LD B1, 0 * SIZE(BO)
  4631. MADD C34, C34, A3, B4
  4632. MADD C44, C44, A4, B4
  4633. LD B2, 1 * SIZE(BO)
  4634. MADD C31, C31, B7, B3
  4635. MADD C41, C41, B8, B3
  4636. LD A1, 0 * SIZE(AO) # A1..A4 reloaded for the next pass after their last use above
  4637. MADD C12, C12, B5, B4
  4638. LD A2, 1 * SIZE(AO)
  4639. MADD C22, C22, B6, B4
  4640. LD A3, 2 * SIZE(AO)
  4641. LD A4, 3 * SIZE(AO)
  4642. MADD C32, C32, B7, B4
  4643. bgtz L, .L2810
  4644. MADD C42, C42, B8, B4 # after the branch but needed on both paths (delay slot, assuming noreorder)
  # K & 1 tail of the 8x2 tile: one last K step with the preloaded A1..A8, B1, B2.
  4645. .align 4
  4646. .L282:
  4647. #ifndef TRMMKERNEL
  4648. andi L, K, 1
  4649. #else
  4650. andi L, TEMP, 1
  4651. #endif
  4652. blez L, .L280
  4653. LD ALPHA, 152($sp) # follows the branch and is consumed at .L280, so it must run on both paths (delay slot)
  4654. MADD C13, C13, A5, B1
  4655. MADD C23, C23, A6, B1
  4656. MADD C33, C33, A7, B1
  4657. MADD C43, C43, A8, B1
  4658. MADD C14, C14, A5, B2
  4659. MADD C24, C24, A6, B2
  4660. MADD C34, C34, A7, B2
  4661. MADD C44, C44, A8, B2
  4662. daddiu AO, AO, 8 * SIZE
  4663. MADD C11, C11, A1, B1
  4664. MADD C21, C21, A2, B1
  4665. MADD C31, C31, A3, B1
  4666. MADD C41, C41, A4, B1
  4667. MADD C12, C12, A1, B2
  4668. MADD C22, C22, A2, B2
  4669. MADD C32, C32, A3, B2
  4670. MADD C42, C42, A4, B2
  4671. daddiu BO, BO, 2 * SIZE
  # Write-back of the 8x2 tile and loop over the 8-row tiles (I). Rows 0-3 come
  # from Cx1/Cx2 and rows 4-7 from Cx3/Cx4; CO1 is column 0 and CO2 column 1.
  # C11 is reused as scratch for element 7 of CO2 once its own value is consumed.
  # NOTE(review): B8 is avoided here, presumably because ALPHA shares its
  # register - confirm against the register defines at the top of the file.
  4672. .align 4
  4673. .L280: # Write Back
  4674. #ifndef TRMMKERNEL
  4675. daddiu I, I, -1
  4676. LD A1, 0 * SIZE(CO1)
  4677. LD A2, 1 * SIZE(CO1)
  4678. LD A3, 2 * SIZE(CO1)
  4679. LD A4, 3 * SIZE(CO1)
  4680. LD A5, 4 * SIZE(CO1)
  4681. LD A6, 5 * SIZE(CO1)
  4682. LD A7, 6 * SIZE(CO1)
  4683. LD A8, 7 * SIZE(CO1)
  4684. MADD A1, A1, C11, ALPHA
  4685. LD B1, 0 * SIZE(CO2)
  4686. MADD A2, A2, C21, ALPHA
  4687. LD B2, 1 * SIZE(CO2)
  4688. MADD A3, A3, C31, ALPHA
  4689. LD B3, 2 * SIZE(CO2)
  4690. MADD A4, A4, C41, ALPHA
  4691. LD B4, 3 * SIZE(CO2)
  4692. MADD A5, A5, C13, ALPHA
  4693. LD B5, 4 * SIZE(CO2)
  4694. MADD A6, A6, C23, ALPHA
  4695. LD B6, 5 * SIZE(CO2)
  4696. MADD A7, A7, C33, ALPHA
  4697. LD B7, 6 * SIZE(CO2)
  4698. MADD A8, A8, C43, ALPHA
  4699. LD C11, 7 * SIZE(CO2) # C11 was consumed by the first MADD above, now scratch
  4700. MADD B1, B1, C12, ALPHA
  4701. ST A1, 0 * SIZE(CO1)
  4702. MADD B2, B2, C22, ALPHA
  4703. ST A2, 1 * SIZE(CO1)
  4704. MADD B3, B3, C32, ALPHA
  4705. ST A3, 2 * SIZE(CO1)
  4706. MADD B4, B4, C42, ALPHA
  4707. ST A4, 3 * SIZE(CO1)
  4708. MADD B5, B5, C14, ALPHA
  4709. ST A5, 4 * SIZE(CO1)
  4710. MADD B6, B6, C24, ALPHA
  4711. ST A6, 5 * SIZE(CO1)
  4712. MADD B7, B7, C34, ALPHA
  4713. ST A7, 6 * SIZE(CO1)
  4714. MADD C11, C11, C44, ALPHA
  4715. ST A8, 7 * SIZE(CO1)
  4716. ST B1, 0 * SIZE(CO2)
  4717. ST B2, 1 * SIZE(CO2)
  4718. ST B3, 2 * SIZE(CO2)
  4719. ST B4, 3 * SIZE(CO2)
  4720. ST B5, 4 * SIZE(CO2)
  4721. ST B6, 5 * SIZE(CO2)
  4722. ST B7, 6 * SIZE(CO2)
  4723. ST C11, 7 * SIZE(CO2)
  4724. daddiu CO1, CO1, 8 * SIZE
  4725. bgtz I, .L281
  4726. daddiu CO2, CO2, 8 * SIZE # delay slot: CO2 advances whether or not the loop repeats
  4727. #else
  4728. daddiu I, I, -1
  4729. MUL A1, C11, ALPHA
  4730. MUL A2, C21, ALPHA
  4731. MUL A3, C31, ALPHA
  4732. MUL A4, C41, ALPHA
  4733. MUL A5, C13, ALPHA
  4734. MUL A6, C23, ALPHA
  4735. MUL A7, C33, ALPHA
  4736. MUL A8, C43, ALPHA
  4737. MUL B1, C12, ALPHA
  4738. ST A1, 0 * SIZE(CO1)
  4739. MUL B2, C22, ALPHA
  4740. ST A2, 1 * SIZE(CO1)
  4741. MUL B3, C32, ALPHA
  4742. ST A3, 2 * SIZE(CO1)
  4743. MUL B4, C42, ALPHA
  4744. ST A4, 3 * SIZE(CO1)
  4745. MUL B5, C14, ALPHA
  4746. ST A5, 4 * SIZE(CO1)
  4747. MUL B6, C24, ALPHA
  4748. ST A6, 5 * SIZE(CO1)
  4749. MUL B7, C34, ALPHA
  4750. ST A7, 6 * SIZE(CO1)
  4751. MUL C11, C44, ALPHA # C11 already consumed above, reused for element 7 of CO2
  4752. ST A8, 7 * SIZE(CO1)
  4753. ST B1, 0 * SIZE(CO2)
  4754. ST B2, 1 * SIZE(CO2)
  4755. ST B3, 2 * SIZE(CO2)
  4756. ST B4, 3 * SIZE(CO2)
  4757. ST B5, 4 * SIZE(CO2)
  4758. ST B6, 5 * SIZE(CO2)
  4759. ST B7, 6 * SIZE(CO2)
  4760. ST C11, 7 * SIZE(CO2)
  4761. #if ( defined(LEFT) && defined(TRANSA)) ||(!defined(LEFT) && !defined(TRANSA))
  4762. dsubu TEMP, K, KK # TEMP = K - KK, then minus the tile extent (8 rows or 2 columns)
  4763. #ifdef LEFT
  4764. daddiu TEMP, TEMP, -8
  4765. #else
  4766. daddiu TEMP, TEMP, -2
  4767. #endif
  4768. dsll L, TEMP, 3 + BASE_SHIFT # TEMP * 8 elements of A
  4769. dsll TEMP, TEMP, 1 + BASE_SHIFT # TEMP * 2 elements of B
  4770. daddu AO, AO, L
  4771. daddu BO, BO, TEMP
  4772. #endif
  4773. #ifdef LEFT
  4774. daddiu KK, KK, 8
  4775. #endif
  4776. daddiu CO1, CO1, 8 * SIZE
  4777. bgtz I, .L281
  4778. daddiu CO2, CO2, 8 * SIZE # delay slot: CO2 advances whether or not the loop repeats
  4779. #endif
  # Setup for the 4x2 tile (M & 4): skip to .L22 when the bit is clear,
  # otherwise position AO/BO, clear the accumulators, preload A1..A4 and B1/B2,
  # and compute the number of two-step passes in L.
  4780. .align 4
  4781. .L24:
  4782. andi I, M, 4 # MR=4
  4783. blez I, .L22
  4784. NOP
  4785. .align 4
  4786. .L241:
  4787. #if defined(TRMMKERNEL)
  4788. #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  4789. move BO, B
  4790. #else
  4791. dsll L, KK, 2 + BASE_SHIFT # KK * 4 elements of A
  4792. dsll TEMP, KK, 1 + BASE_SHIFT # KK * 2 elements of B
  4793. daddu AO, AO, L
  4794. daddu BO, B, TEMP
  4795. #endif
  4796. MTC $0, C11 # CLEAR RESULTS REGISTERS
  4797. MOV C12, C11
  4798. LD A1, 0 * SIZE(AO)
  4799. MOV C21, C11
  4800. MOV C22, C11
  4801. LD A2, 1 * SIZE(AO)
  4802. MOV C31, C11
  4803. MOV C32, C11
  4804. LD A3, 2 * SIZE(AO)
  4805. MOV C41, C11
  4806. MOV C42, C11
  4807. LD A4, 3 * SIZE(AO)
  4808. MOV C13, C11
  4809. MOV C14, C11
  4810. LD B1, 0 * SIZE(BO)
  4811. MOV C23, C11
  4812. MOV C24, C11
  4813. LD B2, 1 * SIZE(BO)
  4814. MOV C33, C11
  4815. MOV C34, C11
  4816. MOV C43, C11
  4817. MOV C44, C11
  4818. #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
  4819. dsubu TEMP, K, KK
  4820. #elif defined(LEFT)
  4821. daddiu TEMP, KK, 4
  4822. #else
  4823. daddiu TEMP, KK, 2
  4824. #endif
  4825. dsra L, TEMP, 1
  4826. blez L, .L242
  4827. NOP
  4828. #else
  4829. move BO, B # Reset B
  4830. dsra L, K, 1 # UnRoll K=2
  4831. MTC $0, C11 # CLEAR RESULTS REGISTERS
  4832. MOV C12, C11
  4833. LD A1, 0 * SIZE(AO)
  4834. MOV C21, C11
  4835. MOV C22, C11
  4836. LD A2, 1 * SIZE(AO)
  4837. MOV C31, C11
  4838. MOV C32, C11
  4839. LD A3, 2 * SIZE(AO)
  4840. MOV C41, C11
  4841. MOV C42, C11
  4842. LD A4, 3 * SIZE(AO)
  4843. MOV C13, C11
  4844. MOV C14, C11
  4845. LD B1, 0 * SIZE(BO)
  4846. MOV C23, C11
  4847. MOV C24, C11
  4848. LD B2, 1 * SIZE(BO)
  4849. MOV C33, C11
  4850. MOV C34, C11
  4851. MOV C43, C11
  4852. blez L, .L242
  4853. MOV C44, C11 # last clear sits in the delay slot, so it runs whether or not the branch is taken
  4854. #endif
  # Main K loop of the 4x2 tile, two K steps per pass (L passes). Cx1 is
  # column 0 and Cx2 column 1; A1..A4 / A5..A8 alternate as the four A rows.
  4855. .align 4
  4856. .L2410:
  4857. daddiu L, L, -1
  4858. MADD C11, C11, A1, B1
  4859. LD A5, 4 * SIZE(AO)
  4860. MADD C21, C21, A2, B1
  4861. LD B3, 2 * SIZE(BO)
  4862. MADD C31, C31, A3, B1
  4863. LD B4, 3 * SIZE(BO)
  4864. MADD C41, C41, A4, B1
  4865. LD A6, 5 * SIZE(AO)
  4866. daddiu BO, BO, 4 * SIZE # 2 K steps * 2 columns
  4867. MADD C12, C12, A1, B2
  4868. LD A7, 6 * SIZE(AO)
  4869. MADD C22, C22, A2, B2
  4870. LD A8, 7 * SIZE(AO)
  4871. daddiu AO, AO, 8 * SIZE # 2 K steps * 4 rows
  4872. MADD C32, C32, A3, B2
  4873. MADD C42, C42, A4, B2
  4874. MADD C11, C11, A5, B3
  4875. LD A1, 0 * SIZE(AO)
  4876. MADD C21, C21, A6, B3
  4877. LD B1, 0 * SIZE(BO)
  4878. MADD C31, C31, A7, B3
  4879. LD B2, 1 * SIZE(BO)
  4880. MADD C41, C41, A8, B3
  4881. LD A2, 1 * SIZE(AO)
  4882. MADD C12, C12, A5, B4
  4883. LD A3, 2 * SIZE(AO)
  4884. MADD C22, C22, A6, B4
  4885. LD A4, 3 * SIZE(AO)
  4886. MADD C32, C32, A7, B4
  4887. bgtz L, .L2410
  4888. MADD C42, C42, A8, B4 # after the branch but needed on both paths (delay slot, assuming noreorder)
  # K & 1 tail of the 4x2 tile: one last K step with the preloaded A1..A4, B1, B2.
  4889. .align 4
  4890. .L242:
  4891. #ifndef TRMMKERNEL
  4892. andi L, K, 1
  4893. #else
  4894. andi L, TEMP, 1
  4895. #endif
  4896. blez L, .L240
  4897. LD ALPHA, 152($sp) # follows the branch and is consumed at .L240, so it must run on both paths (delay slot)
  4898. MADD C11, C11, A1, B1
  4899. MADD C21, C21, A2, B1
  4900. MADD C31, C31, A3, B1
  4901. MADD C41, C41, A4, B1
  4902. MADD C12, C12, A1, B2
  4903. MADD C22, C22, A2, B2
  4904. MADD C32, C32, A3, B2
  4905. MADD C42, C42, A4, B2
  4906. daddiu AO, AO, 4 * SIZE
  4907. daddiu BO, BO, 2 * SIZE
  # Write-back of the 4x2 tile: C11..C41 go to CO1[0..3], C12..C42 to CO2[0..3].
  # Without TRMMKERNEL the old C values are loaded and combined via MADD with
  # ALPHA; with TRMMKERNEL the results are only multiplied by ALPHA and
  # AO/BO/KK are adjusted afterwards.
  4908. .align 4
  4909. .L240: # Write Back
  4910. #ifndef TRMMKERNEL
  4911. LD A1, 0 * SIZE(CO1)
  4912. LD A2, 1 * SIZE(CO1)
  4913. LD A3, 2 * SIZE(CO1)
  4914. LD A4, 3 * SIZE(CO1)
  4915. MADD A1, A1, C11, ALPHA
  4916. LD B1, 0 * SIZE(CO2)
  4917. MADD A2, A2, C21, ALPHA
  4918. LD B2, 1 * SIZE(CO2)
  4919. MADD A3, A3, C31, ALPHA
  4920. LD B3, 2 * SIZE(CO2)
  4921. MADD A4, A4, C41, ALPHA
  4922. LD B4, 3 * SIZE(CO2)
  4923. MADD B1, B1, C12, ALPHA
  4924. ST A1, 0 * SIZE(CO1)
  4925. MADD B2, B2, C22, ALPHA
  4926. ST A2, 1 * SIZE(CO1)
  4927. MADD B3, B3, C32, ALPHA
  4928. ST A3, 2 * SIZE(CO1)
  4929. MADD B4, B4, C42, ALPHA
  4930. ST A4, 3 * SIZE(CO1)
  4931. ST B1, 0 * SIZE(CO2)
  4932. ST B2, 1 * SIZE(CO2)
  4933. ST B3, 2 * SIZE(CO2)
  4934. ST B4, 3 * SIZE(CO2)
  4935. daddiu CO1, CO1, 4 * SIZE
  4936. daddiu CO2, CO2, 4 * SIZE
  4937. #else
  4938. MUL A1, C11, ALPHA
  4939. MUL A2, C21, ALPHA
  4940. MUL A3, C31, ALPHA
  4941. MUL A4, C41, ALPHA
  4942. MUL B1, C12, ALPHA
  4943. ST A1, 0 * SIZE(CO1)
  4944. MUL B2, C22, ALPHA
  4945. ST A2, 1 * SIZE(CO1)
  4946. MUL B3, C32, ALPHA
  4947. ST A3, 2 * SIZE(CO1)
  4948. MUL B4, C42, ALPHA
  4949. ST A4, 3 * SIZE(CO1)
  4950. ST B1, 0 * SIZE(CO2)
  4951. ST B2, 1 * SIZE(CO2)
  4952. ST B3, 2 * SIZE(CO2)
  4953. ST B4, 3 * SIZE(CO2)
  4954. daddiu CO1, CO1, 4 * SIZE
  4955. daddiu CO2, CO2, 4 * SIZE
  4956. #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  4957. dsubu TEMP, K, KK # TEMP = K - KK, then minus the tile extent (4 rows or 2 columns)
  4958. #ifdef LEFT
  4959. daddiu TEMP, TEMP, -4
  4960. #else
  4961. daddiu TEMP, TEMP, -2
  4962. #endif
  4963. dsll L, TEMP, 2 + BASE_SHIFT # TEMP * 4 elements of A
  4964. dsll TEMP, TEMP, 1 + BASE_SHIFT # TEMP * 2 elements of B
  4965. daddu AO, AO, L
  4966. daddu BO, BO, TEMP
  4967. #endif
  4968. #ifdef LEFT
  4969. daddiu KK, KK, 4
  4970. #endif
  4971. #endif
  # Setup for the 2x2 tile (M & 2): skip to .L21 when the bit is clear,
  # otherwise position AO/BO, clear the accumulators, preload A1/A2 and B1/B2,
  # and compute the number of two-step passes in L.
  4972. .align 4
  4973. .L22:
  4974. andi I, M, 2
  4975. blez I, .L21
  4976. NOP
  4977. .align 4
  4978. .L221:
  4979. #if defined(TRMMKERNEL)
  4980. #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  4981. move BO, B
  4982. #else
  4983. dsll L, KK, 1 + BASE_SHIFT # KK * 2 elements of A
  4984. dsll TEMP, KK, 1 + BASE_SHIFT # KK * 2 elements of B
  4985. daddu AO, AO, L
  4986. daddu BO, B, TEMP
  4987. #endif
  4988. MTC $0, C11 # CLEAR RESULTS REGISTERS
  4989. MOV C12, C11
  4990. LD A1, 0 * SIZE(AO)
  4991. MOV C21, C11
  4992. MOV C22, C11
  4993. LD A2, 1 * SIZE(AO)
  4994. MOV C31, C11
  4995. MOV C32, C11
  4996. LD B1, 0 * SIZE(BO)
  4997. MOV C41, C11
  4998. MOV C42, C11
  4999. LD B2, 1 * SIZE(BO)
  5000. MOV C43, C11
  5001. MOV C44, C11
  5002. #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
  5003. dsubu TEMP, K, KK
  5004. #elif defined(LEFT)
  5005. daddiu TEMP, KK, 2
  5006. #else
  5007. daddiu TEMP, KK, 2
  5008. #endif
  5009. dsra L, TEMP, 1
  5010. blez L, .L222
  5011. NOP
  5012. #else
  5013. move BO, B # Reset B
  5014. dsra L, K, 1 # UnRoll K=2
  5015. MTC $0, C11 # CLEAR RESULTS REGISTERS
  5016. MOV C12, C11
  5017. LD A1, 0 * SIZE(AO)
  5018. MOV C21, C11
  5019. MOV C22, C11
  5020. LD A2, 1 * SIZE(AO)
  5021. MOV C31, C11
  5022. MOV C32, C11
  5023. LD B1, 0 * SIZE(BO)
  5024. MOV C41, C11
  5025. MOV C42, C11
  5026. LD B2, 1 * SIZE(BO)
  5027. MOV C43, C11
  5028. blez L, .L222
  5029. MOV C44, C11 # last clear sits in the delay slot, so it runs whether or not the branch is taken
  5030. #endif
  # Main K loop of the 2x2 tile, two K steps per pass (L passes).
  # C11/C21 are rows 0/1 of column 0, C12/C22 rows 0/1 of column 1.
  5031. .align 4
  5032. .L2210:
  5033. daddiu L, L, -1
  5034. MADD C11, C11, A1, B1
  5035. LD A3, 2 * SIZE(AO)
  5036. MADD C21, C21, A2, B1
  5037. LD B3, 2 * SIZE(BO)
  5038. MADD C12, C12, A1, B2
  5039. LD A4, 3 * SIZE(AO)
  5040. daddiu AO, AO, 4 * SIZE # 2 K steps * 2 rows
  5041. MADD C22, C22, A2, B2
  5042. LD B4, 3 * SIZE(BO)
  5043. daddiu BO, BO, 4 * SIZE # 2 K steps * 2 columns
  5044. MADD C11, C11, A3, B3
  5045. LD A1, 0 * SIZE(AO)
  5046. MADD C21, C21, A4, B3
  5047. LD B1, 0 * SIZE(BO)
  5048. MADD C12, C12, A3, B4
  5049. LD B2, 1 * SIZE(BO)
  5050. MADD C22, C22, A4, B4
  5051. bgtz L, .L2210
  5052. LD A2, 1 * SIZE(AO) # after the branch but needed on both paths (delay slot, assuming noreorder)
  # K & 1 tail of the 2x2 tile: one last K step with the preloaded A1, A2, B1, B2.
  5053. .align 4
  5054. .L222:
  5055. #ifndef TRMMKERNEL
  5056. andi L, K, 1
  5057. #else
  5058. andi L, TEMP, 1
  5059. #endif
  5060. blez L, .L220
  5061. LD ALPHA, 152($sp) # follows the branch and is consumed at .L220, so it must run on both paths (delay slot)
  5062. MADD C11, C11, A1, B1
  5063. MADD C21, C21, A2, B1
  5064. MADD C12, C12, A1, B2
  5065. MADD C22, C22, A2, B2
  5066. daddiu AO, AO, 2 * SIZE
  5067. daddiu BO, BO, 2 * SIZE
  # Write-back of the 2x2 tile: C11/C21 go to CO1[0..1], C12/C22 to CO2[0..1].
  # Without TRMMKERNEL the old C values are loaded and combined via MADD with
  # ALPHA; with TRMMKERNEL the results are only multiplied by ALPHA and
  # AO/BO/KK are adjusted afterwards.
  5068. .align 4
  5069. .L220: # Write Back
  5070. #ifndef TRMMKERNEL
  5071. LD A1, 0 * SIZE(CO1)
  5072. LD A2, 1 * SIZE(CO1)
  5073. MADD A1, A1, C11, ALPHA
  5074. LD B1, 0 * SIZE(CO2)
  5075. MADD A2, A2, C21, ALPHA
  5076. LD B2, 1 * SIZE(CO2)
  5077. MADD B1, B1, C12, ALPHA
  5078. ST A1, 0 * SIZE(CO1)
  5079. MADD B2, B2, C22, ALPHA
  5080. ST A2, 1 * SIZE(CO1)
  5081. ST B1, 0 * SIZE(CO2)
  5082. ST B2, 1 * SIZE(CO2)
  5083. daddiu CO1, CO1, 2 * SIZE
  5084. daddiu CO2, CO2, 2 * SIZE
  5085. #else
  5086. MUL A1, C11, ALPHA
  5087. MUL A2, C21, ALPHA
  5088. MUL B1, C12, ALPHA
  5089. MUL B2, C22, ALPHA
  5090. ST A1, 0 * SIZE(CO1)
  5091. ST A2, 1 * SIZE(CO1)
  5092. ST B1, 0 * SIZE(CO2)
  5093. ST B2, 1 * SIZE(CO2)
  5094. daddiu CO1, CO1, 2 * SIZE
  5095. daddiu CO2, CO2, 2 * SIZE
  5096. #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  5097. dsubu TEMP, K, KK # TEMP = K - KK, then minus the tile extent (2 rows or 2 columns)
  5098. #ifdef LEFT
  5099. daddiu TEMP, TEMP, -2
  5100. #else
  5101. daddiu TEMP, TEMP, -2
  5102. #endif
  5103. dsll L, TEMP, 1 + BASE_SHIFT # TEMP * 2 elements of A
  5104. dsll TEMP, TEMP, 1 + BASE_SHIFT # TEMP * 2 elements of B
  5105. daddu AO, AO, L
  5106. daddu BO, BO, TEMP
  5107. #endif
  5108. #ifdef LEFT
  5109. daddiu KK, KK, 2 # immediate form, matching the KK updates of the other tiles
  5110. #endif
  5111. #endif
  # Setup for the 1x2 tile (M & 1): skip to .L20 when the bit is clear,
  # otherwise position AO/BO, clear the accumulators, preload A1 and B1/B2,
  # and compute the number of two-step passes in L.
  5112. .align 4
  5113. .L21:
  5114. andi I, M, 1
  5115. blez I, .L20
  5116. NOP
  5117. .align 4
  5118. .L211:
  5119. #if defined(TRMMKERNEL)
  5120. #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  5121. move BO, B # Reset B
  5122. #else
  5123. dsll L, KK, BASE_SHIFT # KK * 1 element of A
  5124. dsll TEMP, KK, 1 + BASE_SHIFT # KK * 2 elements of B
  5125. daddu AO, AO, L
  5126. daddu BO, B, TEMP
  5127. #endif
  5128. MTC $0, C11 # CLEAR RESULTS REGISTERS
  5129. MOV C12, C11
  5130. LD A1, 0 * SIZE(AO)
  5131. MOV C21, C11
  5132. MOV C22, C11
  5133. MOV C31, C11
  5134. MOV C32, C11
  5135. LD B1, 0 * SIZE(BO)
  5136. MOV C41, C11
  5137. MOV C42, C11
  5138. LD B2, 1 * SIZE(BO)
  5139. MOV C43, C11
  5140. MOV C44, C11
  5141. #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
  5142. dsubu TEMP, K, KK
  5143. #elif defined(LEFT)
  5144. daddiu TEMP, KK, 1
  5145. #else
  5146. daddiu TEMP, KK, 2
  5147. #endif
  5148. dsra L, TEMP, 1
  5149. blez L, .L212
  5150. NOP
  5151. #else
  5152. move BO, B # Reset B
  5153. dsra L, K, 1 # UnRoll K=2
  5154. MTC $0, C11 # CLEAR RESULTS REGISTERS
  5155. MOV C12, C11
  5156. LD A1, 0 * SIZE(AO)
  5157. MOV C21, C11
  5158. MOV C22, C11
  5159. MOV C31, C11
  5160. MOV C32, C11
  5161. LD B1, 0 * SIZE(BO)
  5162. MOV C41, C11
  5163. MOV C42, C11
  5164. LD B2, 1 * SIZE(BO)
  5165. MOV C43, C11
  5166. blez L, .L212
  5167. MOV C44, C11 # last clear sits in the delay slot, so it runs whether or not the branch is taken
  5168. #endif
  # Main K loop of the 1x2 tile, two K steps per pass (L passes).
  # C11 accumulates column 0 and C12 column 1.
  5169. .align 4
  5170. .L2110:
  5171. daddiu L, L, -1
  5172. MADD C11, C11, A1, B1
  5173. LD A2, 1 * SIZE(AO)
  5174. MADD C12, C12, A1, B2
  5175. LD B3, 2 * SIZE(BO)
  5176. LD B4, 3 * SIZE(BO)
  5177. daddiu AO, AO, 2 * SIZE # 2 K steps * 1 row
  5178. daddiu BO, BO, 4 * SIZE # 2 K steps * 2 columns
  5179. MADD C11, C11, A2, B3
  5180. LD A1, 0 * SIZE(AO)
  5181. MADD C12, C12, A2, B4
  5182. LD B1, 0 * SIZE(BO)
  5183. bgtz L, .L2110
  5184. LD B2, 1 * SIZE(BO) # after the branch but needed on both paths (delay slot, assuming noreorder)
  # K & 1 tail of the 1x2 tile: one last K step with the preloaded A1, B1, B2.
  5185. .align 4
  5186. .L212:
  5187. #ifndef TRMMKERNEL
  5188. andi L, K, 1
  5189. #else
  5190. andi L, TEMP, 1
  5191. #endif
  5192. blez L, .L210
  5193. LD ALPHA, 152($sp) # follows the branch and is consumed at .L210, so it must run on both paths (delay slot)
  5194. MADD C11, C11, A1, B1
  5195. MADD C12, C12, A1, B2
  5196. daddiu AO, AO, 1 * SIZE
  5197. daddiu BO, BO, 2 * SIZE
  # Write-back of the 1x2 tile: C11 goes to CO1[0] and C12 to CO2[0]. Without
  # TRMMKERNEL the old C values are loaded and combined via MADD with ALPHA;
  # with TRMMKERNEL the results are only multiplied by ALPHA and AO/BO/KK are
  # adjusted afterwards.
  5198. .align 4
  5199. .L210: # Write Back
  5200. #ifndef TRMMKERNEL
  5201. LD A1, 0 * SIZE(CO1)
  5202. MADD A1, A1, C11, ALPHA
  5203. LD B1, 0 * SIZE(CO2)
  5204. MADD B1, B1, C12, ALPHA
  5205. ST A1, 0 * SIZE(CO1)
  5206. ST B1, 0 * SIZE(CO2)
  5207. daddiu CO1, CO1, 1 * SIZE
  5208. daddiu CO2, CO2, 1 * SIZE
  5209. #else
  5210. MUL A1, C11, ALPHA
  5211. MUL B1, C12, ALPHA
  5212. ST A1, 0 * SIZE(CO1)
  5213. ST B1, 0 * SIZE(CO2)
  5214. daddiu CO1, CO1, 1 * SIZE
  5215. daddiu CO2, CO2, 1 * SIZE
  5216. #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  5217. dsubu TEMP, K, KK # TEMP = K - KK, then minus the tile extent (1 row or 2 columns)
  5218. #ifdef LEFT
  5219. daddiu TEMP, TEMP, -1
  5220. #else
  5221. daddiu TEMP, TEMP, -2
  5222. #endif
  5223. dsll L, TEMP, BASE_SHIFT # TEMP * 1 element of A
  5224. dsll TEMP, TEMP, 1 + BASE_SHIFT # TEMP * 2 elements of B
  5225. daddu AO, AO, L
  5226. daddu BO, BO, TEMP
  5227. #endif
  5228. #ifdef LEFT
  5229. daddiu KK, KK, 1
  5230. #endif
  5231. #endif
  # End of the two-column panel: KK advances by 2 when TRMMKERNEL is built
  # without LEFT, and B takes the current BO for the next panel.
  5232. .align 4
  5233. .L20:
  5234. #if defined(TRMMKERNEL) && !defined(LEFT)
  5235. daddiu KK, KK, 2
  5236. #endif
  5237. move B, BO
  # Single-column panel (N & 1): jump to .L999 when the bit is clear, otherwise
  # reset AO and count 8-row tiles in I (skipping to .L14 when there are none).
  5238. .align 4
  5239. .L1:
  5240. andi J, N, 1
  5241. blez J, .L999
  5242. NOP
  5243. .L18:
  5244. dsra I, M, 3 # MR=8
  5245. move AO, A # Reset A
  5246. #if defined(TRMMKERNEL) && defined(LEFT)
  5247. move KK, OFFSET
  5248. #endif
  5249. blez I, .L14
  5250. NOP
  # Setup for one 8x1 tile: position AO/BO, clear the accumulators, preload
  # A1..A8 and B1, and compute the number of two-step passes in L.
  5251. .align 4
  5252. .L181:
  5253. #if defined(TRMMKERNEL)
  5254. #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  5255. move BO, B # Reset B
  5256. #else
  5257. dsll L, KK, 3 + BASE_SHIFT # KK * 8 elements of A
  5258. dsll TEMP, KK, BASE_SHIFT # KK * 1 element of B
  5259. daddu AO, AO, L
  5260. daddu BO, B, TEMP
  5261. #endif
  5262. MTC $0, C11 # CLEAR RESULTS REGISTERS
  5263. LD A1, 0 * SIZE(AO)
  5264. MOV C12, C11
  5265. LD A2, 1 * SIZE(AO)
  5266. MOV C21, C11
  5267. LD A3, 2 * SIZE(AO)
  5268. MOV C22, C11
  5269. LD A4, 3 * SIZE(AO)
  5270. MOV C31, C11
  5271. LD A5, 4 * SIZE(AO)
  5272. MOV C32, C11
  5273. LD A6, 5 * SIZE(AO)
  5274. MOV C41, C11
  5275. LD B1, 0 * SIZE(BO)
  5276. MOV C42, C11
  5277. LD A7, 6 * SIZE(AO)
  5278. MOV C13, C11
  5279. LD A8, 7 * SIZE(AO)
  5280. MOV C14, C11
  5281. MOV C23, C11
  5282. MOV C24, C11
  5283. MOV C33, C11
  5284. MOV C34, C11
  5285. MOV C43, C11
  5286. MOV C44, C11
  5287. #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
  5288. dsubu TEMP, K, KK
  5289. #elif defined(LEFT)
  5290. daddiu TEMP, KK, 8
  5291. #else
  5292. daddiu TEMP, KK, 1
  5293. #endif
  5294. dsra L, TEMP, 1
  5295. blez L, .L182
  5296. NOP
  5297. #else
  5298. move BO, B # Reset B
  5299. dsra L, K, 1 # UnRoll K=2
  5300. MTC $0, C11 # CLEAR RESULTS REGISTERS
  5301. LD A1, 0 * SIZE(AO)
  5302. MOV C12, C11
  5303. LD A2, 1 * SIZE(AO)
  5304. MOV C21, C11
  5305. LD A3, 2 * SIZE(AO)
  5306. MOV C22, C11
  5307. LD A4, 3 * SIZE(AO)
  5308. MOV C31, C11
  5309. LD A5, 4 * SIZE(AO)
  5310. MOV C32, C11
  5311. LD A6, 5 * SIZE(AO)
  5312. MOV C41, C11
  5313. LD B1, 0 * SIZE(BO)
  5314. MOV C42, C11
  5315. LD A7, 6 * SIZE(AO)
  5316. MOV C13, C11
  5317. LD A8, 7 * SIZE(AO)
  5318. MOV C14, C11
  5319. MOV C23, C11
  5320. MOV C24, C11
  5321. MOV C33, C11
  5322. MOV C34, C11
  5323. MOV C43, C11
  5324. blez L, .L182
  5325. MOV C44, C11 # last clear sits in the delay slot, so it runs whether or not the branch is taken
  5326. #endif
  # Main K loop of the 8x1 tile, two K steps per pass (L passes).
  # C11..C41 accumulate rows 0-3 and C13..C43 rows 4-7. B5..B8 are borrowed to
  # hold A values (rows 0-3 of the second K step); B2 is the second B value.
  5327. .align 4
  5328. .L1810:
  5329. daddiu L, L, -1
  5330. MADD C11, C11, A1, B1
  5331. LD B5, 8 * SIZE(AO) # A rows 0-3 of the second K step go into B5..B8
  5332. MADD C21, C21, A2, B1
  5333. LD B6, 9 * SIZE(AO)
  5334. MADD C31, C31, A3, B1
  5335. LD B7, 10 * SIZE(AO)
  5336. MADD C41, C41, A4, B1
  5337. LD B8, 11 * SIZE(AO)
  5338. MADD C13, C13, A5, B1
  5339. LD B2, 1 * SIZE(BO)
  5340. daddiu BO, BO, 2 * SIZE # 2 K steps * 1 column
  5341. MADD C23, C23, A6, B1
  5342. LD A1, 12 * SIZE(AO) # A rows 4-7 of the second K step reuse A1..A4
  5343. MADD C33, C33, A7, B1
  5344. LD A2, 13 * SIZE(AO)
  5345. MADD C43, C43, A8, B1
  5346. LD A3, 14 * SIZE(AO)
  5347. LD A4, 15 * SIZE(AO)
  5348. daddiu AO, AO, 16 * SIZE # 2 K steps * 8 rows
  5349. MADD C11, C11, B5, B2
  5350. LD A5, 4 * SIZE(AO)
  5351. MADD C21, C21, B6, B2
  5352. LD A6, 5 * SIZE(AO)
  5353. MADD C13, C13, A1, B2
  5354. LD A7, 6 * SIZE(AO)
  5355. MADD C23, C23, A2, B2
  5356. LD A8, 7 * SIZE(AO)
  5357. MADD C33, C33, A3, B2
  5358. LD B1, 0 * SIZE(BO)
  5359. MADD C43, C43, A4, B2
  5360. LD A1, 0 * SIZE(AO) # A1..A4 reloaded for the next pass after their last use above
  5361. MADD C31, C31, B7, B2
  5362. LD A2, 1 * SIZE(AO)
  5363. MADD C41, C41, B8, B2
  5364. LD A3, 2 * SIZE(AO)
  5365. bgtz L, .L1810
  5366. LD A4, 3 * SIZE(AO) # after the branch but needed on both paths (delay slot, assuming noreorder)
  # K remainder of the 8x1 tile.  When the k count (K, or TEMP in the TRMM
  # build) is odd, one more k-step is accumulated from the values already
  # held in A1-A8 and B1; nothing is loaded through AO or BO here.
  5367. .align 4
  5368. .L182:
  5369. #ifndef TRMMKERNEL
  5370. andi L, K, 1
  5371. #else
  5372. andi L, TEMP, 1
  5373. #endif
  5374. blez L, .L180 # even count: go straight to the write back
  5375. LD ALPHA, 152($sp) # follows the branch (delay slot); ALPHA is read at .L180 on both paths
  5376. MADD C13, C13, A5, B1
  5377. MADD C23, C23, A6, B1
  5378. MADD C33, C33, A7, B1
  5379. MADD C43, C43, A8, B1
  5380. daddiu AO, AO, 8 * SIZE # step over the 8 A values just used
  5381. MADD C11, C11, A1, B1
  5382. MADD C21, C21, A2, B1
  5383. MADD C31, C31, A3, B1
  5384. MADD C41, C41, A4, B1
  5385. daddiu BO, BO, 1 * SIZE # step over the single B value just used
  # Write back of the 8x1 tile, then loop to .L181 while I is still positive.
  # Plain GEMM build: C[0..7] = C[0..7] + acc * ALPHA (through MADD).
  # TRMM build: C[0..7] = acc * ALPHA (through MUL), then AO/BO/KK are stepped.
  # Accumulators map to memory in the order C11, C21, C31, C41, C13, C23, C33, C43.
  # MADD, MUL and ST are macros defined outside this excerpt; the formulas
  # above assume multiply-add, multiply and store meanings -- confirm.
  5386. .align 4
  5387. .L180: # Write Back
  5388. #ifndef TRMMKERNEL
  5389. daddiu I, I, -1 # one 8-row tile finished
  5390. LD A1, 0 * SIZE(C) # A1-A8 are reused to hold the current C values
  5391. LD A2, 1 * SIZE(C)
  5392. LD A3, 2 * SIZE(C)
  5393. LD A4, 3 * SIZE(C)
  5394. LD A5, 4 * SIZE(C)
  5395. LD A6, 5 * SIZE(C)
  5396. LD A7, 6 * SIZE(C)
  5397. LD A8, 7 * SIZE(C)
  5398. MADD A1, A1, C11, ALPHA
  5399. MADD A2, A2, C21, ALPHA
  5400. MADD A3, A3, C31, ALPHA
  5401. MADD A4, A4, C41, ALPHA
  5402. MADD A5, A5, C13, ALPHA
  5403. MADD A6, A6, C23, ALPHA
  5404. MADD A7, A7, C33, ALPHA
  5405. MADD A8, A8, C43, ALPHA
  5406. ST A1, 0 * SIZE(C)
  5407. ST A2, 1 * SIZE(C)
  5408. ST A3, 2 * SIZE(C)
  5409. ST A4, 3 * SIZE(C)
  5410. ST A5, 4 * SIZE(C)
  5411. ST A6, 5 * SIZE(C)
  5412. ST A7, 6 * SIZE(C)
  5413. ST A8, 7 * SIZE(C)
  5414. daddiu C, C, 8 * SIZE # C moves on to the next 8 rows
  5415. bgtz I, .L181
  5416. NOP
  5417. #else
  5418. daddiu I, I, -1 # one 8-row tile finished
  5419. MUL A1, C11, ALPHA # TRMM build: the old C values are not read
  5420. MUL A2, C21, ALPHA
  5421. MUL A3, C31, ALPHA
  5422. MUL A4, C41, ALPHA
  5423. MUL A5, C13, ALPHA
  5424. MUL A6, C23, ALPHA
  5425. MUL A7, C33, ALPHA
  5426. MUL A8, C43, ALPHA
  5427. ST A1, 0 * SIZE(C)
  5428. ST A2, 1 * SIZE(C)
  5429. ST A3, 2 * SIZE(C)
  5430. ST A4, 3 * SIZE(C)
  5431. ST A5, 4 * SIZE(C)
  5432. ST A6, 5 * SIZE(C)
  5433. ST A7, 6 * SIZE(C)
  5434. ST A8, 7 * SIZE(C)
  5435. #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  5436. dsubu TEMP, K, KK # TEMP = K - KK, reduced below by 8 (LEFT) or by 1
  5437. #ifdef LEFT
  5438. daddiu TEMP, TEMP, -8
  5439. #else
  5440. daddiu TEMP, TEMP, -1
  5441. #endif
  5442. dsll L, TEMP, 3 + BASE_SHIFT # L = TEMP * 8 elements (bytes, assuming BASE_SHIFT is log2 of the element size -- confirm)
  5443. dsll TEMP, TEMP, BASE_SHIFT # TEMP elements, scaled the same way
  5444. daddu AO, AO, L
  5445. daddu BO, BO, TEMP
  5446. #endif
  5447. #ifdef LEFT
  5448. daddiu KK, KK, 8 # KK grows by the tile height
  5449. #endif
  5450. daddiu C, C, 8 * SIZE # C moves on to the next 8 rows
  5451. bgtz I, .L181
  5452. NOP
  5453. #endif
  # Leftover rows, group of 4: the 4x1 tile at .L141 runs once when bit 2 of M
  # is set; otherwise control continues at .L12.
  5454. .align 4
  5455. .L14:
  5456. andi I, M, 4 # MR=4
  5457. blez I, .L12
  5458. NOP
  # Set-up of the 4x1 tile: position AO/BO, zero the accumulators, preload
  # A1-A4 and B1, and derive the loop count L (k count shifted right by 1).
  # Only C11, C21, C31 and C41 are read by the 4x1 code that follows; the
  # other accumulator registers are cleared as well.
  5459. .align 4
  5460. .L141:
  5461. #if defined(TRMMKERNEL)
  5462. #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  5463. move BO, B
  5464. #else
  5465. dsll L, KK, 2 + BASE_SHIFT # L = KK * 4 elements (scaled by BASE_SHIFT)
  5466. dsll TEMP, KK, BASE_SHIFT # TEMP = KK elements (scaled by BASE_SHIFT)
  5467. daddu AO, AO, L
  5468. daddu BO, B, TEMP
  5469. #endif
  5470. MTC $0, C11 # Clear result registers: C11 is set from $0 and copied to the others
  5471. MOV C12, C11
  5472. LD A1, 0 * SIZE(AO)
  5473. MOV C21, C11
  5474. MOV C22, C11
  5475. LD A2, 1 * SIZE(AO)
  5476. MOV C31, C11
  5477. MOV C32, C11
  5478. LD A3, 2 * SIZE(AO)
  5479. MOV C41, C11
  5480. MOV C42, C11
  5481. LD A4, 3 * SIZE(AO)
  5482. MOV C13, C11
  5483. MOV C14, C11
  5484. LD B1, 0 * SIZE(BO)
  5485. MOV C23, C11
  5486. MOV C24, C11
  5487. MOV C33, C11
  5488. MOV C34, C11
  5489. MOV C43, C11
  5490. MOV C44, C11
  5491. #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
  5492. dsubu TEMP, K, KK # TEMP (k count for this tile) = K - KK
  5493. #elif defined(LEFT)
  5494. daddiu TEMP, KK, 4 # TEMP = KK + tile height
  5495. #else
  5496. daddiu TEMP, KK, 1 # TEMP = KK + tile width
  5497. #endif
  5498. dsra L, TEMP, 1 # L = TEMP >> 1: the loop at .L1410 consumes two k-steps per pass
  5499. blez L, .L142
  5500. NOP
  5501. #else
  5502. move BO, B # Reset B
  5503. dsra L, K, 1 # L = K >> 1: the loop at .L1410 consumes two k-steps per pass
  5504. MTC $0, C11 # Clear result registers: C11 is set from $0 and copied to the others
  5505. MOV C12, C11
  5506. LD A1, 0 * SIZE(AO)
  5507. MOV C21, C11
  5508. MOV C22, C11
  5509. LD A2, 1 * SIZE(AO)
  5510. MOV C31, C11
  5511. MOV C32, C11
  5512. LD A3, 2 * SIZE(AO)
  5513. MOV C41, C11
  5514. MOV C42, C11
  5515. LD A4, 3 * SIZE(AO)
  5516. MOV C13, C11
  5517. MOV C14, C11
  5518. LD B1, 0 * SIZE(BO)
  5519. MOV C23, C11
  5520. MOV C24, C11
  5521. MOV C33, C11
  5522. MOV C34, C11
  5523. MOV C43, C11
  5524. blez L, .L142
  5525. MOV C44, C11 # follows the branch (delay slot), so it runs on both paths
  5526. #endif
  # Main K loop of the 4x1 tile.  Each pass handles two k-steps: 8 values
  # through AO and 2 through BO, accumulated into C11, C21, C31 and C41.
  # MADD d, c, a, b is presumably d = c + a * b (macro defined outside this
  # excerpt -- confirm).  Loads are interleaved with the arithmetic, so the
  # statement order matters.
  5527. .align 4
  5528. .L1410:
  5529. daddiu L, L, -1 # one fewer pass remaining
  5530. MADD C11, C11, A1, B1 # first k-step: A1-A4 times B1
  5531. LD A5, 4 * SIZE(AO) # A5-A8 and B3 hold the operands of the second k-step
  5532. MADD C21, C21, A2, B1
  5533. LD B3, 1 * SIZE(BO)
  5534. MADD C31, C31, A3, B1
  5535. LD A6, 5 * SIZE(AO)
  5536. daddiu BO, BO, 2 * SIZE # BO moves on by two values per pass
  5537. MADD C41, C41, A4, B1
  5538. LD A7, 6 * SIZE(AO)
  5539. LD A8, 7 * SIZE(AO)
  5540. daddiu AO, AO, 8 * SIZE # AO moves on by two k-steps of 4 values
  5541. MADD C11, C11, A5, B3 # second k-step
  5542. LD A1, 0 * SIZE(AO) # from here on the loads fetch operands for the next pass
  5543. MADD C21, C21, A6, B3
  5544. LD B1, 0 * SIZE(BO)
  5545. MADD C31, C31, A7, B3
  5546. LD A2, 1 * SIZE(AO)
  5547. MADD C41, C41, A8, B3
  5548. LD A3, 2 * SIZE(AO)
  5549. bgtz L, .L1410
  5550. LD A4, 3 * SIZE(AO) # placed after the branch (delay slot), so it also runs on loop exit
  # K remainder of the 4x1 tile.  When the k count (K, or TEMP in the TRMM
  # build) is odd, one more k-step is accumulated from the values already
  # held in A1-A4 and B1.
  5551. .align 4
  5552. .L142:
  5553. #ifndef TRMMKERNEL
  5554. andi L, K, 1
  5555. #else
  5556. andi L, TEMP, 1
  5557. #endif
  5558. blez L, .L140 # even count: go straight to the write back
  5559. LD ALPHA, 152($sp) # follows the branch (delay slot); ALPHA is read at .L140 on both paths
  5560. MADD C11, C11, A1, B1
  5561. MADD C21, C21, A2, B1
  5562. MADD C31, C31, A3, B1
  5563. MADD C41, C41, A4, B1
  5564. daddiu AO, AO, 4 * SIZE # step over the 4 A values just used
  5565. daddiu BO, BO, 1 * SIZE # step over the single B value just used
  # Write back of the 4x1 tile; control then falls through to .L12.
  # Plain GEMM build: C[0..3] = C[0..3] + acc * ALPHA (through MADD).
  # TRMM build: C[0..3] = acc * ALPHA (through MUL), then AO/BO/KK are stepped.
  # MADD, MUL and ST are macros defined outside this excerpt; the formulas
  # above assume multiply-add, multiply and store meanings -- confirm.
  5566. .align 4
  5567. .L140: # Write Back
  5568. #ifndef TRMMKERNEL
  5569. LD A1, 0 * SIZE(C) # A1-A4 are reused to hold the current C values
  5570. LD A2, 1 * SIZE(C)
  5571. LD A3, 2 * SIZE(C)
  5572. LD A4, 3 * SIZE(C)
  5573. MADD A1, A1, C11, ALPHA
  5574. MADD A2, A2, C21, ALPHA
  5575. MADD A3, A3, C31, ALPHA
  5576. MADD A4, A4, C41, ALPHA
  5577. ST A1, 0 * SIZE(C)
  5578. ST A2, 1 * SIZE(C)
  5579. ST A3, 2 * SIZE(C)
  5580. ST A4, 3 * SIZE(C)
  5581. daddiu C, C, 4 * SIZE # C moves on by the 4 rows just written
  5582. #else
  5583. MUL A1, C11, ALPHA # TRMM build: the old C values are not read
  5584. MUL A2, C21, ALPHA
  5585. MUL A3, C31, ALPHA
  5586. MUL A4, C41, ALPHA
  5587. ST A1, 0 * SIZE(C)
  5588. ST A2, 1 * SIZE(C)
  5589. ST A3, 2 * SIZE(C)
  5590. ST A4, 3 * SIZE(C)
  5591. daddiu C, C, 4 * SIZE # C moves on by the 4 rows just written
  5592. #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
  5593. dsubu TEMP, K, KK # TEMP = K - KK, reduced below by 4 (LEFT) or by 1
  5594. #ifdef LEFT
  5595. daddiu TEMP, TEMP, -4
  5596. #else
  5597. daddiu TEMP, TEMP, -1
  5598. #endif
  5599. dsll L, TEMP, 2 + BASE_SHIFT # L = TEMP * 4 elements (scaled by BASE_SHIFT)
  5600. dsll TEMP, TEMP, BASE_SHIFT # TEMP elements (scaled by BASE_SHIFT)
  5601. daddu AO, AO, L
  5602. daddu BO, BO, TEMP
  5603. #endif
  5604. #ifdef LEFT
  5605. daddiu KK, KK, 4 # KK grows by the tile height
  5606. #endif
  5607. #endif
  # Leftover rows, group of 2: the 2x1 tile at .L121 runs once when bit 1 of M
  # is set; otherwise control continues at .L11.
  5608. .align 4
  5609. .L12:
  5610. andi I, M, 2
  5611. blez I, .L11
  5612. NOP
  # Set-up of the 2x1 tile: position AO/BO, zero the accumulators, preload
  # A1, A2 and B1, and derive the loop count L (k count shifted right by 1).
  # Only C11 and C21 are read by the 2x1 code that follows; the other
  # accumulator registers listed here are cleared as well.
  5613. .align 4
  5614. .L121:
  5615. #if defined(TRMMKERNEL)
  5616. #if (defined(LEFT) && defined(TRANSA)) ||\
  5617. (!defined(LEFT) && !defined(TRANSA))
  5618. move BO, B # Reset B
  5619. #else
  5620. dsll L, KK, 1 + BASE_SHIFT # L = KK * 2 elements (scaled by BASE_SHIFT)
  5621. dsll TEMP, KK, BASE_SHIFT # TEMP = KK elements (scaled by BASE_SHIFT)
  5622. daddu AO, AO, L
  5623. daddu BO, B, TEMP
  5624. #endif
  5625. MTC $0, C11 # Clear result registers: C11 is set from $0 and copied to the others
  5626. MOV C12, C11
  5627. LD A1, 0 * SIZE(AO)
  5628. MOV C21, C11
  5629. MOV C22, C11
  5630. LD A2, 1 * SIZE(AO)
  5631. MOV C31, C11
  5632. MOV C32, C11
  5633. LD B1, 0 * SIZE(BO)
  5634. MOV C41, C11
  5635. MOV C42, C11
  5636. MOV C43, C11
  5637. MOV C44, C11
  5638. #if (defined(LEFT) && !defined(TRANSA)) ||\
  5639. (!defined(LEFT) && defined(TRANSA))
  5640. dsubu TEMP, K, KK # TEMP (k count for this tile) = K - KK
  5641. #elif defined(LEFT)
  5642. daddiu TEMP, KK, 2 # TEMP = KK + tile height
  5643. #else
  5644. daddiu TEMP, KK, 1 # TEMP = KK + tile width
  5645. #endif
  5646. dsra L, TEMP, 1 # L = TEMP >> 1: the loop at .L1210 consumes two k-steps per pass
  5647. blez L, .L122
  5648. NOP
  5649. #else
  5650. move BO, B # Reset B
  5651. dsra L, K, 1 # L = K >> 1: the loop at .L1210 consumes two k-steps per pass
  5652. MTC $0, C11 # Clear result registers: C11 is set from $0 and copied to the others
  5653. MOV C12, C11
  5654. LD A1, 0 * SIZE(AO)
  5655. MOV C21, C11
  5656. MOV C22, C11
  5657. LD A2, 1 * SIZE(AO)
  5658. MOV C31, C11
  5659. MOV C32, C11
  5660. LD B1, 0 * SIZE(BO)
  5661. MOV C41, C11
  5662. MOV C42, C11
  5663. MOV C43, C11
  5664. blez L, .L122
  5665. MOV C44, C11 # follows the branch (delay slot), so it runs on both paths
  5666. #endif
  # Main K loop of the 2x1 tile.  Each pass handles two k-steps: 4 values
  # through AO and 2 through BO, accumulated into C11 and C21.
  # MADD d, c, a, b is presumably d = c + a * b (macro defined outside this
  # excerpt -- confirm).
  5667. .align 4
  5668. .L1210:
  5669. daddiu L, L, -1 # one fewer pass remaining
  5670. MADD C11, C11, A1, B1 # first k-step: A1, A2 times B1
  5671. LD B3, 1 * SIZE(BO) # B3, A3 and A4 hold the operands of the second k-step
  5672. MADD C21, C21, A2, B1
  5673. daddiu BO, BO, 2 * SIZE # BO moves on by two values per pass
  5674. LD A3, 2 * SIZE(AO)
  5675. LD A4, 3 * SIZE(AO)
  5676. daddiu AO, AO, 4 * SIZE # AO moves on by two k-steps of 2 values
  5677. MADD C11, C11, A3, B3 # second k-step
  5678. LD B1, 0 * SIZE(BO) # from here on the loads fetch operands for the next pass
  5679. MADD C21, C21, A4, B3
  5680. LD A1, 0 * SIZE(AO)
  5681. bgtz L, .L1210
  5682. LD A2, 1 * SIZE(AO) # placed after the branch (delay slot), so it also runs on loop exit
  # K remainder of the 2x1 tile.  When the k count (K, or TEMP in the TRMM
  # build) is odd, one more k-step is accumulated from the values already
  # held in A1, A2 and B1.
  5683. .align 4
  5684. .L122:
  5685. #ifndef TRMMKERNEL
  5686. andi L, K, 1
  5687. #else
  5688. andi L, TEMP, 1
  5689. #endif
  5690. blez L, .L120 # even count: go straight to the write back
  5691. LD ALPHA, 152($sp) # follows the branch (delay slot); ALPHA is read at .L120 on both paths
  5692. MADD C11, C11, A1, B1
  5693. MADD C21, C21, A2, B1
  5694. daddiu AO, AO, 2 * SIZE # step over the 2 A values just used
  5695. daddiu BO, BO, 1 * SIZE # step over the single B value just used
  # Write back of the 2x1 tile; control then falls through to .L11.
  # Plain GEMM build: C[0..1] = C[0..1] + acc * ALPHA (through MADD).
  # TRMM build: C[0..1] = acc * ALPHA (through MUL), then AO/BO/KK are stepped.
  # MADD, MUL and ST are macros defined outside this excerpt; the formulas
  # above assume multiply-add, multiply and store meanings -- confirm.
  5696. .align 4
  5697. .L120: # Write Back
  5698. #ifndef TRMMKERNEL
  5699. LD A1, 0 * SIZE(C) # A1, A2 are reused to hold the current C values
  5700. LD A2, 1 * SIZE(C)
  5701. MADD A1, A1, C11, ALPHA
  5702. MADD A2, A2, C21, ALPHA
  5703. ST A1, 0 * SIZE(C)
  5704. ST A2, 1 * SIZE(C)
  5705. daddiu C, C, 2 * SIZE # C moves on by the 2 rows just written
  5706. #else
  5707. MUL A1, C11, ALPHA # TRMM build: the old C values are not read
  5708. MUL A2, C21, ALPHA
  5709. ST A1, 0 * SIZE(C)
  5710. ST A2, 1 * SIZE(C)
  5711. daddiu C, C, 2 * SIZE # C moves on by the 2 rows just written
  5712. #if ( defined(LEFT) && defined(TRANSA))||\
  5713. (!defined(LEFT) && !defined(TRANSA))
  5714. dsubu TEMP, K, KK # TEMP = K - KK, reduced below by 2 (LEFT) or by 1
  5715. #ifdef LEFT
  5716. daddiu TEMP, TEMP, -2
  5717. #else
  5718. daddiu TEMP, TEMP, -1
  5719. #endif
  5720. dsll L, TEMP, 1 + BASE_SHIFT # L = TEMP * 2 elements (scaled by BASE_SHIFT)
  5721. dsll TEMP, TEMP, BASE_SHIFT # TEMP elements (scaled by BASE_SHIFT)
  5722. daddu AO, AO, L
  5723. daddu BO, BO, TEMP
  5724. #endif
  5725. #ifdef LEFT
  5726. daddiu KK, KK, 2 # KK grows by the tile height
  5727. #endif
  5728. #endif
  # Leftover rows, single row: the 1x1 tile at .L111 runs once when bit 0 of M
  # is set; otherwise control continues at .L10.
  5729. .align 4
  5730. .L11:
  5731. andi I, M, 1
  5732. blez I, .L10
  5733. NOP
  # Set-up of the 1x1 tile: position AO/BO, zero the accumulators, preload
  # A1 and B1, and derive the loop count L (k count shifted right by 1).
  # Only C11 is read by the 1x1 code that follows; the other accumulator
  # registers listed here are cleared as well.
  5734. .align 4
  5735. .L111:
  5736. #if defined(TRMMKERNEL)
  5737. #if (defined(LEFT) && defined(TRANSA))||\
  5738. (!defined(LEFT) && !defined(TRANSA))
  5739. move BO, B
  5740. #else
  5741. dsll L, KK, BASE_SHIFT # L = KK elements (scaled by BASE_SHIFT); used for both AO and BO
  5742. daddu AO, AO, L
  5743. daddu BO, B, L
  5744. #endif
  5745. MTC $0, C11 # Clear result registers: C11 is set from $0 and copied to the others
  5746. MOV C12, C11
  5747. LD A1, 0 * SIZE(AO)
  5748. MOV C21, C11
  5749. MOV C22, C11
  5750. LD B1, 0 * SIZE(BO)
  5751. MOV C31, C11
  5752. MOV C32, C11
  5753. #if (defined(LEFT) && !defined(TRANSA))||\
  5754. (!defined(LEFT) && defined(TRANSA))
  5755. dsubu TEMP, K, KK # TEMP (k count for this tile) = K - KK
  5756. #elif defined(LEFT)
  5757. daddiu TEMP, KK, 1 # same value as the branch below: the tile is 1 high and 1 wide
  5758. #else
  5759. daddiu TEMP, KK, 1
  5760. #endif
  5761. dsra L, TEMP, 1 # L = TEMP >> 1: the loop at .L1110 consumes two k-steps per pass
  5762. blez L, .L112
  5763. NOP
  5764. #else
  5765. move BO, B # Reset B
  5766. dsra L, K, 1 # L = K >> 1: the loop at .L1110 consumes two k-steps per pass
  5767. MTC $0, C11 # Clear result registers: C11 is set from $0 and copied to the others
  5768. MOV C12, C11
  5769. LD A1, 0 * SIZE(AO)
  5770. MOV C21, C11
  5771. MOV C22, C11
  5772. LD B1, 0 * SIZE(BO)
  5773. MOV C31, C11
  5774. blez L, .L112
  5775. MOV C32, C11 # follows the branch (delay slot), so it runs on both paths
  5776. #endif
  # Main K loop of the 1x1 tile.  Each pass handles two k-steps: 2 values
  # through AO and 2 through BO, accumulated into C11.
  # MADD d, c, a, b is presumably d = c + a * b (macro defined outside this
  # excerpt -- confirm).
  5777. .align 4
  5778. .L1110:
  5779. daddiu L, L, -1 # one fewer pass remaining
  5780. MADD C11, C11, A1, B1 # first k-step
  5781. LD A2, 1 * SIZE(AO) # A2 and B2 hold the operands of the second k-step
  5782. LD B2, 1 * SIZE(BO)
  5783. daddiu AO, AO, 2 * SIZE
  5784. daddiu BO, BO, 2 * SIZE
  5785. MADD C11, C11, A2, B2 # second k-step
  5786. LD A1, 0 * SIZE(AO) # operands for the next pass
  5787. LD B1, 0 * SIZE(BO)
  5788. bgtz L, .L1110
  5789. NOP
  # K remainder of the 1x1 tile.  When the k count (K, or TEMP in the TRMM
  # build) is odd, one more k-step is accumulated from the values already
  # held in A1 and B1.
  5790. .align 4
  5791. .L112:
  5792. #ifndef TRMMKERNEL
  5793. andi L, K, 1
  5794. #else
  5795. andi L, TEMP, 1
  5796. #endif
  5797. blez L, .L110 # even count: go straight to the write back
  5798. LD ALPHA, 152($sp) # follows the branch (delay slot); ALPHA is read at .L110 on both paths
  5799. MADD C11, C11, A1, B1
  5800. daddiu AO, AO, 1 * SIZE # step over the A value just used
  5801. daddiu BO, BO, 1 * SIZE # step over the B value just used
  # Write back of the 1x1 tile; control then falls through to .L10.
  # Plain GEMM build: C[0] = C[0] + C11 * ALPHA (through MADD).
  # TRMM build: C[0] = C11 * ALPHA (through MUL).  Unlike the wider tiles, this
  # TRMM path does not step AO, BO or KK afterwards.
  # MADD, MUL and ST are macros defined outside this excerpt; the formulas
  # above assume multiply-add, multiply and store meanings -- confirm.
  5802. .align 4
  5803. .L110: # Write Back
  5804. #ifndef TRMMKERNEL
  5805. LD A1, 0 * SIZE(C) # A1 is reused to hold the current C value
  5806. MADD A1, A1, C11, ALPHA
  5807. ST A1, 0 * SIZE(C)
  5808. daddiu C, C, 1 * SIZE
  5809. #else
  5810. MUL A1, C11, ALPHA # TRMM build: the old C value is not read
  5811. ST A1, 0 * SIZE(C)
  5812. daddiu C, C, 1 * SIZE
  5813. #endif
  # End of the row handling: B takes the final BO value, then control falls
  # through to the epilogue at .L999.
  5814. .align 4
  5815. .L10:
  5816. move B, BO
  5817. NOP
  # Epilogue: reload registers from the stack frame, release the frame and
  # return through $31.  The matching stores are presumably in the prologue,
  # which lies outside this excerpt -- confirm that the offsets agree.
  5818. .L999:
  5819. ld $16, 0($sp) # integer registers $16-$22 from offsets 0-48
  5820. ld $17, 8($sp)
  5821. ld $18, 16($sp)
  5822. ld $19, 24($sp)
  5823. ld $20, 32($sp)
  5824. ld $21, 40($sp)
  5825. ld $22, 48($sp)
  5826. ldc1 $f24, 56($sp) # floating-point registers $f24-$f28 from offsets 56-88
  5827. ldc1 $f25, 64($sp)
  5828. ldc1 $f26, 72($sp)
  5829. ldc1 $f27, 80($sp)
  5830. ldc1 $f28, 88($sp)
  5831. #if defined(TRMMKERNEL)
  5832. ld $23, 96($sp) # $23-$25 are reloaded only in the TRMM build
  5833. ld $24, 104($sp)
  5834. ld $25, 112($sp)
  5835. #endif
  5836. #ifndef __64BIT__
  5837. ldc1 $f20,120($sp) # $f20-$f23 are reloaded only when __64BIT__ is not defined
  5838. ldc1 $f21,128($sp)
  5839. ldc1 $f22,136($sp)
  5840. ldc1 $f23,144($sp)
  5841. #endif
  5842. daddiu $sp,$sp,STACKSIZE # release the stack frame
  5843. j $31 # return to the caller
  5844. nop
  5845. EPILOGUE
  5846. # .set macro
  5847. # .set reorder
  5848. # .end gemm
  5849. # .size gemm, .-gemm
  5850. # .ident "GCC: (Debian 4.4.6-6) 4.4.6"