You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

ztrsm_kernel_RT.S 171 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  /* Build configuration.  ASSEMBLER is defined before common.h is pulled */
  /* in; presumably it makes the header expose its assembler-side macros  */
  /* (SIZE, ZBASE_SHIFT, LDFPD, FMA, PROLOGUE, ...) - confirm in common.h. */
  38. #define ASSEMBLER
  39. #include "common.h"
  /* PREFETCHSIZE: look-ahead distance, in units of SIZE, that the kernel */
  /* adds to AOFFSET / BOFFSET to form the PREA / PREB prefetch pointers.  */
  /* The distance is 16 * 8 when DOUBLE is defined and 32 * 8 otherwise.   */
  40. #ifdef DOUBLE
  41. #define PREFETCHSIZE (16 * 8)
  42. #else
  43. #define PREFETCHSIZE (32 * 8)
  44. #endif
  /* CPREFETCHSIZE: offset, in units of SIZE, from C1 at which the output */
  /* tile is prefetched (PREC = C1 + CPREFETCHSIZE * SIZE).  It is         */
  /* negative for LN, where the kernel steps C1 back by 8 * SIZE before    */
  /* storing its results.                                                  */
  45. #ifndef LN
  46. #define CPREFETCHSIZE 7
  47. #else
  48. #define CPREFETCHSIZE -8
  49. #endif
  /* CPREFETCH: instruction used to prefetch C - an IA-64 lfetch carrying  */
  /* the .excl (exclusive ownership) and .nt1 (locality) hints.            */
  50. #define CPREFETCH lfetch.excl.nt1
  /* Symbolic names for the registers used by this kernel.  The prologue  */
  /* issues "alloc ARPFS = ar.pfs, 8, 8, 0, 0", i.e. 8 input registers     */
  /* (r32-r39) and 8 locals.                                               */
  /*                                                                       */
  /* M, N, K: problem sizes.  The body derives I = M >> 2, tests bit 1 of  */
  /* M and bit 0 of N to choose code paths, and scales K by ZBASE_SHIFT to */
  /* compute panel strides.                                                */
  51. #define M r32
  52. #define N r33
  53. #define K r34
  /* A, B, C: base pointers.  A seeds AORIG / AOFFSET, B seeds BOFFSET and */
  /* C seeds C1.                                                           */
  54. #define A r37
  55. #define B r38
  56. #define C r39
  /* LDC: stride used to step C.  The prologue loads it from [SP + 16]     */
  /* and scales it by ZBASE_SHIFT rather than using r35 as passed.         */
  57. #define LDC r35
  /* I: counter for the loop over groups of 4 in M (branches back to       */
  /* .L092 while groups remain).  J is not referenced in this part of      */
  /* the file.                                                             */
  58. #define I r15
  59. #define J r16
  /* AOFFSET / BOFFSET: running pointers into A and B, advanced by the     */
  /* post-incrementing LDFPD loads.                                        */
  60. #define AOFFSET r17
  61. #define BOFFSET r18
  /* TEMP is not referenced in this part of the file.                      */
  62. #define TEMP r19
  /* L: trip count of the inner multiply loop; it is derived from KK       */
  /* (L = KK, or L = K - KK), halved, and moved into ar.lc.                */
  63. #define L r20
  /* C1..C8: store pointers into C.  In the visible code C1 = C and        */
  /* C5 = C1 + 4 * SIZE; C2-C4 and C6-C8 are not referenced here.          */
  64. #define C1 r21
  65. #define C2 r22
  66. #define C3 r23
  67. #define C4 r24
  68. #define C5 r25
  69. #define C6 r26
  70. #define C7 r27
  71. #define C8 r28
  /* PREA / PREB / PREC: prefetch pointers running ahead of A, B and C.    */
  72. #define PREA r8
  73. #define PREB r9
  74. #define PREC r10
  /* SP: base register from which the prologue loads the stack-passed      */
  /* LDC ([SP + 16]) and OFFSET ([SP + 24]) values.                        */
  75. #define SP r12
  /* ARLC / PR / ARPFS: hold the entry values of ar.lc, pr and ar.pfs,     */
  /* presumably for restoration in the epilogue (not visible here).        */
  76. #define ARLC r29
  77. #define PR r30
  78. #define ARPFS r31
  /* ALPHA_R / ALPHA_I are not referenced in this part of the file;        */
  /* presumably the real and imaginary parts of alpha - TODO confirm.      */
  79. #define ALPHA_R f8
  80. #define ALPHA_I f9
  /* Values kept in the local stacked registers (loc0..loc5 are            */
  /* presumably the assembler's aliases for the locals declared by alloc): */
  /*   AORIG    - origin of the current A panel (used by LN / RT)          */
  /*   KK       - position counter from which L is derived; stepped by     */
  /*              +4 (LT) or -4 (LN) per group of 4 in M                   */
  /*   KK8      - not referenced in this part of the file                  */
  /*   OFFSET   - offset argument loaded from [SP + 24]                    */
  /*   AOFFSET2 - second store pointer, AOFFSET + 4 * SIZE                 */
  /*   BOFFSET2 - second store pointer, BOFFSET + 4 * SIZE                 */
  81. #define AORIG loc0
  82. #define KK loc1
  83. #define KK8 loc2
  84. #define OFFSET loc3
  85. #define AOFFSET2 loc4
  86. #define BOFFSET2 loc5
  /* Operation selectors.  Each macro below expands to one of FADD, FSUB,  */
  /* FMA, FNMA or FMS (defined outside this file; presumably wrappers      */
  /* around the IA-64 fadd / fsub / fma / fnma / fms instructions).        */
  /* Defining CONJ swaps every choice, so one instruction sequence serves  */
  /* both the plain and the conjugated variant of the kernel.              */
  /*                                                                       */
  /* FMA_A / FMA_B are used in the complex multiply-accumulate loop:       */
  /*   FMA_B accumulates real(A) * imag(B) into the imaginary accumulator, */
  /*   FMA_A accumulates imag(A) * imag(B) into the real accumulator.      */
  /* They are also used in the elimination updates of the LN / LT paths.   */
  /* FCALC_A / FCALC_B are not referenced in this part of the file.        */
  87. #ifndef CONJ
  88. #define FCALC_A FSUB
  89. #define FCALC_B FADD
  90. #define FMA_A FNMA
  91. #define FMA_B FMA
  92. #else
  93. #define FCALC_A FADD
  94. #define FCALC_B FSUB
  95. #define FMA_A FMA
  96. #define FMA_B FNMA
  97. #endif
  /* FCALC_C / FCALC_D are not referenced in this part of the file.        */
  98. #ifndef CONJ
  99. #define FCALC_C FMA
  100. #define FCALC_D FNMA
  101. #else
  102. #define FCALC_C FNMA
  103. #define FCALC_D FMA
  104. #endif
  /* FMA_C / FMA_D complete the two-step complex multiply applied to each  */
  /* result in the solve step: an FMPY pair forms the products with the    */
  /* real part of the result, then FMA_C (real) and FMA_D (imaginary)      */
  /* fold in the products with its imaginary part.                         */
  /* FSUB_A combines the loaded imaginary part with the accumulated one    */
  /* in the LN / LT path (FSUB normally, FADD under CONJ).                 */
  105. #ifndef CONJ
  106. #define FMA_C FNMA
  107. #define FMA_D FMA
  108. #define FSUB_A FSUB
  109. #else
  110. #define FMA_C FMA
  111. #define FMA_D FMS
  112. #define FSUB_A FADD
  113. #endif
  114. PROLOGUE
  115. .prologue
  116. PROFCODE
  117. { .mfi
  118. .save ar.pfs, ARPFS
  119. alloc ARPFS = ar.pfs, 8, 8, 0, 0
  120. mov f64 = f0
  121. adds r14 = 16, SP
  122. }
  123. { .mfi
  124. nop __LINE__
  125. mov f65 = f0
  126. adds r15 = 24, SP
  127. }
  128. ;;
  129. { .mfi
  130. ld8 LDC = [r14]
  131. mov f81 = f0
  132. mov PR = pr
  133. }
  134. { .mfi
  135. ld8 OFFSET = [r15]
  136. mov f96 = f0
  137. }
  138. ;;
  139. { .mfi
  140. shladd LDC = LDC, ZBASE_SHIFT, r0
  141. mov f97 = f0
  142. }
  143. { .mfi
  144. nop __LINE__
  145. mov f113 = f0
  146. }
  147. ;;
  148. #ifdef LN
  149. { .mmi
  150. setf.sig f32 = M
  151. setf.sig f33 = K
  152. shladd C = M, ZBASE_SHIFT, C
  153. }
  154. ;;
  155. {.mmf
  156. nop __LINE__
  157. nop __LINE__
  158. xmpy.l f32 = f32, f33
  159. }
  160. ;;
  161. { .mmi
  162. getf.sig r2 = f32
  163. ;;
  164. nop __LINE__
  165. shladd A = r2, ZBASE_SHIFT, A
  166. }
  167. ;;
  168. #endif
  169. #ifdef RN
  170. sub KK = r0, OFFSET
  171. #endif
  172. #ifdef RT
  173. { .mmi
  174. setf.sig f32 = N
  175. setf.sig f33 = K
  176. nop __LINE__
  177. }
  178. ;;
  179. { .mmi
  180. setf.sig f34 = LDC
  181. nop __LINE__
  182. nop __LINE__
  183. }
  184. ;;
  185. { .mmf
  186. nop __LINE__
  187. nop __LINE__
  188. xmpy.l f33 = f32, f33
  189. }
  190. { .mmf
  191. nop __LINE__
  192. sub KK = N, OFFSET
  193. xmpy.l f34 = f32, f34
  194. }
  195. ;;
  196. { .mmi
  197. getf.sig r2 = f33
  198. getf.sig r3 = f34
  199. }
  200. ;;
  201. shladd B = r2, ZBASE_SHIFT, B
  202. add C = r3, C
  203. #endif
  204. ;;
  205. .body
  206. { .mfi
  207. nop __LINE__
  208. mov f80 = f0
  209. mov ARLC = ar.lc
  210. }
  211. { .mfb
  212. mov f112 = f0
  213. }
  214. ;;
  215. ;;
  216. shr I = M, 2
  217. tbit.z p6, p0 = N, 0
  218. (p6) br.cond.dpnt .L050
  219. ;;
  220. #ifdef RT
  221. { .mmi
  222. shl r2 = K, ZBASE_SHIFT
  223. }
  224. ;;
  225. { .mmi
  226. sub B = B, r2
  227. sub C = C, LDC
  228. nop __LINE__
  229. }
  230. ;;
  231. #endif
  232. mov C1 = C
  233. #ifdef LN
  234. add KK = M, OFFSET
  235. #elif defined LT
  236. mov KK = OFFSET
  237. #else
  238. nop __LINE__
  239. #endif
  240. ;;
  241. #if defined(LN) || defined(RT)
  242. mov AORIG = A
  243. #else
  244. mov AOFFSET = A
  245. #endif
  246. ;;
  247. #if defined(LT) || defined(RN)
  248. mov L = KK
  249. #else
  250. sub L = K, KK
  251. #endif
  252. ;;
  253. { .mib
  254. cmp.eq p6, p7 = 0, I
  255. #ifndef RT
  256. add C = LDC, C
  257. #else
  258. nop __LINE__
  259. #endif
  260. (p6) br.cond.dpnt .L100
  261. }
  262. ;;
  263. .align 16
  264. .L092:
  265. { .mmi
  266. cmp.ne p7, p0 = r0, L
  267. adds BOFFSET = 0 * SIZE, B
  268. shl r2 = K, 2 + ZBASE_SHIFT
  269. }
  270. { .mmi
  271. shladd r3 = KK, ZBASE_SHIFT, r0
  272. nop __LINE__
  273. nop __LINE__
  274. }
  275. ;;
  276. #if defined(LT) || defined(RN)
  277. { .mfb
  278. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  279. mov f66 = f0
  280. nop __LINE__
  281. }
  282. { .mmf
  283. nop __LINE__
  284. nop __LINE__
  285. mov f67 = f0
  286. }
  287. ;;
  288. #else
  289. { .mfi
  290. add BOFFSET = r3, B
  291. mov f66 = f0
  292. #ifdef LN
  293. sub AORIG = AORIG, r2
  294. #else
  295. nop __LINE__
  296. #endif
  297. }
  298. ;;
  299. { .mfi
  300. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  301. mov f67 = f0
  302. shladd AOFFSET = r3, 2, AORIG
  303. }
  304. ;;
  305. #endif
  306. ;;
  307. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  308. adds L = 1, L
  309. ;;
  310. { .mfi
  311. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  312. tbit.z p12, p0 = L, 0
  313. }
  314. { .mfi
  315. adds PREC = CPREFETCHSIZE * SIZE, C1
  316. shr L = L, 1
  317. }
  318. ;;
  319. { .mfi
  320. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  321. adds L = -1, L
  322. }
  323. { .mmf
  324. (p7) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  325. CPREFETCH [PREC]
  326. }
  327. ;;
  328. { .mfi
  329. (p7) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  330. mov ar.lc = L
  331. }
  332. { .mmi
  333. adds C5 = 4 * SIZE, C1
  334. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  335. cmp.eq p3, p0 = r0, r0
  336. }
  337. ;;
  338. cmp.eq p6, p0 = -1, L
  339. (p6) br.cond.dpnt .L098
  340. ;;
  341. .align 16
  342. .L093:
  343. /* 1 */
  344. { .mfi
  345. lfetch.nt1 [PREA], 16 * SIZE
  346. FMA f64 = f32, f48, f64 // A1 * B1
  347. cmp.ne p4, p5 = 0, L
  348. }
  349. { .mfi
  350. nop __LINE__
  351. FMA_B f65 = f32, f49, f65 // A1 * B2
  352. (p12) cmp.ne p3, p0 = 0, L
  353. }
  354. ;;
  355. { .mfi
  356. lfetch.nt1 [PREB], 4 * SIZE
  357. FMA f80 = f34, f48, f80 // A3 * B1
  358. nop __LINE__
  359. }
  360. { .mfi
  361. nop __LINE__
  362. FMA_B f81 = f34, f49, f81 // A3 * B2
  363. nop __LINE__
  364. }
  365. ;;
  366. { .mfi
  367. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  368. FMA f96 = f36, f48, f96 // A5 * B1
  369. nop __LINE__
  370. }
  371. { .mfi
  372. nop __LINE__
  373. FMA_B f97 = f36, f49, f97 // A5 * B2
  374. nop __LINE__
  375. }
  376. ;;
  377. { .mfb
  378. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  379. FMA f112 = f38, f48, f112 // A7 * B1
  380. nop __LINE__
  381. }
  382. { .mfb
  383. nop __LINE__
  384. FMA_B f113 = f38, f49, f113 // A7 * B2
  385. nop __LINE__
  386. }
  387. ;;
  388. { .mfb
  389. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  390. FMA f65 = f33, f48, f65 // A2 * B1
  391. nop __LINE__
  392. }
  393. { .mfb
  394. nop __LINE__
  395. FMA_A f64 = f33, f49, f64 // A2 * B2
  396. nop __LINE__
  397. }
  398. ;;
  399. { .mfb
  400. (p3) LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  401. FMA f81 = f35, f48, f81 // A4 * B1
  402. nop __LINE__
  403. }
  404. { .mfb
  405. nop __LINE__
  406. FMA_A f80 = f35, f49, f80 // A4 * B2
  407. nop __LINE__
  408. }
  409. ;;
  410. { .mfb
  411. (p3) LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  412. FMA f97 = f37, f48, f97 // A6 * B1
  413. nop __LINE__
  414. }
  415. { .mfb
  416. nop __LINE__
  417. FMA_A f96 = f37, f49, f96 // A6 * B2
  418. nop __LINE__
  419. }
  420. ;;
  421. { .mfb
  422. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  423. FMA f113 = f39, f48, f113 // A8 * B1
  424. nop __LINE__
  425. }
  426. { .mfb
  427. nop __LINE__
  428. FMA_A f112 = f39, f49, f112 // A8 * B2
  429. nop __LINE__
  430. }
  431. ;;
  432. { .mfb
  433. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  434. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  435. nop __LINE__
  436. }
  437. { .mfb
  438. nop __LINE__
  439. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  440. nop __LINE__
  441. }
  442. ;;
  443. { .mfb
  444. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  445. (p3) FMA f80 = f42, f56, f80 // A3 * B1
  446. nop __LINE__
  447. }
  448. { .mfb
  449. nop __LINE__
  450. (p3) FMA_B f81 = f42, f57, f81 // A3 * B2
  451. nop __LINE__
  452. }
  453. ;;
  454. { .mfb
  455. (p4) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  456. (p3) FMA f96 = f44, f56, f96 // A5 * B1
  457. nop __LINE__
  458. }
  459. { .mfb
  460. nop __LINE__
  461. (p3) FMA_B f97 = f44, f57, f97 // A5 * B2
  462. nop __LINE__
  463. }
  464. ;;
  465. { .mfb
  466. (p4) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  467. (p3) FMA f112 = f46, f56, f112 // A7 * B1
  468. nop __LINE__
  469. }
  470. { .mfb
  471. nop __LINE__
  472. (p3) FMA_B f113 = f46, f57, f113 // A7 * B2
  473. nop __LINE__
  474. }
  475. ;;
  476. { .mfb
  477. nop __LINE__
  478. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  479. nop __LINE__
  480. }
  481. { .mfb
  482. nop __LINE__
  483. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  484. nop __LINE__
  485. }
  486. ;;
  487. { .mfb
  488. nop __LINE__
  489. (p3) FMA f81 = f43, f56, f81 // A4 * B1
  490. nop __LINE__
  491. }
  492. { .mfb
  493. nop __LINE__
  494. (p3) FMA_A f80 = f43, f57, f80 // A4 * B2
  495. nop __LINE__
  496. }
  497. ;;
  498. { .mfb
  499. nop __LINE__
  500. (p3) FMA f97 = f45, f56, f97 // A6 * B1
  501. nop __LINE__
  502. }
  503. { .mfb
  504. nop __LINE__
  505. (p3) FMA_A f96 = f45, f57, f96 // A6 * B2
  506. nop __LINE__
  507. }
  508. ;;
  509. { .mfi
  510. nop __LINE__
  511. (p3) FMA f113 = f47, f56, f113 // A8 * B1
  512. adds L = -1, L
  513. }
  514. { .mfb
  515. nop __LINE__
  516. (p3) FMA_A f112 = f47, f57, f112 // A8 * B2
  517. br.cloop.sptk.few .L093
  518. }
  519. ;;
  520. .L098:
  521. #if defined(LN) || defined(RT)
  522. #ifdef LN
  523. adds r2 = -4, KK
  524. #else
  525. adds r2 = -1, KK
  526. #endif
  527. ;;
  528. shladd r2 = r2, ZBASE_SHIFT, r0
  529. ;;
  530. shladd AOFFSET = r2, 2, AORIG
  531. add BOFFSET = r2, B
  532. ;;
  533. #endif
  534. #if defined(LN) || defined(LT)
  535. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  536. ;;
  537. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  538. ;;
  539. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  540. ;;
  541. LDFPD f90, f91 = [BOFFSET]
  542. adds BOFFSET = -6 * SIZE, BOFFSET
  543. ;;
  544. FSUB f64 = f72, f64
  545. FSUB_A f65 = f73, f65
  546. FSUB f80 = f74, f80
  547. FSUB_A f81 = f75, f81
  548. FSUB f96 = f88, f96
  549. FSUB_A f97 = f89, f97
  550. FSUB f112 = f90, f112
  551. FSUB_A f113 = f91, f113
  552. ;;
  553. #else
  554. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  555. ;;
  556. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  557. ;;
  558. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  559. ;;
  560. LDFPD f90, f91 = [AOFFSET]
  561. adds AOFFSET = -6 * SIZE, AOFFSET
  562. ;;
  563. FSUB f64 = f72, f64
  564. FSUB f65 = f73, f65
  565. FSUB f80 = f74, f80
  566. FSUB f81 = f75, f81
  567. FSUB f96 = f88, f96
  568. FSUB f97 = f89, f97
  569. FSUB f112 = f90, f112
  570. FSUB f113 = f91, f113
  571. ;;
  572. #endif
  573. #ifdef LN
  574. adds AOFFSET = 30 * SIZE, AOFFSET
  575. ;;
  576. LDFPD f72, f73 = [AOFFSET]
  577. adds AOFFSET = - 2 * SIZE, AOFFSET
  578. ;;
  579. LDFPD f74, f75 = [AOFFSET]
  580. adds AOFFSET = - 2 * SIZE, AOFFSET
  581. ;;
  582. LDFPD f76, f77 = [AOFFSET]
  583. adds AOFFSET = - 2 * SIZE, AOFFSET
  584. ;;
  585. LDFPD f78, f79 = [AOFFSET]
  586. adds AOFFSET = - 4 * SIZE, AOFFSET
  587. ;;
  588. LDFPD f88, f89 = [AOFFSET]
  589. adds AOFFSET = - 2 * SIZE, AOFFSET
  590. ;;
  591. LDFPD f90, f91 = [AOFFSET]
  592. adds AOFFSET = - 2 * SIZE, AOFFSET
  593. ;;
  594. LDFPD f92, f93 = [AOFFSET]
  595. adds AOFFSET = - 6 * SIZE, AOFFSET
  596. ;;
  597. LDFPD f104, f105 = [AOFFSET]
  598. adds AOFFSET = - 2 * SIZE, AOFFSET
  599. ;;
  600. LDFPD f106, f107 = [AOFFSET]
  601. adds AOFFSET = - 8 * SIZE, AOFFSET
  602. ;;
  603. LDFPD f120, f121 = [AOFFSET]
  604. ;;
  605. FMPY f32 = f72, f112
  606. FMPY f33 = f73, f112
  607. ;;
  608. FMA_C f112 = f73, f113, f32
  609. FMA_D f113 = f72, f113, f33
  610. ;;
  611. FNMA f96 = f74, f112, f96
  612. FMA_A f97 = f75, f112, f97
  613. FNMA f80 = f76, f112, f80
  614. FMA_A f81 = f77, f112, f81
  615. FNMA f64 = f78, f112, f64
  616. FMA_A f65 = f79, f112, f65
  617. ;;
  618. FMA_B f96 = f75, f113, f96
  619. FNMA f97 = f74, f113, f97
  620. FMA_B f80 = f77, f113, f80
  621. FNMA f81 = f76, f113, f81
  622. FMA_B f64 = f79, f113, f64
  623. FNMA f65 = f78, f113, f65
  624. ;;
  625. FMPY f32 = f88, f96
  626. FMPY f33 = f89, f96
  627. ;;
  628. FMA_C f96 = f89, f97, f32
  629. FMA_D f97 = f88, f97, f33
  630. ;;
  631. FNMA f80 = f90, f96, f80
  632. FMA_A f81 = f91, f96, f81
  633. FNMA f64 = f92, f96, f64
  634. FMA_A f65 = f93, f96, f65
  635. ;;
  636. FMA_B f80 = f91, f97, f80
  637. FNMA f81 = f90, f97, f81
  638. FMA_B f64 = f93, f97, f64
  639. FNMA f65 = f92, f97, f65
  640. ;;
  641. FMPY f32 = f104, f80
  642. FMPY f33 = f105, f80
  643. ;;
  644. FMA_C f80 = f105, f81, f32
  645. FMA_D f81 = f104, f81, f33
  646. ;;
  647. FNMA f64 = f106, f80, f64
  648. FMA_A f65 = f107, f80, f65
  649. ;;
  650. FMA_B f64 = f107, f81, f64
  651. FNMA f65 = f106, f81, f65
  652. ;;
  653. FMPY f32 = f120, f64
  654. FMPY f33 = f121, f64
  655. ;;
  656. FMA_C f64 = f121, f65, f32
  657. FMA_D f65 = f120, f65, f33
  658. ;;
  659. #endif
  660. #ifdef LT
  661. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  662. ;;
  663. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  664. ;;
  665. LDFPD f76, f77 = [AOFFSET], 2 * SIZE
  666. ;;
  667. LDFPD f78, f79 = [AOFFSET]
  668. adds AOFFSET = 4 * SIZE, AOFFSET
  669. ;;
  670. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  671. ;;
  672. LDFPD f92, f93 = [AOFFSET], 2 * SIZE
  673. ;;
  674. LDFPD f94, f95 = [AOFFSET]
  675. adds AOFFSET = 6 * SIZE, AOFFSET
  676. ;;
  677. LDFPD f108, f109 = [AOFFSET], 2 * SIZE
  678. ;;
  679. LDFPD f110, f111 = [AOFFSET]
  680. adds AOFFSET = 8 * SIZE, AOFFSET
  681. ;;
  682. LDFPD f126, f127 = [AOFFSET]
  683. adds AOFFSET = - 30 * SIZE, AOFFSET
  684. ;;
  685. FMPY f32 = f72, f64
  686. FMPY f33 = f73, f64
  687. ;;
  688. FMA_C f64 = f73, f65, f32
  689. FMA_D f65 = f72, f65, f33
  690. ;;
  691. FNMA f80 = f74, f64, f80
  692. FMA_A f81 = f75, f64, f81
  693. FNMA f96 = f76, f64, f96
  694. FMA_A f97 = f77, f64, f97
  695. FNMA f112 = f78, f64, f112
  696. FMA_A f113 = f79, f64, f113
  697. ;;
  698. FMA_B f80 = f75, f65, f80
  699. FNMA f81 = f74, f65, f81
  700. FMA_B f96 = f77, f65, f96
  701. FNMA f97 = f76, f65, f97
  702. FMA_B f112 = f79, f65, f112
  703. FNMA f113 = f78, f65, f113
  704. ;;
  705. FMPY f32 = f90, f80
  706. FMPY f33 = f91, f80
  707. ;;
  708. FMA_C f80 = f91, f81, f32
  709. FMA_D f81 = f90, f81, f33
  710. ;;
  711. FNMA f96 = f92, f80, f96
  712. FMA_A f97 = f93, f80, f97
  713. FNMA f112 = f94, f80, f112
  714. FMA_A f113 = f95, f80, f113
  715. ;;
  716. FMA_B f96 = f93, f81, f96
  717. FNMA f97 = f92, f81, f97
  718. FMA_B f112 = f95, f81, f112
  719. FNMA f113 = f94, f81, f113
  720. ;;
  721. FMPY f32 = f108, f96
  722. FMPY f33 = f109, f96
  723. ;;
  724. FMA_C f96 = f109, f97, f32
  725. FMA_D f97 = f108, f97, f33
  726. ;;
  727. FNMA f112 = f110, f96, f112
  728. FMA_A f113 = f111, f96, f113
  729. ;;
  730. FMA_B f112 = f111, f97, f112
  731. FNMA f113 = f110, f97, f113
  732. ;;
  733. FMPY f32 = f126, f112
  734. FMPY f33 = f127, f112
  735. ;;
  736. FMA_C f112 = f127, f113, f32
  737. FMA_D f113 = f126, f113, f33
  738. ;;
  739. #endif
  740. #ifdef RN
  741. LDFPD f72, f73 = [BOFFSET]
  742. ;;
  743. FMPY f32 = f72, f64
  744. FMPY f33 = f73, f64
  745. FMPY f34 = f72, f80
  746. FMPY f35 = f73, f80
  747. FMPY f36 = f72, f96
  748. FMPY f37 = f73, f96
  749. FMPY f38 = f72, f112
  750. FMPY f39 = f73, f112
  751. ;;
  752. FMA_C f64 = f73, f65, f32
  753. FMA_D f65 = f72, f65, f33
  754. FMA_C f80 = f73, f81, f34
  755. FMA_D f81 = f72, f81, f35
  756. FMA_C f96 = f73, f97, f36
  757. FMA_D f97 = f72, f97, f37
  758. FMA_C f112 = f73, f113, f38
  759. FMA_D f113 = f72, f113, f39
  760. ;;
  761. #endif
  762. #ifdef RT
  763. LDFPD f72, f73 = [BOFFSET]
  764. ;;
  765. FMPY f32 = f72, f64
  766. FMPY f33 = f73, f64
  767. FMPY f34 = f72, f80
  768. FMPY f35 = f73, f80
  769. FMPY f36 = f72, f96
  770. FMPY f37 = f73, f96
  771. FMPY f38 = f72, f112
  772. FMPY f39 = f73, f112
  773. ;;
  774. FMA_C f64 = f73, f65, f32
  775. FMA_D f65 = f72, f65, f33
  776. FMA_C f80 = f73, f81, f34
  777. FMA_D f81 = f72, f81, f35
  778. FMA_C f96 = f73, f97, f36
  779. FMA_D f97 = f72, f97, f37
  780. FMA_C f112 = f73, f113, f38
  781. FMA_D f113 = f72, f113, f39
  782. ;;
  783. #endif
  784. #if defined(LN) || defined(LT)
  785. adds BOFFSET2 = 4 * SIZE, BOFFSET
  786. ;;
  787. STFD [BOFFSET] = f64, SIZE
  788. STFD [BOFFSET2] = f96, SIZE
  789. ;;
  790. STFD [BOFFSET] = f65, SIZE
  791. STFD [BOFFSET2] = f97, SIZE
  792. ;;
  793. STFD [BOFFSET] = f80, SIZE
  794. STFD [BOFFSET2] = f112, SIZE
  795. ;;
  796. STFD [BOFFSET] = f81, 5 * SIZE
  797. STFD [BOFFSET2] = f113, 5 * SIZE
  798. ;;
  799. adds BOFFSET = - 8 * SIZE, BOFFSET
  800. ;;
  801. #else
  802. adds AOFFSET2 = 4 * SIZE, AOFFSET
  803. ;;
  804. STFD [AOFFSET] = f64, SIZE
  805. STFD [AOFFSET2] = f96, SIZE
  806. ;;
  807. STFD [AOFFSET] = f65, SIZE
  808. STFD [AOFFSET2] = f97, SIZE
  809. ;;
  810. STFD [AOFFSET] = f80, SIZE
  811. STFD [AOFFSET2] = f112, SIZE
  812. ;;
  813. STFD [AOFFSET] = f81, 5 * SIZE
  814. STFD [AOFFSET2] = f113, 5 * SIZE
  815. ;;
  816. adds AOFFSET = - 8 * SIZE, AOFFSET
  817. ;;
  818. #endif
  819. #ifdef LN
  820. adds C1 = -8 * SIZE, C1
  821. adds C5 = -8 * SIZE, C5
  822. #endif
  823. ;;
  824. STFD [C1 ] = f64, SIZE
  825. STFD [C5 ] = f96, SIZE
  826. ;;
  827. STFD [C1 ] = f65, SIZE
  828. STFD [C5 ] = f97, SIZE
  829. ;;
  830. STFD [C1 ] = f80, SIZE
  831. STFD [C5 ] = f112, SIZE
  832. ;;
  833. STFD [C1 ] = f81, 5 * SIZE
  834. STFD [C5 ] = f113, 5 * SIZE
  835. ;;
  836. mov f64 = f0
  837. mov f65 = f0
  838. mov f80 = f0
  839. mov f81 = f0
  840. mov f96 = f0
  841. mov f97 = f0
  842. mov f112 = f0
  843. mov f113 = f0
  844. ;;
  845. #ifdef LN
  846. adds C1 = -8 * SIZE, C1
  847. adds C5 = -8 * SIZE, C5
  848. #endif
  849. ;;
  850. cmp.ne p6, p0 = 1, I
  851. ;;
  852. adds I = -1, I
  853. ;;
  854. shladd r2 = K, ZBASE_SHIFT, r0
  855. ;;
  856. sub L = K, KK
  857. ;;
  858. #ifdef RT
  859. shladd AORIG = r2, 2, AORIG
  860. #endif
  861. ;;
  862. #if defined(LT) || defined(RN)
  863. shladd L = L, ZBASE_SHIFT, r0
  864. ;;
  865. shladd AOFFSET = L, 2, AOFFSET
  866. add BOFFSET = L, BOFFSET
  867. #endif
  868. ;;
  869. #ifdef LT
  870. adds KK = 4, KK
  871. #elif defined LN
  872. adds KK = -4, KK
  873. #else
  874. nop __LINE__
  875. #endif
  876. ;;
  877. #if defined(LT) || defined(RN)
  878. mov L = KK
  879. #else
  880. sub L = K, KK
  881. #endif
  882. ;;
  883. (p6) br.cond.dptk .L092
  884. ;;
  885. .align 16
  // .L100: setup for the remainder tile taken when bit 1 of M is set. The
  // write-back code of this tile only stores through C1. This part picks the
  // operand pointers, preloads the first operands and derives the trip count
  // of the two-way unrolled loop at .L102.
  886. .L100:
  887. { .mib
  // L = length of the inner product: KK for LT/RN, K - KK otherwise.
  888. #if defined(LT) || defined(RN)
  889. mov L = KK
  890. #else
  891. sub L = K, KK
  892. #endif
  // p6 = (bit 1 of M is clear): no such tile, continue at .L110.
  893. tbit.z p6, p7 = M, 1
  894. (p6) br.cond.dptk .L110
  895. }
  896. ;;
  897. { .mmi
  // p7 = (L != 0) guards the preloads below.
  898. cmp.ne p7, p0 = r0, L
  899. adds BOFFSET = 0 * SIZE, B
  // r2 = K << (1 + ZBASE_SHIFT); only consumed by the LN adjustment of AORIG.
  900. shl r2 = K, 1 + ZBASE_SHIFT
  901. }
  902. { .mmi
  // r3 = KK << ZBASE_SHIFT (presumably KK elements as a byte offset; TODO
  // confirm the definition of ZBASE_SHIFT).
  903. shladd r3 = KK, ZBASE_SHIFT, r0
  904. nop __LINE__
  905. nop __LINE__
  906. }
  907. ;;
  908. #if defined(LT) || defined(RN)
  // LT/RN: read B from its start; AOFFSET is used as it stands.
  909. { .mfb
  910. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  911. mov f66 = f0
  912. nop __LINE__
  913. }
  914. { .mmf
  915. nop __LINE__
  916. nop __LINE__
  917. mov f67 = f0
  918. }
  919. ;;
  920. #else
  // LN/RT: BOFFSET = B + r3. LN first steps AORIG back by r2, then
  // AOFFSET = AORIG + 2 * r3.
  921. { .mfi
  922. add BOFFSET = r3, B
  923. mov f66 = f0
  924. #ifdef LN
  925. sub AORIG = AORIG, r2
  926. #else
  927. nop __LINE__
  928. #endif
  929. }
  930. ;;
  931. { .mfi
  932. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  933. mov f67 = f0
  934. shladd AOFFSET = r3, 1, AORIG
  935. }
  936. ;;
  937. #endif
  938. ;;
  // Trip count for the unrolled loop: L = ((L + 1) >> 1) - 1 goes into ar.lc.
  // p12 = (bit 0 of L + 1 is clear), i.e. the original count was odd.
  939. adds L = 1, L
  940. ;;
  941. { .mii
  942. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  943. tbit.z p12, p0 = L, 0
  944. shr L = L, 1
  945. }
  946. ;;
  947. { .mmi
  948. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  949. nop __LINE__
  950. adds L = -1, L
  951. }
  952. ;;
  953. { .mmi
  // PREA = AOFFSET + PREFETCHSIZE * SIZE; p3 starts out true.
  954. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  955. cmp.eq p3, p0 = r0, r0
  956. mov ar.lc = L
  957. }
  958. ;;
  // L == -1 here corresponds to an original count of zero: skip the loop.
  959. cmp.eq p6, p0 = -1, L
  960. (p6) br.cond.dpnt .L108
  961. ;;
  962. .align 16
  // .L102: inner-product loop, two k steps per pass.
  //   first half : A in f32-f35, B in f48/f49 (always executed)
  //   second half: A in f40-f43, B in f56/f57 (predicated on p3)
  // Products accumulate in f64/f65/f96/f97 and in the cross-term registers
  // f80/f81/f112/f113. Loads predicated on p4 refill the first-half registers
  // for the next pass.
  963. .L102:
  964. { .mfi
  965. lfetch.nt1 [PREA], 8 * SIZE
  966. FMA f64 = f32, f48, f64 // A1 * B1
  // p4 = (L != 0): another pass follows this one.
  967. cmp.ne p4, p5 = 0, L
  968. }
  969. { .mfi
  // PREB is recomputed from BOFFSET on every pass.
  970. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  971. FMA f80 = f32, f49, f80 // A1 * B2
  // With an odd count (p12) the second half is switched off on the last
  // pass, where L == 0.
  972. (p12) cmp.ne p3, p0 = 0, L
  973. }
  974. ;;
  975. { .mfb
  976. lfetch.nt1 [PREB], 4 * SIZE
  977. FMA f65 = f33, f48, f65 // A2 * B1
  978. nop __LINE__
  979. }
  980. { .mfb
  981. nop __LINE__
  982. FMA f81 = f33, f49, f81 // A2 * B2
  983. nop __LINE__
  984. }
  985. ;;
  986. { .mfb
  987. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  988. FMA f96 = f34, f48, f96 // A3 * B1
  989. nop __LINE__
  990. }
  991. { .mfb
  992. nop __LINE__
  993. FMA f112 = f34, f49, f112 // A3 * B2
  994. nop __LINE__
  995. }
  996. ;;
  997. { .mfb
  998. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  999. FMA f97 = f35, f48, f97 // A4 * B1
  1000. nop __LINE__
  1001. }
  1002. { .mfb
  1003. nop __LINE__
  1004. FMA f113 = f35, f49, f113 // A4 * B2
  1005. nop __LINE__
  1006. }
  1007. ;;
  // Second k step of the pass, executed only while p3 holds.
  1008. { .mfb
  1009. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  1010. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  1011. nop __LINE__
  1012. }
  1013. { .mfb
  1014. nop __LINE__
  1015. (p3) FMA f80 = f40, f57, f80 // A1 * B2
  1016. nop __LINE__
  1017. }
  1018. ;;
  1019. { .mfb
  1020. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1021. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  1022. nop __LINE__
  1023. }
  1024. { .mfb
  1025. nop __LINE__
  1026. (p3) FMA f81 = f41, f57, f81 // A2 * B2
  1027. nop __LINE__
  1028. }
  1029. ;;
  1030. { .mfb
  1031. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  1032. (p3) FMA f96 = f42, f56, f96 // A3 * B1
  1033. nop __LINE__
  1034. }
  1035. { .mfb
  1036. nop __LINE__
  1037. (p3) FMA f112 = f42, f57, f112 // A3 * B2
  1038. nop __LINE__
  1039. }
  1040. ;;
  1041. { .mfi
  1042. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  1043. (p3) FMA f97 = f43, f56, f97 // A4 * B1
  // Software copy of the remaining-pass counter; ar.lc drives the branch.
  1044. adds L = -1, L
  1045. }
  1046. { .mfb
  1047. nop __LINE__
  1048. (p3) FMA f113 = f43, f57, f113 // A4 * B2
  1049. br.cloop.sptk.few .L102
  1050. }
  1051. ;;
  // Fold the cross-term accumulators into the result registers. FCALC_A and
  // FCALC_B are macros defined elsewhere in the file; presumably they form
  // the real and imaginary parts of the complex products (TODO confirm).
  1052. { .mfb
  1053. nop __LINE__
  1054. FCALC_A f64 = f64, f81
  1055. nop __LINE__
  1056. }
  1057. { .mfb
  1058. nop __LINE__
  1059. FCALC_B f65 = f65, f80
  1060. nop __LINE__
  1061. }
  1062. { .mfb
  1063. nop __LINE__
  1064. FCALC_A f96 = f96, f113
  1065. nop __LINE__
  1066. }
  1067. { .mfb
  1068. nop __LINE__
  1069. FCALC_B f97 = f97, f112
  1070. nop __LINE__
  1071. }
  1072. ;;
  // .L108: solve step and write-back for the tile accumulated at .L102
  // (results in f64/f65 and f96/f97), followed by the pointer and KK
  // bookkeeping that precedes .L110.
  1073. .L108:
  1074. #if defined(LN) || defined(RT)
  // LN/RT: r2 = (KK - 2) for LN or (KK - 1) for RT, shifted by ZBASE_SHIFT;
  // AOFFSET = AORIG + 2 * r2 and BOFFSET = B + r2.
  1075. #ifdef LN
  1076. adds r2 = -2, KK
  1077. #else
  1078. adds r2 = -1, KK
  1079. #endif
  1080. ;;
  1081. shladd r2 = r2, ZBASE_SHIFT, r0
  1082. ;;
  1083. shladd AOFFSET = r2, 1, AORIG
  1084. add BOFFSET = r2, B
  1085. ;;
  1086. #endif
  1087. #if defined(LN) || defined(LT)
  // LN/LT: fetch the four stored values from BOFFSET (pointer restored by the
  // adds) and combine them with the accumulated products. FSUB and FSUB_A are
  // macros defined elsewhere in the file.
  1088. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  1089. ;;
  1090. LDFPD f88, f89 = [BOFFSET]
  1091. adds BOFFSET = -2 * SIZE, BOFFSET
  1092. ;;
  1093. FSUB f64 = f72, f64
  1094. FSUB_A f65 = f73, f65
  1095. FSUB f96 = f88, f96
  1096. FSUB_A f97 = f89, f97
  1097. ;;
  1098. #else
  // RN/RT: same, but the stored values come from AOFFSET.
  1099. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  1100. ;;
  1101. LDFPD f88, f89 = [AOFFSET]
  1102. adds AOFFSET = -2 * SIZE, AOFFSET
  1103. ;;
  1104. FSUB f64 = f72, f64
  1105. FSUB f65 = f73, f65
  1106. FSUB f96 = f88, f96
  1107. FSUB f97 = f89, f97
  1108. ;;
  1109. #endif
  1110. #ifdef LN
  // LN: coefficient pairs are read at AOFFSET + 6, + 4 and + 0 (in units of
  // SIZE); AOFFSET ends up where it started. The second result (f96/f97) is
  // scaled first, then eliminated from the first (f64/f65), which is scaled
  // last. Scaling is a multiplication, so the diagonal entries are presumably
  // stored already inverted (TODO confirm with the packing code).
  1111. adds AOFFSET = 6 * SIZE, AOFFSET
  1112. ;;
  1113. LDFPD f104, f105 = [AOFFSET]
  1114. adds AOFFSET = - 2 * SIZE, AOFFSET
  1115. ;;
  1116. LDFPD f106, f107 = [AOFFSET]
  1117. adds AOFFSET = - 4 * SIZE, AOFFSET
  1118. ;;
  1119. LDFPD f120, f121 = [AOFFSET]
  1120. ;;
  1121. FMPY f32 = f104, f96
  1122. FMPY f33 = f105, f96
  1123. ;;
  1124. FMA_C f96 = f105, f97, f32
  1125. FMA_D f97 = f104, f97, f33
  1126. ;;
  1127. FNMA f64 = f106, f96, f64
  1128. FMA_A f65 = f107, f96, f65
  1129. ;;
  1130. FMA_B f64 = f107, f97, f64
  1131. FNMA f65 = f106, f97, f65
  1132. ;;
  1133. FMPY f32 = f120, f64
  1134. FMPY f33 = f121, f64
  1135. ;;
  1136. FMA_C f64 = f121, f65, f32
  1137. FMA_D f65 = f120, f65, f33
  1138. ;;
  1139. #endif
  1140. #ifdef LT
  // LT: coefficient pairs are read at AOFFSET + 0, + 2 and + 6 (in units of
  // SIZE); AOFFSET ends up where it started. Order is the reverse of LN: the
  // first result is scaled, eliminated from the second, then the second is
  // scaled.
  1141. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  1142. ;;
  1143. LDFPD f74, f75 = [AOFFSET]
  1144. adds AOFFSET = 4 * SIZE, AOFFSET
  1145. ;;
  1146. LDFPD f90, f91 = [AOFFSET]
  1147. adds AOFFSET = - 6 * SIZE, AOFFSET
  1148. ;;
  1149. FMPY f32 = f72, f64
  1150. FMPY f33 = f73, f64
  1151. ;;
  1152. FMA_C f64 = f73, f65, f32
  1153. FMA_D f65 = f72, f65, f33
  1154. ;;
  1155. FNMA f96 = f74, f64, f96
  1156. FMA_A f97 = f75, f64, f97
  1157. ;;
  1158. FMA_B f96 = f75, f65, f96
  1159. FNMA f97 = f74, f65, f97
  1160. ;;
  1161. FMPY f32 = f90, f96
  1162. FMPY f33 = f91, f96
  1163. ;;
  1164. FMA_C f96 = f91, f97, f32
  1165. FMA_D f97 = f90, f97, f33
  1166. ;;
  1167. #endif
  1168. #ifdef RN
  // RN: one coefficient pair from BOFFSET scales both results.
  1169. LDFPD f72, f73 = [BOFFSET]
  1170. ;;
  1171. FMPY f32 = f72, f64
  1172. FMPY f33 = f73, f64
  1173. FMPY f36 = f72, f96
  1174. FMPY f37 = f73, f96
  1175. ;;
  1176. FMA_C f64 = f73, f65, f32
  1177. FMA_D f65 = f72, f65, f33
  1178. FMA_C f96 = f73, f97, f36
  1179. FMA_D f97 = f72, f97, f37
  1180. ;;
  1181. #endif
  1182. #ifdef RT
  // RT: same instruction sequence as the RN case above.
  1183. LDFPD f72, f73 = [BOFFSET]
  1184. ;;
  1185. FMPY f32 = f72, f64
  1186. FMPY f33 = f73, f64
  1187. FMPY f36 = f72, f96
  1188. FMPY f37 = f73, f96
  1189. ;;
  1190. FMA_C f64 = f73, f65, f32
  1191. FMA_D f65 = f72, f65, f33
  1192. FMA_C f96 = f73, f97, f36
  1193. FMA_D f97 = f72, f97, f37
  1194. ;;
  1195. #endif
  1196. #if defined(LN) || defined(LT)
  // Write the solved values back over the packed operand they were read from
  // (BOFFSET for LN/LT, AOFFSET for RN/RT) and rewind the pointer.
  1197. STFD [BOFFSET] = f64, SIZE
  1198. ;;
  1199. STFD [BOFFSET] = f65, SIZE
  1200. ;;
  1201. STFD [BOFFSET] = f96, SIZE
  1202. ;;
  1203. STFD [BOFFSET] = f97, SIZE
  1204. ;;
  1205. adds BOFFSET = - 4 * SIZE, BOFFSET
  1206. ;;
  1207. #else
  // NOTE(review): AOFFSET2 is computed here but not referenced again in this
  // block; the stores below all go through AOFFSET.
  1208. adds AOFFSET2 = 4 * SIZE, AOFFSET
  1209. ;;
  1210. STFD [AOFFSET] = f64, SIZE
  1211. ;;
  1212. STFD [AOFFSET] = f65, SIZE
  1213. ;;
  1214. STFD [AOFFSET] = f96, SIZE
  1215. ;;
  1216. STFD [AOFFSET] = f97, SIZE
  1217. ;;
  1218. adds AOFFSET = - 4 * SIZE, AOFFSET
  1219. ;;
  1220. #endif
  1221. #ifdef LN
  // LN steps C1 back by the four values about to be stored.
  // NOTE(review): C5 is adjusted too, but no store in this block uses it.
  1222. adds C1 = -4 * SIZE, C1
  1223. adds C5 = -4 * SIZE, C5
  1224. #endif
  1225. ;;
  // Store the four results to C through C1 (post-incremented by SIZE each).
  1226. STFD [C1 ] = f64, SIZE
  1227. ;;
  1228. STFD [C1 ] = f65, SIZE
  1229. ;;
  1230. STFD [C1 ] = f96, SIZE
  1231. ;;
  1232. STFD [C1 ] = f97, SIZE
  1233. ;;
  // Clear the accumulators for the next tile (f0 reads as zero on IA-64).
  1234. mov f64 = f0
  1235. mov f65 = f0
  1236. mov f80 = f0
  1237. mov f81 = f0
  1238. mov f96 = f0
  1239. mov f97 = f0
  1240. mov f112 = f0
  1241. mov f113 = f0
  1242. ;;
  1243. #ifdef LN
  // LN: undo the post-increments of the stores so C1 keeps moving backwards.
  1244. adds C1 = -4 * SIZE, C1
  1245. adds C5 = -4 * SIZE, C5
  1246. #endif
  1247. ;;
  // NOTE(review): p6 is set here but .L110 redefines p6 before any branch
  // in this block reads it.
  1248. cmp.ne p6, p0 = 1, I
  1249. ;;
  1250. adds I = -1, I
  1251. ;;
  // r2 = K << ZBASE_SHIFT; L = K - KK.
  1252. shladd r2 = K, ZBASE_SHIFT, r0
  1253. ;;
  1254. sub L = K, KK
  1255. ;;
  1256. #ifdef RT
  // RT: AORIG += 2 * r2.
  1257. shladd AORIG = r2, 1, AORIG
  1258. #endif
  1259. ;;
  1260. #if defined(LT) || defined(RN)
  // LT/RN: skip the (K - KK) part not consumed by the loop:
  // AOFFSET += 2 * (L << ZBASE_SHIFT), BOFFSET += (L << ZBASE_SHIFT).
  1261. shladd L = L, ZBASE_SHIFT, r0
  1262. ;;
  1263. shladd AOFFSET = L, 1, AOFFSET
  1264. add BOFFSET = L, BOFFSET
  1265. #endif
  1266. ;;
  // KK moves by the two rows just handled: forward for LT, backward for LN.
  1267. #ifdef LT
  1268. adds KK = 2, KK
  1269. #elif defined LN
  1270. adds KK = -2, KK
  1271. #else
  1272. nop __LINE__
  1273. #endif
  1274. ;;
  1275. #if defined(LT) || defined(RN)
  1276. mov L = KK
  1277. #else
  1278. sub L = K, KK
  1279. #endif
  1280. ;;
  1281. .align 16
  // .L110: setup for the remainder tile taken when bit 0 of M is set (one A
  // pair per k step, stores through C1 only). Mirrors the .L100 setup with
  // the scale factors halved.
  1282. .L110:
  1283. { .mib
  // L = length of the inner product: KK for LT/RN, K - KK otherwise.
  1284. #if defined(LT) || defined(RN)
  1285. mov L = KK
  1286. #else
  1287. sub L = K, KK
  1288. #endif
  // p6 = (bit 0 of M is clear): no such tile, continue at .L119.
  1289. tbit.z p6, p7 = M, 0
  1290. (p6) br.cond.dptk .L119
  1291. }
  1292. ;;
  1293. { .mmi
  // p7 = (L != 0) guards the preloads below.
  1294. cmp.ne p7, p0 = r0, L
  1295. adds BOFFSET = 0 * SIZE, B
  // r2 = K << ZBASE_SHIFT; only consumed by the LN adjustment of AORIG.
  1296. shl r2 = K, ZBASE_SHIFT
  1297. }
  1298. { .mmi
  // r3 = KK << ZBASE_SHIFT.
  1299. shladd r3 = KK, ZBASE_SHIFT, r0
  1300. nop __LINE__
  1301. nop __LINE__
  1302. }
  1303. ;;
  1304. #if defined(LT) || defined(RN)
  // LT/RN: read B from its start; AOFFSET is used as it stands.
  1305. { .mfb
  1306. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  1307. mov f66 = f0
  1308. nop __LINE__
  1309. }
  1310. { .mmf
  1311. nop __LINE__
  1312. nop __LINE__
  1313. mov f67 = f0
  1314. }
  1315. ;;
  1316. #else
  // LN/RT: BOFFSET = B + r3. LN first steps AORIG back by r2, then
  // AOFFSET = AORIG + r3.
  1317. { .mfi
  1318. add BOFFSET = r3, B
  1319. mov f66 = f0
  1320. #ifdef LN
  1321. sub AORIG = AORIG, r2
  1322. #else
  1323. nop __LINE__
  1324. #endif
  1325. }
  1326. ;;
  1327. { .mfi
  1328. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  1329. mov f67 = f0
  1330. add AOFFSET = r3, AORIG
  1331. }
  1332. ;;
  1333. #endif
  1334. ;;
  // Trip count for the unrolled loop: L = ((L + 1) >> 1) - 1 goes into ar.lc.
  // p12 = (bit 0 of L + 1 is clear), i.e. the original count was odd.
  1335. adds L = 1, L
  1336. ;;
  1337. { .mii
  1338. nop __LINE__
  1339. tbit.z p12, p0 = L, 0
  1340. shr L = L, 1
  1341. }
  1342. ;;
  1343. { .mmi
  1344. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  // p3 starts out true.
  1345. cmp.eq p3, p0 = r0, r0
  1346. adds L = -1, L
  1347. }
  1348. ;;
  1349. { .mmi
  // Both prefetch pointers start PREFETCHSIZE * SIZE past their operand
  // pointer.
  1350. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  1351. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  1352. mov ar.lc = L
  1353. }
  1354. ;;
  // L == -1 here corresponds to an original count of zero: skip the loop.
  1355. cmp.eq p6, p0 = -1, L
  1356. (p6) br.cond.dpnt .L118
  1357. ;;
  1358. .align 16
  // .L112: inner-product loop for the single-row tile, two k steps per pass.
  //   first half : A in f32/f33, B in f48/f49 (always executed)
  //   second half: A in f40/f41, B in f56/f57 (predicated on p3)
  // Products accumulate in f64/f65 and the cross-term registers f80/f81.
  1359. .L112:
  1360. { .mfi
  1361. lfetch.nt1 [PREA], 4 * SIZE
  1362. FMA f64 = f32, f48, f64 // A1 * B1
  // p4 = (L != 0): another pass follows this one.
  1363. cmp.ne p4, p5 = 0, L
  1364. }
  1365. { .mfi
  1366. lfetch.nt1 [PREB], 4 * SIZE
  1367. FMA f80 = f32, f49, f80 // A1 * B2
  // With an odd count (p12) the second half is switched off on the last
  // pass, where L == 0.
  1368. (p12) cmp.ne p3, p0 = 0, L
  1369. }
  1370. ;;
  1371. { .mmf
  1372. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  1373. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  1374. FMA f65 = f33, f48, f65 // A2 * B1
  1375. }
  1376. { .mmf
  1377. nop __LINE__
  1378. nop __LINE__
  1379. FMA f81 = f33, f49, f81 // A2 * B2
  1380. }
  1381. ;;
  // Second k step; the p4 loads refill the first-half registers.
  1382. { .mfb
  1383. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1384. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  1385. nop __LINE__
  1386. }
  1387. { .mfb
  1388. nop __LINE__
  1389. (p3) FMA f80 = f40, f57, f80 // A1 * B2
  1390. nop __LINE__
  1391. }
  1392. ;;
  1393. { .mfi
  1394. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  1395. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  // Software copy of the remaining-pass counter; ar.lc drives the branch.
  1396. adds L = -1, L
  1397. }
  1398. { .mfb
  1399. (p3) FMA f81 = f41, f57, f81 // A2 * B2
  1400. br.cloop.sptk.few .L112
  1401. }
  1402. ;;
  // Fold the cross terms into f64/f65. FCALC_A and FCALC_B are macros defined
  // elsewhere; presumably they form the real and imaginary parts (TODO
  // confirm).
  1403. { .mfb
  1404. nop __LINE__
  1405. FCALC_A f64 = f64, f81
  1406. nop __LINE__
  1407. }
  1408. { .mfb
  1409. nop __LINE__
  1410. FCALC_B f65 = f65, f80
  1411. nop __LINE__
  1412. }
  1413. ;;
  // .L118: solve step and write-back for the single-row tile accumulated at
  // .L112 (result pair in f64/f65), followed by the pointer and KK
  // bookkeeping that precedes .L119.
  1414. .L118:
  1415. #if defined(LN) || defined(RT)
  // LN/RT: r2 = (KK - 1) << ZBASE_SHIFT; AOFFSET = AORIG + r2 and
  // BOFFSET = B + r2. Both arms of the inner #ifdef are identical.
  1416. #ifdef LN
  1417. adds r2 = -1, KK
  1418. #else
  1419. adds r2 = -1, KK
  1420. #endif
  1421. ;;
  1422. shladd r2 = r2, ZBASE_SHIFT, r0
  1423. ;;
  1424. add AOFFSET = r2, AORIG
  1425. add BOFFSET = r2, B
  1426. ;;
  1427. #endif
  1428. #if defined(LN) || defined(LT)
  // LN/LT: stored pair comes from BOFFSET; RN/RT: from AOFFSET. FSUB and
  // FSUB_A are macros defined elsewhere in the file.
  1429. LDFPD f72, f73 = [BOFFSET]
  1430. ;;
  1431. FSUB f64 = f72, f64
  1432. FSUB_A f65 = f73, f65
  1433. ;;
  1434. #else
  1435. LDFPD f72, f73 = [AOFFSET]
  1436. ;;
  1437. FSUB f64 = f72, f64
  1438. FSUB f65 = f73, f65
  1439. ;;
  1440. #endif
  // Scale the pair by one coefficient pair: from AOFFSET for LN/LT, from
  // BOFFSET for RN/RT. All four variants use the same FMPY / FMA_C / FMA_D
  // sequence; presumably the coefficient is a pre-inverted diagonal entry
  // (TODO confirm with the packing code).
  1441. #ifdef LN
  1442. LDFPD f120, f121 = [AOFFSET]
  1443. ;;
  1444. FMPY f32 = f120, f64
  1445. FMPY f33 = f121, f64
  1446. ;;
  1447. FMA_C f64 = f121, f65, f32
  1448. FMA_D f65 = f120, f65, f33
  1449. ;;
  1450. #endif
  1451. #ifdef LT
  1452. LDFPD f72, f73 = [AOFFSET]
  1453. ;;
  1454. FMPY f32 = f72, f64
  1455. FMPY f33 = f73, f64
  1456. ;;
  1457. FMA_C f64 = f73, f65, f32
  1458. FMA_D f65 = f72, f65, f33
  1459. ;;
  1460. #endif
  1461. #ifdef RN
  1462. LDFPD f72, f73 = [BOFFSET]
  1463. ;;
  1464. FMPY f32 = f72, f64
  1465. FMPY f33 = f73, f64
  1466. ;;
  1467. FMA_C f64 = f73, f65, f32
  1468. FMA_D f65 = f72, f65, f33
  1469. ;;
  1470. #endif
  1471. #ifdef RT
  1472. LDFPD f72, f73 = [BOFFSET]
  1473. ;;
  1474. FMPY f32 = f72, f64
  1475. FMPY f33 = f73, f64
  1476. ;;
  1477. FMA_C f64 = f73, f65, f32
  1478. FMA_D f65 = f72, f65, f33
  1479. ;;
  1480. #endif
  1481. #if defined(LN) || defined(LT)
  // Write the solved pair back over the packed operand it was read from and
  // rewind the pointer.
  1482. STFD [BOFFSET] = f64, SIZE
  1483. ;;
  1484. STFD [BOFFSET] = f65, SIZE
  1485. ;;
  1486. adds BOFFSET = - 2 * SIZE, BOFFSET
  1487. ;;
  1488. #else
  1489. STFD [AOFFSET] = f64, SIZE
  1490. ;;
  1491. STFD [AOFFSET] = f65, SIZE
  1492. ;;
  1493. adds AOFFSET = - 2 * SIZE, AOFFSET
  1494. ;;
  1495. #endif
  1496. #ifdef LN
  // LN steps C1 back by the two values about to be stored.
  1497. adds C1 = -2 * SIZE, C1
  1498. #endif
  1499. ;;
  // Store the result pair to C through C1.
  1500. STFD [C1 ] = f64, SIZE
  1501. ;;
  1502. STFD [C1 ] = f65, SIZE
  1503. ;;
  // Clear the accumulators used by this tile (f0 reads as zero on IA-64).
  1504. mov f64 = f0
  1505. mov f65 = f0
  1506. mov f80 = f0
  1507. mov f81 = f0
  1508. ;;
  1509. #ifdef LN
  // LN: undo the post-increments of the stores.
  1510. adds C1 = -2 * SIZE, C1
  1511. #endif
  1512. ;;
  // NOTE(review): p6 is set here but no branch in this block reads it.
  1513. cmp.ne p6, p0 = 1, I
  1514. ;;
  1515. adds I = -1, I
  1516. ;;
  // r2 = K << ZBASE_SHIFT; L = K - KK.
  1517. shladd r2 = K, ZBASE_SHIFT, r0
  1518. ;;
  1519. sub L = K, KK
  1520. ;;
  1521. #ifdef RT
  // RT: AORIG += r2.
  1522. add AORIG = r2, AORIG
  1523. #endif
  1524. ;;
  1525. #if defined(LT) || defined(RN)
  // LT/RN: advance both operand pointers by (K - KK) << ZBASE_SHIFT.
  1526. shladd L = L, ZBASE_SHIFT, r0
  1527. ;;
  1528. add AOFFSET = L, AOFFSET
  1529. add BOFFSET = L, BOFFSET
  1530. #endif
  1531. ;;
  // KK moves by the one row just handled: forward for LT, backward for LN.
  1532. #ifdef LT
  1533. adds KK = 1, KK
  1534. #elif defined LN
  1535. adds KK = -1, KK
  1536. #else
  1537. nop __LINE__
  1538. #endif
  1539. ;;
  1540. #if defined(LT) || defined(RN)
  1541. mov L = KK
  1542. #else
  1543. sub L = K, KK
  1544. #endif
  1545. .align 16
  // .L119: end of the single-column pass. Advances B and KK according to the
  // variant and resets AOFFSET to the start of A before falling into .L050.
  1546. .L119:
  1547. #ifdef LN
  // LN: B += K << ZBASE_SHIFT.
  1548. shladd KK8 = K, ZBASE_SHIFT, r0
  1549. ;;
  1550. add B = KK8, B
  1551. #endif
  1552. #if defined(LT) || defined(RN)
  // LT/RN: B continues from wherever BOFFSET ended up.
  1553. mov B = BOFFSET
  1554. #endif
  // One column was processed: KK moves forward for RN, backward for RT.
  1555. #ifdef RN
  1556. adds KK = 1, KK
  1557. #endif
  1558. #ifdef RT
  1559. adds KK = -1, KK
  1560. #endif
  1561. ;;
  1562. { .mmi
  1563. mov AOFFSET = A
  1564. nop __LINE__
  1565. }
  1566. ;;
  1567. .align 16
  // .L050: entry to the two-column pass, taken when bit 1 of N is set.
  // Sets the column pointers C1/C2, the starting KK, the A pointer and the
  // first inner-product length, then enters the four-row tile loop at .L052
  // (or skips to .L060 when M >> 2 is zero).
  1568. .L050:
  1569. { .mmi
  // I = M >> 2: number of four-row tiles.
  1570. shr I = M, 2
  1571. }
  1572. { .mib
  // p6 = (bit 1 of N is clear): no two-column pass, continue at .L010.
  1573. tbit.z p6, p0 = N, 1
  1574. (p6) br.cond.dpnt .L010
  1575. }
  1576. ;;
  1577. #ifdef RT
  // RT walks backwards: B -= K << (1 + ZBASE_SHIFT) and C -= 2 * LDC before
  // the columns are processed.
  1578. { .mmi
  1579. shladd r3 = LDC, 1, r0
  1580. nop __LINE__
  1581. shl r2 = K, 1 + ZBASE_SHIFT
  1582. }
  1583. ;;
  1584. { .mmi
  1585. sub B = B, r2
  1586. sub C = C, r3
  1587. nop __LINE__
  1588. }
  1589. ;;
  1590. #endif
  // C1 and C2 address the two columns, LDC apart.
  1591. mov C1 = C
  1592. add C2 = LDC, C
  1593. ;;
  // Starting KK: M + OFFSET for LN, OFFSET for LT, unchanged otherwise.
  1594. #ifdef LN
  1595. add KK = M, OFFSET
  1596. #elif defined LT
  1597. mov KK = OFFSET
  1598. #else
  1599. nop __LINE__
  1600. #endif
  1601. ;;
  // LN/RT keep the base of A in AORIG (AOFFSET is derived per tile);
  // LT/RN stream through A directly via AOFFSET.
  1602. #if defined(LN) || defined(RT)
  1603. mov AORIG = A
  1604. #else
  1605. mov AOFFSET = A
  1606. #endif
  1607. ;;
  // L = length of the first inner product: KK for LT/RN, K - KK otherwise.
  1608. #if defined(LT) || defined(RN)
  1609. mov L = KK
  1610. #else
  1611. sub L = K, KK
  1612. #endif
  1613. ;;
  1614. { .mib
  // p6 = (I == 0): no four-row tiles, go to .L060.
  1615. cmp.eq p6, p7 = 0, I
  // All variants except RT advance C past the two columns here (C += 2 * LDC);
  // RT already moved C above.
  1616. #ifndef RT
  1617. shladd C = LDC, 1, C
  1618. #else
  1619. nop __LINE__
  1620. #endif
  1621. (p6) br.cond.dpnt .L060
  1622. }
  1623. ;;
  1624. .align 16
  // .L052: setup for one four-row by two-column tile. Picks the operand
  // pointers, preloads four A pairs (f32-f39) and two B pairs (f48-f51),
  // clears the accumulators f66/f67, f82/f83, f98/f99 and f114/f115,
  // prefetches the two destination columns and derives the trip count of the
  // two-way unrolled loop at .L053.
  // The accumulators f64/f65, f80/f81, f96/f97 and f112/f113 are not cleared
  // here; presumably the previous tile leaves them zeroed (TODO confirm).
  1625. .L052:
  1626. { .mmi
  // p7 = (L != 0) guards the preloads below.
  1627. cmp.ne p7, p0 = r0, L
  1628. adds BOFFSET = 0 * SIZE, B
  // r2 = K << (2 + ZBASE_SHIFT); only consumed by the LN adjustment of AORIG.
  1629. shl r2 = K, 2 + ZBASE_SHIFT
  1630. }
  1631. { .mmi
  // r3 = KK << ZBASE_SHIFT.
  1632. shladd r3 = KK, ZBASE_SHIFT, r0
  1633. nop __LINE__
  1634. nop __LINE__
  1635. }
  1636. ;;
  1637. #if defined(LT) || defined(RN)
  // LT/RN: read B from its start; AOFFSET is used as it stands.
  1638. { .mfb
  1639. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  1640. mov f66 = f0
  1641. nop __LINE__
  1642. }
  1643. { .mmf
  1644. nop __LINE__
  1645. nop __LINE__
  1646. mov f67 = f0
  1647. }
  1648. ;;
  1649. #else
  // LN/RT: BOFFSET = B + 2 * r3. LN first steps AORIG back by r2, then
  // AOFFSET = AORIG + 4 * r3.
  1650. { .mfi
  1651. shladd BOFFSET = r3, 1, B
  1652. mov f66 = f0
  1653. #ifdef LN
  1654. sub AORIG = AORIG, r2
  1655. #else
  1656. nop __LINE__
  1657. #endif
  1658. }
  1659. ;;
  1660. { .mfi
  1661. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  1662. mov f67 = f0
  1663. shladd AOFFSET = r3, 2, AORIG
  1664. }
  1665. ;;
  1666. #endif
  1667. { .mfi
  1668. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1669. mov f82 = f0
  // PREC starts CPREFETCHSIZE * SIZE past C1 and is stepped by LDC below.
  1670. adds PREC = CPREFETCHSIZE * SIZE, C1
  1671. }
  1672. { .mfi
  1673. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  1674. mov f83 = f0
  1675. nop __LINE__
  1676. }
  1677. ;;
  1678. { .mfi
  1679. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  1680. mov f98 = f0
  1681. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  1682. }
  1683. { .mfi
  // p3 starts out true; L + 1 begins the trip-count computation.
  1684. cmp.eq p3, p0 = r0, r0
  1685. mov f99 = f0
  1686. adds L = 1, L
  1687. }
  1688. ;;
  1689. { .mfi
  1690. (p7) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  1691. mov f114 = f0
  // p12 = (bit 0 of L + 1 is clear), i.e. the original count was odd.
  1692. tbit.z p12, p0 = L, 0
  1693. }
  1694. { .mfi
  1695. CPREFETCH [PREC], LDC
  1696. mov f115 = f0
  1697. shr L = L, 1
  1698. }
  1699. ;;
  1700. { .mmi
  1701. (p7) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  // C5 and C6 point 4 * SIZE into the columns addressed by C1 and C2.
  1702. adds C5 = 4 * SIZE, C1
  1703. adds L = -1, L
  1704. }
  1705. ;;
  1706. { .mmi
  1707. CPREFETCH [PREC], LDC
  1708. adds C6 = 4 * SIZE, C2
  // ar.lc = ((L + 1) >> 1) - 1 passes of the unrolled loop.
  1709. mov ar.lc = L
  1710. }
  1711. ;;
  // Fix: the loop at .L053 prefetches through PREA, but this setup never
  // assigned it, so the A-panel prefetch stream started from a stale address.
  // Initialise it the same way the .L100 and .L110 setups do. The instruction
  // sits after the stop above so that it reads AOFFSET in a later instruction
  // group than the post-incrementing preloads. Only the prefetch hint is
  // affected; no arithmetic result changes.
  adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  // L == -1 here corresponds to an original count of zero: skip the loop.
  1712. cmp.eq p6, p0 = -1, L
  1713. (p6) br.cond.dpnt .L058
  1714. ;;
  1715. .align 16
  // .L053: inner-product loop of the four-row by two-column tile, two k steps
  // per pass.
  //   first half : A pairs in f32-f39, B pairs in f48-f51 (always executed)
  //   second half: A pairs in f40-f47, B pairs in f56-f59 (predicated on p3)
  // Sixteen accumulators are updated: f64/f65, f80/f81, f96/f97, f112/f113,
  // f66/f67, f82/f83, f98/f99 and f114/f115. Cross terms are added straight
  // into them through the FMA_A / FMA_B macros (defined elsewhere), so no
  // FCALC step follows this loop.
  // PREA and PREB are only advanced here; they rely on the setup code that
  // precedes the loop.
  1716. .L053:
  1717. { .mfb
  1718. lfetch.nt1 [PREA], 16 * SIZE
  1719. FMA f64 = f32, f48, f64 // A1 * B1
  1720. nop __LINE__
  1721. }
  1722. { .mfi
  1723. nop __LINE__
  1724. FMA_B f65 = f32, f49, f65 // A1 * B2
  // With an odd count (p12) the second half is switched off on the last
  // pass, where L == 0.
  1725. (p12) cmp.ne p3, p0 = 0, L
  1726. }
  1727. ;;
  1728. { .mfi
  1729. lfetch.nt1 [PREB], 8 * SIZE
  1730. FMA f80 = f32, f50, f80 // A1 * B3
  // p4 = (L != 0): another pass follows, so refill the first-half registers.
  1731. cmp.ne p4, p5 = 0, L
  1732. }
  1733. { .mfi
  1734. nop __LINE__
  1735. FMA_B f81 = f32, f51, f81 // A1 * B4
  1736. nop __LINE__
  1737. }
  1738. ;;
  1739. { .mfi
  1740. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  1741. FMA f96 = f34, f48, f96 // A3 * B1
  1742. nop __LINE__
  1743. }
  1744. { .mfi
  1745. FMA_B f97 = f34, f49, f97 // A3 * B2
  1746. nop __LINE__
  1747. }
  1748. ;;
  1749. { .mfi
  1750. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  1751. FMA f112 = f34, f50, f112 // A3 * B3
  1752. nop __LINE__
  1753. }
  1754. { .mfb
  1755. nop __LINE__
  1756. FMA_B f113 = f34, f51, f113 // A3 * B4
  1757. nop __LINE__
  1758. }
  1759. ;;
  1760. { .mfb
  1761. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  1762. FMA f65 = f33, f48, f65 // A2 * B1
  1763. nop __LINE__
  1764. }
  1765. { .mfb
  1766. nop __LINE__
  1767. FMA_A f64 = f33, f49, f64 // A2 * B2
  1768. nop __LINE__
  1769. }
  1770. ;;
  1771. { .mfb
  1772. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  1773. FMA f81 = f33, f50, f81 // A2 * B3
  1774. nop __LINE__
  1775. }
  1776. { .mfb
  1777. nop __LINE__
  1778. FMA_A f80 = f33, f51, f80 // A2 * B4
  1779. nop __LINE__
  1780. }
  1781. ;;
  1782. { .mfb
  1783. (p3) LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  1784. FMA f97 = f35, f48, f97 // A4 * B1
  1785. nop __LINE__
  1786. }
  1787. { .mfb
  1788. nop __LINE__
  1789. FMA_A f96 = f35, f49, f96 // A4 * B2
  1790. nop __LINE__
  1791. }
  1792. ;;
  1793. { .mfb
  1794. (p3) LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  1795. FMA f113 = f35, f50, f113 // A4 * B3
  1796. nop __LINE__
  1797. }
  1798. { .mfb
  1799. nop __LINE__
  1800. FMA_A f112 = f35, f51, f112 // A4 * B4
  1801. nop __LINE__
  1802. }
  1803. ;;
  // Third and fourth A pairs of the first k step (f36-f39).
  1804. { .mfb
  1805. nop __LINE__
  1806. FMA f66 = f36, f48, f66 // A5 * B1
  1807. nop __LINE__
  1808. }
  1809. { .mfb
  1810. nop __LINE__
  1811. FMA_B f67 = f36, f49, f67 // A5 * B2
  1812. nop __LINE__
  1813. }
  1814. ;;
  1815. { .mfb
  1816. nop __LINE__
  1817. FMA f82 = f36, f50, f82 // A5 * B3
  1818. nop __LINE__
  1819. }
  1820. { .mfb
  1821. nop __LINE__
  1822. FMA_B f83 = f36, f51, f83 // A5 * B4
  1823. nop __LINE__
  1824. }
  1825. ;;
  1826. { .mfb
  1827. nop __LINE__
  1828. FMA f98 = f38, f48, f98 // A7 * B1
  1829. nop __LINE__
  1830. }
  1831. { .mfb
  1832. nop __LINE__
  1833. FMA_B f99 = f38, f49, f99 // A7 * B2
  1834. nop __LINE__
  1835. }
  1836. ;;
  1837. { .mfb
  1838. nop __LINE__
  1839. FMA f114 = f38, f50, f114 // A7 * B3
  1840. nop __LINE__
  1841. }
  1842. { .mfb
  1843. nop __LINE__
  1844. FMA_B f115 = f38, f51, f115 // A7 * B4
  1845. nop __LINE__
  1846. }
  1847. ;;
  1848. { .mfb
  1849. nop __LINE__
  1850. FMA f67 = f37, f48, f67 // A6 * B1
  1851. nop __LINE__
  1852. }
  1853. { .mfb
  1854. nop __LINE__
  1855. FMA_A f66 = f37, f49, f66 // A6 * B2
  1856. nop __LINE__
  1857. }
  1858. ;;
  1859. { .mfb
  1860. nop __LINE__
  1861. FMA f83 = f37, f50, f83 // A6 * B3
  1862. nop __LINE__
  1863. }
  1864. { .mfb
  1865. nop __LINE__
  1866. FMA_A f82 = f37, f51, f82 // A6 * B4
  1867. nop __LINE__
  1868. }
  1869. ;;
  // The first-half registers are reloaded for the next pass (p4) as soon as
  // their last use in this pass has been issued.
  1870. { .mfb
  1871. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1872. FMA f99 = f39, f48, f99 // A8 * B1
  1873. nop __LINE__
  1874. }
  1875. { .mfb
  1876. nop __LINE__
  1877. FMA_A f98 = f39, f49, f98 // A8 * B2
  1878. nop __LINE__
  1879. }
  1880. ;;
  1881. { .mfb
  1882. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  1883. FMA f115 = f39, f50, f115 // A8 * B3
  1884. nop __LINE__
  1885. }
  1886. { .mfb
  1887. nop __LINE__
  1888. FMA_A f114 = f39, f51, f114 // A8 * B4
  1889. nop __LINE__
  1890. }
  1891. ;;
  // Second k step of the pass, executed only while p3 holds.
  1892. { .mfb
  1893. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  1894. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  1895. nop __LINE__
  1896. }
  1897. { .mfb
  1898. nop __LINE__
  1899. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  1900. nop __LINE__
  1901. }
  1902. ;;
  1903. { .mfb
  1904. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  1905. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  1906. nop __LINE__
  1907. }
  1908. { .mfb
  1909. nop __LINE__
  1910. (p3) FMA_B f81 = f40, f59, f81 // A1 * B4
  1911. nop __LINE__
  1912. }
  1913. ;;
  1914. { .mfb
  1915. (p4) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  1916. (p3) FMA f96 = f42, f56, f96 // A3 * B1
  1917. nop __LINE__
  1918. }
  1919. { .mfb
  1920. nop __LINE__
  1921. (p3) FMA_B f97 = f42, f57, f97 // A3 * B2
  1922. nop __LINE__
  1923. }
  1924. ;;
  1925. { .mfb
  1926. (p4) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  1927. (p3) FMA f112 = f42, f58, f112 // A3 * B3
  1928. nop __LINE__
  1929. }
  1930. { .mfb
  1931. nop __LINE__
  1932. (p3) FMA_B f113 = f42, f59, f113 // A3 * B4
  1933. nop __LINE__
  1934. }
  1935. ;;
  1936. { .mfb
  1937. nop __LINE__
  1938. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  1939. nop __LINE__
  1940. }
  1941. { .mfb
  1942. nop __LINE__
  1943. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  1944. nop __LINE__
  1945. }
  1946. ;;
  1947. { .mfb
  1948. nop __LINE__
  1949. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  1950. nop __LINE__
  1951. }
  1952. { .mfb
  1953. nop __LINE__
  1954. (p3) FMA_A f80 = f41, f59, f80 // A2 * B4
  1955. nop __LINE__
  1956. }
  1957. ;;
  1958. { .mfb
  1959. nop __LINE__
  1960. (p3) FMA f97 = f43, f56, f97 // A4 * B1
  1961. nop __LINE__
  1962. }
  1963. { .mfb
  1964. nop __LINE__
  1965. (p3) FMA_A f96 = f43, f57, f96 // A4 * B2
  1966. nop __LINE__
  1967. }
  1968. ;;
  1969. { .mfb
  1970. nop __LINE__
  1971. (p3) FMA f113 = f43, f58, f113 // A4 * B3
  1972. nop __LINE__
  1973. }
  1974. { .mfb
  1975. nop __LINE__
  1976. (p3) FMA_A f112 = f43, f59, f112 // A4 * B4
  1977. nop __LINE__
  1978. }
  1979. ;;
  // Third and fourth A pairs of the second k step (f44-f47).
  1980. { .mfb
  1981. nop __LINE__
  1982. (p3) FMA f66 = f44, f56, f66 // A5 * B1
  1983. nop __LINE__
  1984. }
  1985. { .mfb
  1986. nop __LINE__
  1987. (p3) FMA_B f67 = f44, f57, f67 // A5 * B2
  1988. nop __LINE__
  1989. }
  1990. ;;
  1991. { .mfb
  1992. nop __LINE__
  1993. (p3) FMA f82 = f44, f58, f82 // A5 * B3
  1994. nop __LINE__
  1995. }
  1996. { .mfb
  1997. nop __LINE__
  1998. (p3) FMA_B f83 = f44, f59, f83 // A5 * B4
  1999. nop __LINE__
  2000. }
  2001. ;;
  2002. { .mfb
  2003. nop __LINE__
  2004. (p3) FMA f98 = f46, f56, f98 // A7 * B1
  2005. nop __LINE__
  2006. }
  2007. { .mfb
  2008. nop __LINE__
  2009. (p3) FMA_B f99 = f46, f57, f99 // A7 * B2
  2010. nop __LINE__
  2011. }
  2012. ;;
  2013. { .mfb
  2014. nop __LINE__
  2015. (p3) FMA f114 = f46, f58, f114 // A7 * B3
  2016. nop __LINE__
  2017. }
  2018. { .mfb
  2019. nop __LINE__
  2020. (p3) FMA_B f115 = f46, f59, f115 // A7 * B4
  2021. nop __LINE__
  2022. }
  2023. ;;
  2024. { .mfb
  2025. nop __LINE__
  2026. (p3) FMA f67 = f45, f56, f67 // A6 * B1
  2027. nop __LINE__
  2028. }
  2029. { .mfb
  2030. nop __LINE__
  2031. (p3) FMA_A f66 = f45, f57, f66 // A6 * B2
  2032. nop __LINE__
  2033. }
  2034. ;;
  2035. { .mfb
  2036. nop __LINE__
  2037. (p3) FMA f83 = f45, f58, f83 // A6 * B3
  2038. nop __LINE__
  2039. }
  2040. { .mfb
  2041. nop __LINE__
  2042. (p3) FMA_A f82 = f45, f59, f82 // A6 * B4
  2043. nop __LINE__
  2044. }
  2045. ;;
  2046. { .mfb
  2047. nop __LINE__
  2048. (p3) FMA f99 = f47, f56, f99 // A8 * B1
  2049. nop __LINE__
  2050. }
  2051. { .mfb
  2052. nop __LINE__
  2053. (p3) FMA_A f98 = f47, f57, f98 // A8 * B2
  2054. nop __LINE__
  2055. }
  2056. ;;
  2057. { .mfi
  2058. nop __LINE__
  2059. (p3) FMA f115 = f47, f58, f115 // A8 * B3
  // Software copy of the remaining-pass counter; ar.lc drives the branch.
  2060. adds L = -1, L
  2061. }
  2062. { .mfb
  2063. nop __LINE__
  2064. (p3) FMA_A f114 = f47, f59, f114 // A8 * B4
  2065. br.cloop.sptk.few .L053
  2066. }
  2067. ;;
  2068. .L058:
  2069. #if defined(LN) || defined(RT)
  2070. #ifdef LN
  2071. adds r2 = -4, KK
  2072. #else
  2073. adds r2 = -2, KK
  2074. #endif
  2075. ;;
  2076. shladd r2 = r2, ZBASE_SHIFT, r0
  2077. ;;
  2078. shladd AOFFSET = r2, 2, AORIG
  2079. shladd BOFFSET = r2, 1, B
  2080. ;;
  2081. #endif
  2082. #if defined(LN) || defined(LT)
  2083. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  2084. ;;
  2085. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  2086. ;;
  2087. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  2088. ;;
  2089. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  2090. ;;
  2091. LDFPD f104, f105 = [BOFFSET], 2 * SIZE
  2092. ;;
  2093. LDFPD f106, f107 = [BOFFSET], 2 * SIZE
  2094. ;;
  2095. LDFPD f120, f121 = [BOFFSET], 2 * SIZE
  2096. ;;
  2097. LDFPD f122, f123 = [BOFFSET]
  2098. adds BOFFSET = -14 * SIZE, BOFFSET
  2099. ;;
  2100. FSUB f64 = f72, f64
  2101. FSUB_A f65 = f73, f65
  2102. FSUB f80 = f74, f80
  2103. FSUB_A f81 = f75, f81
  2104. FSUB f96 = f88, f96
  2105. FSUB_A f97 = f89, f97
  2106. FSUB f112 = f90, f112
  2107. FSUB_A f113 = f91, f113
  2108. FSUB f66 = f104, f66
  2109. FSUB_A f67 = f105, f67
  2110. FSUB f82 = f106, f82
  2111. FSUB_A f83 = f107, f83
  2112. FSUB f98 = f120, f98
  2113. FSUB_A f99 = f121, f99
  2114. FSUB f114 = f122, f114
  2115. FSUB_A f115 = f123, f115
  2116. ;;
  2117. #else
  2118. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  2119. ;;
  2120. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  2121. ;;
  2122. LDFPD f76, f77 = [AOFFSET], 2 * SIZE
  2123. ;;
  2124. LDFPD f78, f79 = [AOFFSET], 2 * SIZE
  2125. ;;
  2126. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  2127. ;;
  2128. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  2129. ;;
  2130. LDFPD f92, f93 = [AOFFSET], 2 * SIZE
  2131. ;;
  2132. LDFPD f94, f95 = [AOFFSET]
  2133. adds AOFFSET = -14 * SIZE, AOFFSET
  2134. ;;
  2135. FSUB f64 = f72, f64
  2136. FSUB f65 = f73, f65
  2137. FSUB f96 = f74, f96
  2138. FSUB f97 = f75, f97
  2139. FSUB f66 = f76, f66
  2140. FSUB f67 = f77, f67
  2141. FSUB f98 = f78, f98
  2142. FSUB f99 = f79, f99
  2143. FSUB f80 = f88, f80
  2144. FSUB f81 = f89, f81
  2145. FSUB f112 = f90, f112
  2146. FSUB f113 = f91, f113
  2147. FSUB f82 = f92, f82
  2148. FSUB f83 = f93, f83
  2149. FSUB f114 = f94, f114
  2150. FSUB f115 = f95, f115
  2151. ;;
  2152. #endif
  2153. #ifdef LN
  2154. adds AOFFSET = 30 * SIZE, AOFFSET
  2155. ;;
  2156. LDFPD f72, f73 = [AOFFSET]
  2157. adds AOFFSET = - 2 * SIZE, AOFFSET
  2158. ;;
  2159. LDFPD f74, f75 = [AOFFSET]
  2160. adds AOFFSET = - 2 * SIZE, AOFFSET
  2161. ;;
  2162. LDFPD f76, f77 = [AOFFSET]
  2163. adds AOFFSET = - 2 * SIZE, AOFFSET
  2164. ;;
  2165. LDFPD f78, f79 = [AOFFSET]
  2166. adds AOFFSET = - 4 * SIZE, AOFFSET
  2167. ;;
  2168. LDFPD f88, f89 = [AOFFSET]
  2169. adds AOFFSET = - 2 * SIZE, AOFFSET
  2170. ;;
  2171. LDFPD f90, f91 = [AOFFSET]
  2172. adds AOFFSET = - 2 * SIZE, AOFFSET
  2173. ;;
  2174. LDFPD f92, f93 = [AOFFSET]
  2175. adds AOFFSET = - 6 * SIZE, AOFFSET
  2176. ;;
  2177. LDFPD f104, f105 = [AOFFSET]
  2178. adds AOFFSET = - 2 * SIZE, AOFFSET
  2179. ;;
  2180. LDFPD f106, f107 = [AOFFSET]
  2181. adds AOFFSET = - 8 * SIZE, AOFFSET
  2182. ;;
  2183. LDFPD f120, f121 = [AOFFSET]
  2184. ;;
  2185. FMPY f32 = f72, f98
  2186. FMPY f33 = f73, f98
  2187. FMPY f34 = f72, f114
  2188. FMPY f35 = f73, f114
  2189. ;;
  2190. FMA_C f98 = f73, f99, f32
  2191. FMA_D f99 = f72, f99, f33
  2192. FMA_C f114 = f73, f115, f34
  2193. FMA_D f115 = f72, f115, f35
  2194. ;;
  2195. FNMA f66 = f74, f98, f66
  2196. FMA_A f67 = f75, f98, f67
  2197. FNMA f82 = f74, f114, f82
  2198. FMA_A f83 = f75, f114, f83
  2199. ;;
  2200. FMA_B f66 = f75, f99, f66
  2201. FNMA f67 = f74, f99, f67
  2202. FMA_B f82 = f75, f115, f82
  2203. FNMA f83 = f74, f115, f83
  2204. ;;
  2205. FNMA f96 = f76, f98, f96
  2206. FMA_A f97 = f77, f98, f97
  2207. FNMA f112 = f76, f114, f112
  2208. FMA_A f113 = f77, f114, f113
  2209. ;;
  2210. FMA_B f96 = f77, f99, f96
  2211. FNMA f97 = f76, f99, f97
  2212. FMA_B f112 = f77, f115, f112
  2213. FNMA f113 = f76, f115, f113
  2214. ;;
  2215. FNMA f64 = f78, f98, f64
  2216. FMA_A f65 = f79, f98, f65
  2217. FNMA f80 = f78, f114, f80
  2218. FMA_A f81 = f79, f114, f81
  2219. ;;
  2220. FMA_B f64 = f79, f99, f64
  2221. FNMA f65 = f78, f99, f65
  2222. FMA_B f80 = f79, f115, f80
  2223. FNMA f81 = f78, f115, f81
  2224. ;;
  2225. FMPY f32 = f88, f66
  2226. FMPY f33 = f89, f66
  2227. FMPY f34 = f88, f82
  2228. FMPY f35 = f89, f82
  2229. ;;
  2230. FMA_C f66 = f89, f67, f32
  2231. FMA_D f67 = f88, f67, f33
  2232. FMA_C f82 = f89, f83, f34
  2233. FMA_D f83 = f88, f83, f35
  2234. ;;
  2235. FNMA f96 = f90, f66, f96
  2236. FMA_A f97 = f91, f66, f97
  2237. FNMA f112 = f90, f82, f112
  2238. FMA_A f113 = f91, f82, f113
  2239. ;;
  2240. FMA_B f96 = f91, f67, f96
  2241. FNMA f97 = f90, f67, f97
  2242. FMA_B f112 = f91, f83, f112
  2243. FNMA f113 = f90, f83, f113
  2244. ;;
  2245. FNMA f64 = f92, f66, f64
  2246. FMA_A f65 = f93, f66, f65
  2247. FNMA f80 = f92, f82, f80
  2248. FMA_A f81 = f93, f82, f81
  2249. ;;
  2250. FMA_B f64 = f93, f67, f64
  2251. FNMA f65 = f92, f67, f65
  2252. FMA_B f80 = f93, f83, f80
  2253. FNMA f81 = f92, f83, f81
  2254. ;;
  2255. FMPY f32 = f104, f96
  2256. FMPY f33 = f105, f96
  2257. FMPY f34 = f104, f112
  2258. FMPY f35 = f105, f112
  2259. ;;
  2260. FMA_C f96 = f105, f97, f32
  2261. FMA_D f97 = f104, f97, f33
  2262. FMA_C f112 = f105, f113, f34
  2263. FMA_D f113 = f104, f113, f35
  2264. ;;
  2265. FNMA f64 = f106, f96, f64
  2266. FMA_A f65 = f107, f96, f65
  2267. FNMA f80 = f106, f112, f80
  2268. FMA_A f81 = f107, f112, f81
  2269. ;;
  2270. FMA_B f64 = f107, f97, f64
  2271. FNMA f65 = f106, f97, f65
  2272. FMA_B f80 = f107, f113, f80
  2273. FNMA f81 = f106, f113, f81
  2274. ;;
  2275. FMPY f32 = f120, f64
  2276. FMPY f33 = f121, f64
  2277. FMPY f34 = f120, f80
  2278. FMPY f35 = f121, f80
  2279. ;;
  2280. FMA_C f64 = f121, f65, f32
  2281. FMA_D f65 = f120, f65, f33
  2282. FMA_C f80 = f121, f81, f34
  2283. FMA_D f81 = f120, f81, f35
  2284. ;;
  2285. #endif
  2286. #ifdef LT
  2287. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  2288. ;;
  2289. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  2290. ;;
  2291. LDFPD f76, f77 = [AOFFSET], 2 * SIZE
  2292. ;;
  2293. LDFPD f78, f79 = [AOFFSET]
  2294. adds AOFFSET = 4 * SIZE, AOFFSET
  2295. ;;
  2296. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  2297. ;;
  2298. LDFPD f92, f93 = [AOFFSET], 2 * SIZE
  2299. ;;
  2300. LDFPD f94, f95 = [AOFFSET]
  2301. adds AOFFSET = 6 * SIZE, AOFFSET
  2302. ;;
  2303. LDFPD f108, f109 = [AOFFSET], 2 * SIZE
  2304. ;;
  2305. LDFPD f110, f111 = [AOFFSET]
  2306. adds AOFFSET = 8 * SIZE, AOFFSET
  2307. ;;
  2308. LDFPD f126, f127 = [AOFFSET]
  2309. adds AOFFSET = - 30 * SIZE, AOFFSET
  2310. ;;
  2311. FMPY f32 = f72, f64
  2312. FMPY f33 = f73, f64
  2313. FMPY f34 = f72, f80
  2314. FMPY f35 = f73, f80
  2315. ;;
  2316. FMA_C f64 = f73, f65, f32
  2317. FMA_D f65 = f72, f65, f33
  2318. FMA_C f80 = f73, f81, f34
  2319. FMA_D f81 = f72, f81, f35
  2320. ;;
  2321. FNMA f96 = f74, f64, f96
  2322. FMA_A f97 = f75, f64, f97
  2323. FNMA f112 = f74, f80, f112
  2324. FMA_A f113 = f75, f80, f113
  2325. ;;
  2326. FMA_B f96 = f75, f65, f96
  2327. FNMA f97 = f74, f65, f97
  2328. FMA_B f112 = f75, f81, f112
  2329. FNMA f113 = f74, f81, f113
  2330. ;;
  2331. FNMA f66 = f76, f64, f66
  2332. FMA_A f67 = f77, f64, f67
  2333. FNMA f82 = f76, f80, f82
  2334. FMA_A f83 = f77, f80, f83
  2335. ;;
  2336. FMA_B f66 = f77, f65, f66
  2337. FNMA f67 = f76, f65, f67
  2338. FMA_B f82 = f77, f81, f82
  2339. FNMA f83 = f76, f81, f83
  2340. ;;
  2341. FNMA f98 = f78, f64, f98
  2342. FMA_A f99 = f79, f64, f99
  2343. FNMA f114 = f78, f80, f114
  2344. FMA_A f115 = f79, f80, f115
  2345. ;;
  2346. FMA_B f98 = f79, f65, f98
  2347. FNMA f99 = f78, f65, f99
  2348. FMA_B f114 = f79, f81, f114
  2349. FNMA f115 = f78, f81, f115
  2350. ;;
  2351. FMPY f32 = f90, f96
  2352. FMPY f33 = f91, f96
  2353. FMPY f34 = f90, f112
  2354. FMPY f35 = f91, f112
  2355. ;;
  2356. FMA_C f96 = f91, f97, f32
  2357. FMA_D f97 = f90, f97, f33
  2358. FMA_C f112 = f91, f113, f34
  2359. FMA_D f113 = f90, f113, f35
  2360. ;;
  2361. FNMA f66 = f92, f96, f66
  2362. FMA_A f67 = f93, f96, f67
  2363. FNMA f82 = f92, f112, f82
  2364. FMA_A f83 = f93, f112, f83
  2365. ;;
  2366. FMA_B f66 = f93, f97, f66
  2367. FNMA f67 = f92, f97, f67
  2368. FMA_B f82 = f93, f113, f82
  2369. FNMA f83 = f92, f113, f83
  2370. ;;
  2371. FNMA f98 = f94, f96, f98
  2372. FMA_A f99 = f95, f96, f99
  2373. FNMA f114 = f94, f112, f114
  2374. FMA_A f115 = f95, f112, f115
  2375. ;;
  2376. FMA_B f98 = f95, f97, f98
  2377. FNMA f99 = f94, f97, f99
  2378. FMA_B f114 = f95, f113, f114
  2379. FNMA f115 = f94, f113, f115
  2380. ;;
  2381. FMPY f32 = f108, f66
  2382. FMPY f33 = f109, f66
  2383. FMPY f34 = f108, f82
  2384. FMPY f35 = f109, f82
  2385. ;;
  2386. FMA_C f66 = f109, f67, f32
  2387. FMA_D f67 = f108, f67, f33
  2388. FMA_C f82 = f109, f83, f34
  2389. FMA_D f83 = f108, f83, f35
  2390. ;;
  2391. FNMA f98 = f110, f66, f98
  2392. FMA_A f99 = f111, f66, f99
  2393. FNMA f114 = f110, f82, f114
  2394. FMA_A f115 = f111, f82, f115
  2395. ;;
  2396. FMA_B f98 = f111, f67, f98
  2397. FNMA f99 = f110, f67, f99
  2398. FMA_B f114 = f111, f83, f114
  2399. FNMA f115 = f110, f83, f115
  2400. ;;
  2401. FMPY f32 = f126, f98
  2402. FMPY f33 = f127, f98
  2403. FMPY f34 = f126, f114
  2404. FMPY f35 = f127, f114
  2405. ;;
  2406. FMA_C f98 = f127, f99, f32
  2407. FMA_D f99 = f126, f99, f33
  2408. FMA_C f114 = f127, f115, f34
  2409. FMA_D f115 = f126, f115, f35
  2410. ;;
  2411. #endif
  2412. #ifdef RN
  2413. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  2414. ;;
  2415. LDFPD f74, f75 = [BOFFSET]
  2416. adds BOFFSET = 4 * SIZE, BOFFSET
  2417. ;;
  2418. LDFPD f90, f91 = [BOFFSET]
  2419. adds BOFFSET = - 6 * SIZE, BOFFSET
  2420. ;;
  2421. FMPY f32 = f72, f64
  2422. FMPY f33 = f73, f64
  2423. FMPY f34 = f72, f96
  2424. FMPY f35 = f73, f96
  2425. FMPY f36 = f72, f66
  2426. FMPY f37 = f73, f66
  2427. FMPY f38 = f72, f98
  2428. FMPY f39 = f73, f98
  2429. ;;
  2430. FMA_C f64 = f73, f65, f32
  2431. FMA_D f65 = f72, f65, f33
  2432. FMA_C f96 = f73, f97, f34
  2433. FMA_D f97 = f72, f97, f35
  2434. FMA_C f66 = f73, f67, f36
  2435. FMA_D f67 = f72, f67, f37
  2436. FMA_C f98 = f73, f99, f38
  2437. FMA_D f99 = f72, f99, f39
  2438. ;;
  2439. FNMA f80 = f74, f64, f80
  2440. FMA_A f81 = f75, f64, f81
  2441. FNMA f112 = f74, f96, f112
  2442. FMA_A f113 = f75, f96, f113
  2443. FNMA f82 = f74, f66, f82
  2444. FMA_A f83 = f75, f66, f83
  2445. FNMA f114 = f74, f98, f114
  2446. FMA_A f115 = f75, f98, f115
  2447. ;;
  2448. FMA_B f80 = f75, f65, f80
  2449. FNMA f81 = f74, f65, f81
  2450. FMA_B f112 = f75, f97, f112
  2451. FNMA f113 = f74, f97, f113
  2452. FMA_B f82 = f75, f67, f82
  2453. FNMA f83 = f74, f67, f83
  2454. FMA_B f114 = f75, f99, f114
  2455. FNMA f115 = f74, f99, f115
  2456. ;;
  2457. FMPY f32 = f90, f80
  2458. FMPY f33 = f91, f80
  2459. FMPY f34 = f90, f112
  2460. FMPY f35 = f91, f112
  2461. FMPY f36 = f90, f82
  2462. FMPY f37 = f91, f82
  2463. FMPY f38 = f90, f114
  2464. FMPY f39 = f91, f114
  2465. ;;
  2466. FMA_C f80 = f91, f81, f32
  2467. FMA_D f81 = f90, f81, f33
  2468. FMA_C f112 = f91, f113, f34
  2469. FMA_D f113 = f90, f113, f35
  2470. FMA_C f82 = f91, f83, f36
  2471. FMA_D f83 = f90, f83, f37
  2472. FMA_C f114 = f91, f115, f38
  2473. FMA_D f115 = f90, f115, f39
  2474. ;;
  2475. #endif
  2476. #ifdef RT
  2477. adds BOFFSET = 6 * SIZE, BOFFSET
  2478. ;;
  2479. LDFPD f104, f105 = [BOFFSET]
  2480. adds BOFFSET = - 2 * SIZE, BOFFSET
  2481. ;;
  2482. LDFPD f106, f107 = [BOFFSET]
  2483. adds BOFFSET = - 4 * SIZE, BOFFSET
  2484. ;;
  2485. LDFPD f120, f121 = [BOFFSET]
  2486. ;;
  2487. FMPY f32 = f104, f80
  2488. FMPY f33 = f105, f80
  2489. FMPY f34 = f104, f112
  2490. FMPY f35 = f105, f112
  2491. FMPY f36 = f104, f82
  2492. FMPY f37 = f105, f82
  2493. FMPY f38 = f104, f114
  2494. FMPY f39 = f105, f114
  2495. ;;
  2496. FMA_C f80 = f105, f81, f32
  2497. FMA_D f81 = f104, f81, f33
  2498. FMA_C f112 = f105, f113, f34
  2499. FMA_D f113 = f104, f113, f35
  2500. FMA_C f82 = f105, f83, f36
  2501. FMA_D f83 = f104, f83, f37
  2502. FMA_C f114 = f105, f115, f38
  2503. FMA_D f115 = f104, f115, f39
  2504. ;;
  2505. FNMA f64 = f106, f80, f64
  2506. FMA_A f65 = f107, f80, f65
  2507. FNMA f96 = f106, f112, f96
  2508. FMA_A f97 = f107, f112, f97
  2509. FNMA f66 = f106, f82, f66
  2510. FMA_A f67 = f107, f82, f67
  2511. FNMA f98 = f106, f114, f98
  2512. FMA_A f99 = f107, f114, f99
  2513. ;;
  2514. FMA_B f64 = f107, f81, f64
  2515. FNMA f65 = f106, f81, f65
  2516. FMA_B f96 = f107, f113, f96
  2517. FNMA f97 = f106, f113, f97
  2518. FMA_B f66 = f107, f83, f66
  2519. FNMA f67 = f106, f83, f67
  2520. FMA_B f98 = f107, f115, f98
  2521. FNMA f99 = f106, f115, f99
  2522. ;;
  2523. FMPY f32 = f120, f64
  2524. FMPY f33 = f121, f64
  2525. FMPY f34 = f120, f96
  2526. FMPY f35 = f121, f96
  2527. FMPY f36 = f120, f66
  2528. FMPY f37 = f121, f66
  2529. FMPY f38 = f120, f98
  2530. FMPY f39 = f121, f98
  2531. ;;
  2532. FMA_C f64 = f121, f65, f32
  2533. FMA_D f65 = f120, f65, f33
  2534. FMA_C f96 = f121, f97, f34
  2535. FMA_D f97 = f120, f97, f35
  2536. FMA_C f66 = f121, f67, f36
  2537. FMA_D f67 = f120, f67, f37
  2538. FMA_C f98 = f121, f99, f38
  2539. FMA_D f99 = f120, f99, f39
  2540. ;;
  2541. #endif
  2542. #if defined(LN) || defined(LT)
  2543. adds BOFFSET2 = 4 * SIZE, BOFFSET
  2544. ;;
  2545. STFD [BOFFSET] = f64, SIZE
  2546. STFD [BOFFSET2] = f96, SIZE
  2547. ;;
  2548. STFD [BOFFSET] = f65, SIZE
  2549. STFD [BOFFSET2] = f97, SIZE
  2550. ;;
  2551. STFD [BOFFSET] = f80, SIZE
  2552. STFD [BOFFSET2] = f112, SIZE
  2553. ;;
  2554. STFD [BOFFSET] = f81, 5 * SIZE
  2555. STFD [BOFFSET2] = f113, 5 * SIZE
  2556. ;;
  2557. STFD [BOFFSET] = f66, SIZE
  2558. STFD [BOFFSET2] = f98, SIZE
  2559. ;;
  2560. STFD [BOFFSET] = f67, SIZE
  2561. STFD [BOFFSET2] = f99, SIZE
  2562. ;;
  2563. STFD [BOFFSET] = f82, SIZE
  2564. STFD [BOFFSET2] = f114, SIZE
  2565. ;;
  2566. STFD [BOFFSET] = f83, 5 * SIZE
  2567. STFD [BOFFSET2] = f115, 5 * SIZE
  2568. ;;
  2569. adds BOFFSET = - 16 * SIZE, BOFFSET
  2570. ;;
  2571. #else
  2572. adds AOFFSET2 = 4 * SIZE, AOFFSET
  2573. ;;
  2574. STFD [AOFFSET] = f64, SIZE
  2575. STFD [AOFFSET2] = f66, SIZE
  2576. ;;
  2577. STFD [AOFFSET] = f65, SIZE
  2578. STFD [AOFFSET2] = f67, SIZE
  2579. ;;
  2580. STFD [AOFFSET] = f96, SIZE
  2581. STFD [AOFFSET2] = f98, SIZE
  2582. ;;
  2583. STFD [AOFFSET] = f97, 5 * SIZE
  2584. STFD [AOFFSET2] = f99, 5 * SIZE
  2585. ;;
  2586. STFD [AOFFSET] = f80, SIZE
  2587. STFD [AOFFSET2] = f82, SIZE
  2588. ;;
  2589. STFD [AOFFSET] = f81, SIZE
  2590. STFD [AOFFSET2] = f83, SIZE
  2591. ;;
  2592. STFD [AOFFSET] = f112, SIZE
  2593. STFD [AOFFSET2] = f114, SIZE
  2594. ;;
  2595. STFD [AOFFSET] = f113, 5 * SIZE
  2596. STFD [AOFFSET2] = f115, 5 * SIZE
  2597. ;;
  2598. adds AOFFSET = - 16 * SIZE, AOFFSET
  2599. ;;
  2600. #endif
  2601. #ifdef LN
  2602. adds C1 = -8 * SIZE, C1
  2603. adds C2 = -8 * SIZE, C2
  2604. adds C5 = -8 * SIZE, C5
  2605. adds C6 = -8 * SIZE, C6
  2606. #endif
  2607. ;;
  2608. STFD [C1 ] = f64, SIZE
  2609. STFD [C5 ] = f66, SIZE
  2610. ;;
  2611. STFD [C1 ] = f65, SIZE
  2612. STFD [C5 ] = f67, SIZE
  2613. ;;
  2614. STFD [C1 ] = f96, SIZE
  2615. STFD [C5 ] = f98, SIZE
  2616. ;;
  2617. STFD [C1 ] = f97, 5 * SIZE
  2618. STFD [C5 ] = f99, 5 * SIZE
  2619. ;;
  2620. STFD [C2 ] = f80, SIZE
  2621. STFD [C6 ] = f82, SIZE
  2622. ;;
  2623. STFD [C2 ] = f81, SIZE
  2624. STFD [C6 ] = f83, SIZE
  2625. ;;
  2626. STFD [C2 ] = f112, SIZE
  2627. STFD [C6 ] = f114, SIZE
  2628. ;;
  2629. STFD [C2 ] = f113, 5 * SIZE
  2630. STFD [C6 ] = f115, 5 * SIZE
  2631. ;;
  2632. mov f64 = f0
  2633. mov f65 = f0
  2634. mov f80 = f0
  2635. mov f81 = f0
  2636. mov f96 = f0
  2637. mov f97 = f0
  2638. mov f112 = f0
  2639. mov f113 = f0
  2640. ;;
  2641. #ifdef LN
  2642. adds C1 = -8 * SIZE, C1
  2643. adds C2 = -8 * SIZE, C2
  2644. adds C5 = -8 * SIZE, C5
  2645. adds C6 = -8 * SIZE, C6
  2646. #endif
  2647. ;;
  2648. cmp.ne p6, p0 = 1, I
  2649. ;;
  2650. adds I = -1, I
  2651. ;;
  2652. shladd r2 = K, ZBASE_SHIFT, r0
  2653. ;;
  2654. sub L = K, KK
  2655. ;;
  2656. #ifdef RT
  2657. shladd AORIG = r2, 2, AORIG
  2658. #endif
  2659. ;;
  2660. #if defined(LT) || defined(RN)
  2661. shladd L = L, ZBASE_SHIFT, r0
  2662. ;;
  2663. shladd AOFFSET = L, 2, AOFFSET
  2664. shladd BOFFSET = L, 1, BOFFSET
  2665. #endif
  2666. ;;
  2667. #ifdef LT
  2668. adds KK = 4, KK
  2669. #elif defined LN
  2670. adds KK = -4, KK
  2671. #else
  2672. nop __LINE__
  2673. #endif
  2674. ;;
  2675. #if defined(LT) || defined(RN)
  2676. mov L = KK
  2677. #else
  2678. sub L = K, KK
  2679. #endif
  2680. ;;
  2681. (p6) br.cond.dptk .L052
  2682. ;;
  2683. .align 16
  // .L060: set-up for the block handled when bit 1 of M is set.  When that
  // bit is clear control goes straight to .L070.  On fall-through this code
  // positions AOFFSET/BOFFSET, preloads the first operands, derives the trip
  // count for the two-steps-per-pass loop at .L062 and enters it, or skips to
  // .L068 when the trip count is empty.
  // NOTE(review): LDFPD, SIZE, PREFETCHSIZE, ZBASE_SHIFT and the symbolic
  // register names (L, K, KK, M, B, AORIG, ...) are defined outside this chunk.
  2684. .L060:
  2685. { .mib
  2686. #if defined(LT) || defined(RN)
  2687. mov L = KK
  2688. #else
  2689. sub L = K, KK
  2690. #endif
  // p6 = (bit 1 of M is zero): nothing to do for this block size.
  2691. tbit.z p6, p7 = M, 1
  2692. (p6) br.cond.dptk .L070
  2693. }
  2694. ;;
  2695. { .mmi
  // p7 = (L != 0); it guards every preload below.
  2696. cmp.ne p7, p0 = r0, L
  2697. adds BOFFSET = 0 * SIZE, B
  // r2 = K << (1 + ZBASE_SHIFT); only consumed by the LN adjustment of AORIG.
  2698. shl r2 = K, 1 + ZBASE_SHIFT
  2699. }
  2700. { .mmi
  // r3 = KK << ZBASE_SHIFT.
  2701. shladd r3 = KK, ZBASE_SHIFT, r0
  2702. nop __LINE__
  2703. nop __LINE__
  2704. }
  2705. ;;
  2706. #if defined(LT) || defined(RN)
  2707. { .mfb
  2708. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  2709. }
  2710. ;;
  2711. #else
  2712. { .mfi
  // BOFFSET = B + 2 * r3.
  2713. shladd BOFFSET = r3, 1, B
  2714. #ifdef LN
  2715. sub AORIG = AORIG, r2
  2716. #else
  2717. nop __LINE__
  2718. #endif
  2719. }
  2720. ;;
  2721. { .mfi
  2722. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  // AOFFSET = AORIG + 2 * r3.
  2723. shladd AOFFSET = r3, 1, AORIG
  2724. }
  2725. ;;
  2726. #endif
  2727. ;;
  // Trip count: L = ((L + 1) >> 1) - 1.  p12 records that bit 0 of L + 1 is
  // zero; .L062 uses it to decide whether p3 is recomputed on each pass.
  2728. adds L = 1, L
  2729. ;;
  2730. { .mmi
  2731. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  2732. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  2733. tbit.z p12, p0 = L, 0
  2734. }
  2735. { .mmi
  2736. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  2737. shr L = L, 1
  2738. }
  2739. ;;
  2740. { .mmi
  2741. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  2742. nop __LINE__
  2743. adds L = -1, L
  2744. }
  2745. ;;
  2746. { .mmi
  // Point PREA at the A operand stream and force p3 true before entering
  // .L062.  The loop issues lfetch through PREA and predicates its second
  // half on p3, which it only rewrites when p12 is set; without this a stale
  // p3 from earlier code would decide whether half of the products are
  // accumulated.  The .L070 set-up initialises PREA and p3 the same way.
  2747. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  2748. cmp.eq p3, p0 = r0, r0
  2749. mov ar.lc = L
  2750. }
  2751. ;;
  // L == -1 means there is nothing to accumulate: go directly to the solve.
  2752. cmp.eq p6, p0 = -1, L
  2753. (p6) br.cond.dpnt .L068
  2754. ;;
  2755. .align 16
  // .L062: accumulate loop for the block set up by .L060.  Every pass runs
  // one unconditional set of sixteen FMA-family updates on f32-f35 (from
  // AOFFSET) and f48-f51 (from BOFFSET), followed by a second set of sixteen
  // on f40-f43 and f56-f59 that is predicated on p3.  The accumulators are
  // f64/f65, f80/f81, f96/f97 and f112/f113.
  // Predicates:
  //   p4 = (L != 0)  - reload f32-f35 / f48-f51 for the next pass.
  //   p3             - run the second set; it is rewritten here only when p12
  //                    (computed by the set-up code) is set, otherwise it
  //                    keeps the value it had on entry.
  // NOTE(review): FMA, FMA_A and FMA_B are macros defined outside this chunk;
  // presumably they differ in sign to cover the conjugated variants - confirm.
  2756. .L062:
  2757. { .mfi
  2758. lfetch.nt1 [PREA], 8 * SIZE
  2759. FMA f64 = f32, f48, f64 // A1 * B1
  2760. cmp.ne p4, p5 = 0, L
  2761. }
  2762. { .mfi
  2763. nop __LINE__
  2764. FMA_B f65 = f32, f49, f65 // A1 * B2
  2765. (p12) cmp.ne p3, p0 = 0, L
  2766. }
  2767. ;;
  2768. { .mfb
  2769. lfetch.nt1 [PREB], 8 * SIZE
  2770. FMA f80 = f32, f50, f80 // A1 * B3
  2771. nop __LINE__
  2772. }
  2773. { .mfb
  2774. nop __LINE__
  2775. FMA_B f81 = f32, f51, f81 // A1 * B4
  2776. nop __LINE__
  2777. }
  2778. ;;
  // Loads for the second (p3) set are interleaved with the first set.
  2779. { .mfb
  2780. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  2781. FMA f96 = f34, f48, f96 // A3 * B1
  2782. nop __LINE__
  2783. }
  2784. { .mfb
  2785. nop __LINE__
  2786. FMA_B f97 = f34, f49, f97 // A3 * B2
  2787. nop __LINE__
  2788. }
  2789. ;;
  2790. { .mfb
  2791. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  2792. FMA f112 = f34, f50, f112 // A3 * B3
  2793. nop __LINE__
  2794. }
  2795. { .mfb
  2796. nop __LINE__
  2797. FMA_B f113 = f34, f51, f113 // A3 * B4
  2798. nop __LINE__
  2799. }
  2800. ;;
  2801. { .mfb
  2802. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  2803. FMA f65 = f33, f48, f65 // A2 * B1
  2804. nop __LINE__
  2805. }
  2806. { .mfb
  2807. nop __LINE__
  2808. FMA_A f64 = f33, f49, f64 // A2 * B2
  2809. nop __LINE__
  2810. }
  2811. ;;
  2812. { .mfb
  2813. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  2814. FMA f81 = f33, f50, f81 // A2 * B3
  2815. nop __LINE__
  2816. }
  2817. { .mfb
  2818. nop __LINE__
  2819. FMA_A f80 = f33, f51, f80 // A2 * B4
  2820. nop __LINE__
  2821. }
  2822. ;;
  // The next four bundles form a single instruction group (no stop between
  // them): the FMAs still read the old f48/f49 while the p4 load that
  // overwrites them is issued later in the same group.
  2823. { .mfb
  2824. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  2825. FMA f97 = f35, f48, f97 // A4 * B1
  2826. }
  2827. { .mfb
  2828. FMA_A f96 = f35, f49, f96 // A4 * B2
  2829. nop __LINE__
  2830. }
  2831. { .mfb
  2832. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  2833. FMA f113 = f35, f50, f113 // A4 * B3
  2834. nop __LINE__
  2835. }
  2836. { .mfb
  2837. FMA_A f112 = f35, f51, f112 // A4 * B4
  2838. nop __LINE__
  2839. }
  2840. ;;
  // Second set: same update pattern on f40-f43 / f56-f59, all under p3.
  2841. { .mfb
  2842. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  2843. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  2844. nop __LINE__
  2845. }
  2846. { .mfb
  2847. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  2848. nop __LINE__
  2849. }
  2850. ;;
  2851. { .mfb
  2852. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  2853. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  2854. nop __LINE__
  2855. }
  2856. { .mfb
  2857. (p3) FMA_B f81 = f40, f59, f81 // A1 * B4
  2858. nop __LINE__
  2859. }
  2860. ;;
  2861. { .mfb
  2862. nop __LINE__
  2863. (p3) FMA f96 = f42, f56, f96 // A3 * B1
  2864. nop __LINE__
  2865. }
  2866. { .mfb
  2867. nop __LINE__
  2868. (p3) FMA_B f97 = f42, f57, f97 // A3 * B2
  2869. nop __LINE__
  2870. }
  2871. ;;
  2872. { .mfb
  2873. nop __LINE__
  2874. (p3) FMA f112 = f42, f58, f112 // A3 * B3
  2875. nop __LINE__
  2876. }
  2877. { .mfb
  2878. nop __LINE__
  2879. (p3) FMA_B f113 = f42, f59, f113 // A3 * B4
  2880. nop __LINE__
  2881. }
  2882. ;;
  2883. { .mfb
  2884. nop __LINE__
  2885. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  2886. nop __LINE__
  2887. }
  2888. { .mfb
  2889. nop __LINE__
  2890. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  2891. nop __LINE__
  2892. }
  2893. ;;
  2894. { .mfb
  2895. nop __LINE__
  2896. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  2897. nop __LINE__
  2898. }
  2899. { .mfb
  2900. nop __LINE__
  2901. (p3) FMA_A f80 = f41, f59, f80 // A2 * B4
  2902. nop __LINE__
  2903. }
  2904. ;;
  2905. { .mfb
  2906. nop __LINE__
  2907. (p3) FMA f97 = f43, f56, f97 // A4 * B1
  2908. nop __LINE__
  2909. }
  2910. { .mfb
  2911. nop __LINE__
  2912. (p3) FMA_A f96 = f43, f57, f96 // A4 * B2
  2913. nop __LINE__
  2914. }
  2915. ;;
  // L is decremented once per pass so that p4 / p3 can be derived from it at
  // the top of the next pass; the loop itself is counted by ar.lc.
  2916. { .mfi
  2917. nop __LINE__
  2918. (p3) FMA f113 = f43, f58, f113 // A4 * B3
  2919. adds L = -1, L
  2920. }
  2921. { .mfb
  2922. nop __LINE__
  2923. (p3) FMA_A f112 = f43, f59, f112 // A4 * B4
  2924. br.cloop.sptk.few .L062
  2925. }
  2926. ;;
  // .L068: finish the block accumulated in f64/f65, f80/f81, f96/f97 and
  // f112/f113.  Steps, in order:
  //   1. (LN/RT) recompute AOFFSET/BOFFSET from KK.
  //   2. load the packed values and replace each accumulator by
  //      "loaded value minus accumulator".
  //   3. run the update sequence selected by LN / LT / RN / RT.
  //   4. write the results back to the packed buffer and to C1/C2.
  //   5. clear the accumulators and advance pointers, I and KK.
  // NOTE(review): the file is a TRSM kernel, so step 3 is presumably the
  // triangular solve for this block with pre-inverted diagonal entries, and
  // each FMPY/FMA_C/FMA_D group presumably a complex multiply - confirm
  // against the packing routine.  FSUB_A, FMA_A..FMA_D are macros defined
  // outside this chunk.
  2927. .L068:
  2928. #if defined(LN) || defined(RT)
  // Both arms of the inner conditional emit the same instruction here.
  2929. #ifdef LN
  2930. adds r2 = -2, KK
  2931. #else
  2932. adds r2 = -2, KK
  2933. #endif
  2934. ;;
  // r2 = (KK - 2) << ZBASE_SHIFT; AOFFSET = AORIG + 2 * r2; BOFFSET = B + 2 * r2.
  2935. shladd r2 = r2, ZBASE_SHIFT, r0
  2936. ;;
  2937. shladd AOFFSET = r2, 1, AORIG
  2938. shladd BOFFSET = r2, 1, B
  2939. ;;
  2940. #endif
  2941. #if defined(LN) || defined(LT)
  // Four pairs are read from BOFFSET; the trailing -6 * SIZE restores it.
  2942. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  2943. ;;
  2944. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  2945. ;;
  2946. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  2947. ;;
  2948. LDFPD f90, f91 = [BOFFSET]
  2949. adds BOFFSET = -6 * SIZE, BOFFSET
  2950. ;;
  2951. FSUB f64 = f72, f64
  2952. FSUB_A f65 = f73, f65
  2953. FSUB f80 = f74, f80
  2954. FSUB_A f81 = f75, f81
  2955. FSUB f96 = f88, f96
  2956. FSUB_A f97 = f89, f97
  2957. FSUB f112 = f90, f112
  2958. FSUB_A f113 = f91, f113
  2959. ;;
  2960. #else
  // RN/RT: the values come from AOFFSET instead, and the second and third
  // pairs map to f96/f97 and f80/f81 (swapped relative to the branch above).
  2961. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  2962. ;;
  2963. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  2964. ;;
  2965. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  2966. ;;
  2967. LDFPD f90, f91 = [AOFFSET]
  2968. adds AOFFSET = -6 * SIZE, AOFFSET
  2969. ;;
  2970. FSUB f64 = f72, f64
  2971. FSUB f65 = f73, f65
  2972. FSUB f96 = f74, f96
  2973. FSUB f97 = f75, f97
  2974. FSUB f80 = f88, f80
  2975. FSUB f81 = f89, f81
  2976. FSUB f112 = f90, f112
  2977. FSUB f113 = f91, f113
  2978. ;;
  2979. #endif
  2980. #ifdef LN
  // LN: coefficient pairs are read from AOFFSET at offsets 6, 4 and 0 (in
  // units of SIZE); AOFFSET ends where it started.  f96/f97 and f112/f113 are
  // scaled first, then eliminated from f64/f65 and f80/f81, which are scaled
  // last.
  2981. adds AOFFSET = 6 * SIZE, AOFFSET
  2982. ;;
  2983. LDFPD f104, f105 = [AOFFSET]
  2984. adds AOFFSET = - 2 * SIZE, AOFFSET
  2985. ;;
  2986. LDFPD f106, f107 = [AOFFSET]
  2987. adds AOFFSET = - 4 * SIZE, AOFFSET
  2988. ;;
  2989. LDFPD f120, f121 = [AOFFSET]
  2990. ;;
  2991. FMPY f32 = f104, f96
  2992. FMPY f33 = f105, f96
  2993. FMPY f34 = f104, f112
  2994. FMPY f35 = f105, f112
  2995. ;;
  2996. FMA_C f96 = f105, f97, f32
  2997. FMA_D f97 = f104, f97, f33
  2998. FMA_C f112 = f105, f113, f34
  2999. FMA_D f113 = f104, f113, f35
  3000. ;;
  3001. FNMA f64 = f106, f96, f64
  3002. FMA_A f65 = f107, f96, f65
  3003. FNMA f80 = f106, f112, f80
  3004. FMA_A f81 = f107, f112, f81
  3005. ;;
  3006. FMA_B f64 = f107, f97, f64
  3007. FNMA f65 = f106, f97, f65
  3008. FMA_B f80 = f107, f113, f80
  3009. FNMA f81 = f106, f113, f81
  3010. ;;
  3011. FMPY f32 = f120, f64
  3012. FMPY f33 = f121, f64
  3013. FMPY f34 = f120, f80
  3014. FMPY f35 = f121, f80
  3015. ;;
  3016. FMA_C f64 = f121, f65, f32
  3017. FMA_D f65 = f120, f65, f33
  3018. FMA_C f80 = f121, f81, f34
  3019. FMA_D f81 = f120, f81, f35
  3020. ;;
  3021. #endif
  3022. #ifdef LT
  // LT: coefficient pairs are read from AOFFSET at offsets 0, 2 and 6;
  // AOFFSET ends where it started.  f64/f65 and f80/f81 are scaled first,
  // eliminated from f96/f97 and f112/f113, which are scaled last.
  3023. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  3024. ;;
  3025. LDFPD f74, f75 = [AOFFSET]
  3026. adds AOFFSET = 4 * SIZE, AOFFSET
  3027. ;;
  3028. LDFPD f90, f91 = [AOFFSET]
  3029. adds AOFFSET = - 6 * SIZE, AOFFSET
  3030. ;;
  3031. FMPY f32 = f72, f64
  3032. FMPY f33 = f73, f64
  3033. FMPY f34 = f72, f80
  3034. FMPY f35 = f73, f80
  3035. ;;
  3036. FMA_C f64 = f73, f65, f32
  3037. FMA_D f65 = f72, f65, f33
  3038. FMA_C f80 = f73, f81, f34
  3039. FMA_D f81 = f72, f81, f35
  3040. ;;
  3041. FNMA f96 = f74, f64, f96
  3042. FMA_A f97 = f75, f64, f97
  3043. FNMA f112 = f74, f80, f112
  3044. FMA_A f113 = f75, f80, f113
  3045. ;;
  3046. FMA_B f96 = f75, f65, f96
  3047. FNMA f97 = f74, f65, f97
  3048. FMA_B f112 = f75, f81, f112
  3049. FNMA f113 = f74, f81, f113
  3050. ;;
  3051. FMPY f32 = f90, f96
  3052. FMPY f33 = f91, f96
  3053. FMPY f34 = f90, f112
  3054. FMPY f35 = f91, f112
  3055. ;;
  3056. FMA_C f96 = f91, f97, f32
  3057. FMA_D f97 = f90, f97, f33
  3058. FMA_C f112 = f91, f113, f34
  3059. FMA_D f113 = f90, f113, f35
  3060. ;;
  3061. #endif
  3062. #ifdef RN
  // RN: coefficient pairs are read from BOFFSET at offsets 0, 2 and 6;
  // BOFFSET ends where it started.  f64/f65 and f96/f97 are scaled first,
  // eliminated from f80/f81 and f112/f113, which are scaled last.
  3063. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  3064. ;;
  3065. LDFPD f74, f75 = [BOFFSET]
  3066. adds BOFFSET = 4 * SIZE, BOFFSET
  3067. ;;
  3068. LDFPD f90, f91 = [BOFFSET]
  3069. adds BOFFSET = - 6 * SIZE, BOFFSET
  3070. ;;
  3071. FMPY f32 = f72, f64
  3072. FMPY f33 = f73, f64
  3073. FMPY f34 = f72, f96
  3074. FMPY f35 = f73, f96
  3075. ;;
  3076. FMA_C f64 = f73, f65, f32
  3077. FMA_D f65 = f72, f65, f33
  3078. FMA_C f96 = f73, f97, f34
  3079. FMA_D f97 = f72, f97, f35
  3080. ;;
  3081. FNMA f80 = f74, f64, f80
  3082. FMA_A f81 = f75, f64, f81
  3083. FNMA f112 = f74, f96, f112
  3084. FMA_A f113 = f75, f96, f113
  3085. ;;
  3086. FMA_B f80 = f75, f65, f80
  3087. FNMA f81 = f74, f65, f81
  3088. FMA_B f112 = f75, f97, f112
  3089. FNMA f113 = f74, f97, f113
  3090. ;;
  3091. FMPY f32 = f90, f80
  3092. FMPY f33 = f91, f80
  3093. FMPY f34 = f90, f112
  3094. FMPY f35 = f91, f112
  3095. ;;
  3096. FMA_C f80 = f91, f81, f32
  3097. FMA_D f81 = f90, f81, f33
  3098. FMA_C f112 = f91, f113, f34
  3099. FMA_D f113 = f90, f113, f35
  3100. ;;
  3101. #endif
  3102. #ifdef RT
  // RT: coefficient pairs are read from BOFFSET at offsets 6, 4 and 0;
  // BOFFSET ends where it started.  f80/f81 and f112/f113 are scaled first,
  // eliminated from f64/f65 and f96/f97, which are scaled last.
  3103. adds BOFFSET = 6 * SIZE, BOFFSET
  3104. ;;
  3105. LDFPD f104, f105 = [BOFFSET]
  3106. adds BOFFSET = - 2 * SIZE, BOFFSET
  3107. ;;
  3108. LDFPD f106, f107 = [BOFFSET]
  3109. adds BOFFSET = - 4 * SIZE, BOFFSET
  3110. ;;
  3111. LDFPD f120, f121 = [BOFFSET]
  3112. ;;
  3113. FMPY f32 = f104, f80
  3114. FMPY f33 = f105, f80
  3115. FMPY f34 = f104, f112
  3116. FMPY f35 = f105, f112
  3117. ;;
  3118. FMA_C f80 = f105, f81, f32
  3119. FMA_D f81 = f104, f81, f33
  3120. FMA_C f112 = f105, f113, f34
  3121. FMA_D f113 = f104, f113, f35
  3122. ;;
  3123. FNMA f64 = f106, f80, f64
  3124. FMA_A f65 = f107, f80, f65
  3125. FNMA f96 = f106, f112, f96
  3126. FMA_A f97 = f107, f112, f97
  3127. ;;
  3128. FMA_B f64 = f107, f81, f64
  3129. FNMA f65 = f106, f81, f65
  3130. FMA_B f96 = f107, f113, f96
  3131. FNMA f97 = f106, f113, f97
  3132. ;;
  3133. FMPY f32 = f120, f64
  3134. FMPY f33 = f121, f64
  3135. FMPY f34 = f120, f96
  3136. FMPY f35 = f121, f96
  3137. ;;
  3138. FMA_C f64 = f121, f65, f32
  3139. FMA_D f65 = f120, f65, f33
  3140. FMA_C f96 = f121, f97, f34
  3141. FMA_D f97 = f120, f97, f35
  3142. ;;
  3143. #endif
  // Write the eight results back to the packed buffer through two pointers
  // four elements apart.  Each pointer advances by 8 * SIZE in total and the
  // primary one is rewound by 8 * SIZE afterwards.
  3144. #if defined(LN) || defined(LT)
  3145. adds BOFFSET2 = 4 * SIZE, BOFFSET
  3146. ;;
  3147. STFD [BOFFSET] = f64, SIZE
  3148. STFD [BOFFSET2] = f96, SIZE
  3149. ;;
  3150. STFD [BOFFSET] = f65, SIZE
  3151. STFD [BOFFSET2] = f97, SIZE
  3152. ;;
  3153. STFD [BOFFSET] = f80, SIZE
  3154. STFD [BOFFSET2] = f112, SIZE
  3155. ;;
  3156. STFD [BOFFSET] = f81, 5 * SIZE
  3157. STFD [BOFFSET2] = f113, 5 * SIZE
  3158. ;;
  3159. adds BOFFSET = - 8 * SIZE, BOFFSET
  3160. ;;
  3161. #else
  3162. adds AOFFSET2 = 4 * SIZE, AOFFSET
  3163. ;;
  3164. STFD [AOFFSET] = f64, SIZE
  3165. STFD [AOFFSET2] = f80, SIZE
  3166. ;;
  3167. STFD [AOFFSET] = f65, SIZE
  3168. STFD [AOFFSET2] = f81, SIZE
  3169. ;;
  3170. STFD [AOFFSET] = f96, SIZE
  3171. STFD [AOFFSET2] = f112, SIZE
  3172. ;;
  3173. STFD [AOFFSET] = f97, 5 * SIZE
  3174. STFD [AOFFSET2] = f113, 5 * SIZE
  3175. ;;
  3176. adds AOFFSET = - 8 * SIZE, AOFFSET
  3177. ;;
  3178. #endif
  // Store to C: four values through C1 and four through C2, each pointer
  // advancing by 4 * SIZE.  For LN the pointers are moved back by 4 * SIZE
  // before the stores and again after them.
  3179. #ifdef LN
  3180. adds C1 = -4 * SIZE, C1
  3181. adds C2 = -4 * SIZE, C2
  3182. #endif
  3183. ;;
  3184. STFD [C1 ] = f64, SIZE
  3185. ;;
  3186. STFD [C1 ] = f65, SIZE
  3187. ;;
  3188. STFD [C1 ] = f96, SIZE
  3189. ;;
  3190. STFD [C1 ] = f97, SIZE
  3191. ;;
  3192. STFD [C2 ] = f80, SIZE
  3193. ;;
  3194. STFD [C2 ] = f81, SIZE
  3195. ;;
  3196. STFD [C2 ] = f112, SIZE
  3197. ;;
  3198. STFD [C2 ] = f113, SIZE
  3199. ;;
  // Clear the accumulators for whichever block runs next.
  3200. mov f64 = f0
  3201. mov f65 = f0
  3202. mov f80 = f0
  3203. mov f81 = f0
  3204. mov f96 = f0
  3205. mov f97 = f0
  3206. mov f112 = f0
  3207. mov f113 = f0
  3208. ;;
  3209. #ifdef LN
  3210. adds C1 = -4 * SIZE, C1
  3211. adds C2 = -4 * SIZE, C2
  3212. #endif
  3213. ;;
  // NOTE(review): p6 is computed from I here but no branch in this block
  // consumes it; the tbit.z at .L070 overwrites p6 on fall-through.
  3214. cmp.ne p6, p0 = 1, I
  3215. ;;
  3216. adds I = -1, I
  3217. ;;
  // r2 = K << ZBASE_SHIFT, used by the RT update of AORIG below.
  3218. shladd r2 = K, ZBASE_SHIFT, r0
  3219. ;;
  3220. sub L = K, KK
  3221. ;;
  3222. #ifdef RT
  // AORIG += 2 * r2.
  3223. shladd AORIG = r2, 1, AORIG
  3224. #endif
  3225. ;;
  3226. #if defined(LT) || defined(RN)
  // Advance both packed pointers by 2 * ((K - KK) << ZBASE_SHIFT).
  3227. shladd L = L, ZBASE_SHIFT, r0
  3228. ;;
  3229. shladd AOFFSET = L, 1, AOFFSET
  3230. shladd BOFFSET = L, 1, BOFFSET
  3231. #endif
  3232. ;;
  3233. #ifdef LT
  3234. adds KK = 2, KK
  3235. #elif defined LN
  3236. adds KK = -2, KK
  3237. #else
  3238. nop __LINE__
  3239. #endif
  3240. ;;
  // Recompute L from the updated KK; .L070 performs the same assignment again.
  3241. #if defined(LT) || defined(RN)
  3242. mov L = KK
  3243. #else
  3244. sub L = K, KK
  3245. #endif
  3246. ;;
  3247. .align 16
  // .L070: set-up for the block handled when bit 0 of M is set.  When that
  // bit is clear control goes to .L089.  On fall-through this code positions
  // AOFFSET/BOFFSET, preloads the first operands, initialises the prefetch
  // pointers and p3, derives the trip count for .L072 and enters the loop, or
  // skips to .L078 when the trip count is empty.
  // NOTE(review): LDFPD, SIZE, PREFETCHSIZE, ZBASE_SHIFT and the symbolic
  // register names are defined outside this chunk.
  3248. .L070:
  3249. { .mib
  3250. #if defined(LT) || defined(RN)
  3251. mov L = KK
  3252. #else
  3253. sub L = K, KK
  3254. #endif
  // p6 = (bit 0 of M is zero): nothing to do for this block size.
  3255. tbit.z p6, p7 = M, 0
  3256. (p6) br.cond.dptk .L089
  3257. }
  3258. ;;
  3259. { .mmi
  // p7 = (L != 0); it guards every preload below.
  3260. cmp.ne p7, p0 = r0, L
  3261. adds BOFFSET = 0 * SIZE, B
  // r2 = K << ZBASE_SHIFT; only consumed by the LN adjustment of AORIG.
  3262. shl r2 = K, ZBASE_SHIFT
  3263. }
  3264. { .mmi
  // r3 = KK << ZBASE_SHIFT.
  3265. shladd r3 = KK, ZBASE_SHIFT, r0
  3266. nop __LINE__
  3267. nop __LINE__
  3268. }
  3269. ;;
  3270. #if defined(LT) || defined(RN)
  3271. { .mfb
  3272. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  3273. }
  3274. ;;
  3275. #else
  3276. { .mfi
  // BOFFSET = B + 2 * r3.
  3277. shladd BOFFSET = r3, 1, B
  3278. #ifdef LN
  3279. sub AORIG = AORIG, r2
  3280. #else
  3281. nop __LINE__
  3282. #endif
  3283. }
  3284. ;;
  3285. { .mfi
  3286. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  // AOFFSET = AORIG + r3.
  3287. add AOFFSET = r3, AORIG
  3288. }
  3289. ;;
  3290. #endif
  3291. ;;
  // Trip count: L = ((L + 1) >> 1) - 1.  p12 records that bit 0 of L + 1 is
  // zero; .L072 uses it to decide whether p3 is recomputed on each pass.
  3292. adds L = 1, L
  3293. ;;
  3294. { .mii
  3295. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  3296. tbit.z p12, p0 = L, 0
  3297. shr L = L, 1
  3298. }
  3299. ;;
  3300. { .mmi
  3301. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  3302. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  3303. adds L = -1, L
  3304. }
  3305. ;;
  3306. { .mmi
  3307. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  // Force p3 true; the loop only rewrites it when p12 is set.
  3308. cmp.eq p3, p0 = r0, r0
  3309. mov ar.lc = L
  3310. }
  3311. ;;
  // L == -1 means there is nothing to accumulate: go directly to .L078.
  3312. cmp.eq p6, p0 = -1, L
  3313. (p6) br.cond.dpnt .L078
  3314. ;;
  3315. .align 16
  // .L072: accumulate loop for the block set up by .L070.  Every pass runs
  // one unconditional set of eight plain FMA updates on f32/f33 (from
  // AOFFSET) and f48-f51 (from BOFFSET), followed by a second set of eight on
  // f40/f41 and f56-f59 that is predicated on p3.  The eight partial sums are
  // kept separately in f64, f65, f80, f81, f96, f97, f112 and f113 and are
  // folded into four values by the FCALC_A / FCALC_B step after the loop.
  // Predicates:
  //   p4 = (L != 0)  - reload f32/f33 and f48-f51 for the next pass.
  //   p3             - run the second set; rewritten here only when p12 is set.
  // NOTE(review): FMA, FCALC_A and FCALC_B are macros defined outside this
  // chunk; presumably FCALC_* add or subtract depending on the conjugation
  // variant - confirm.
  3316. .L072:
  3317. { .mfb
  3318. lfetch.nt1 [PREA], 4 * SIZE
  3319. FMA f64 = f32, f48, f64 // A1 * B1
  3320. nop __LINE__
  3321. }
  3322. { .mfi
  3323. nop __LINE__
  3324. FMA f96 = f32, f49, f96 // A1 * B2
  3325. (p12) cmp.ne p3, p0 = 0, L
  3326. }
  3327. ;;
  3328. { .mfi
  3329. lfetch.nt1 [PREB], 8 * SIZE
  3330. FMA f80 = f32, f50, f80 // A1 * B3
  3331. cmp.ne p4, p5 = 0, L
  3332. }
  3333. { .mfb
  3334. nop __LINE__
  3335. FMA f112 = f32, f51, f112 // A1 * B4
  3336. nop __LINE__
  3337. }
  3338. ;;
  // Loads for the second (p3) set are interleaved with the first set.
  3339. { .mfi
  3340. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  3341. FMA f65 = f33, f48, f65 // A2 * B1
  3342. }
  3343. { .mfi
  3344. nop __LINE__
  3345. FMA f97 = f33, f49, f97 // A2 * B2
  3346. }
  3347. ;;
  3348. { .mfi
  3349. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  3350. FMA f81 = f33, f50, f81 // A2 * B3
  3351. }
  3352. { .mmf
  3353. nop __LINE__
  3354. nop __LINE__
  3355. FMA f113 = f33, f51, f113 // A2 * B4
  3356. }
  3357. ;;
  // Second set: same update pattern on f40/f41 and f56-f59, all under p3.
  3358. { .mfb
  3359. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  3360. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  3361. nop __LINE__
  3362. }
  3363. { .mmf
  3364. nop __LINE__
  3365. nop __LINE__
  3366. (p3) FMA f96 = f40, f57, f96 // A1 * B2
  3367. }
  3368. ;;
  3369. { .mfb
  3370. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  3371. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  3372. nop __LINE__
  3373. }
  3374. { .mmf
  3375. nop __LINE__
  3376. nop __LINE__
  3377. (p3) FMA f112 = f40, f59, f112 // A1 * B4
  3378. }
  3379. ;;
  3380. { .mfb
  3381. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  3382. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  3383. nop __LINE__
  3384. }
  3385. { .mfb
  3386. nop __LINE__
  3387. (p3) FMA f97 = f41, f57, f97 // A2 * B2
  3388. nop __LINE__
  3389. }
  3390. ;;
  // L is decremented once per pass so that p4 / p3 can be derived from it at
  // the top of the next pass; the loop itself is counted by ar.lc.
  3391. { .mfi
  3392. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  3393. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  3394. adds L = -1, L
  3395. }
  3396. { .mfb
  3397. nop __LINE__
  3398. (p3) FMA f113 = f41, f59, f113 // A2 * B4
  3399. br.cloop.sptk.few .L072
  3400. }
  3401. ;;
  // Fold the eight partial sums into f64, f65, f80 and f81.  This step is
  // skipped when the set-up code branches straight to .L078.
  3402. { .mfb
  3403. nop __LINE__
  3404. FCALC_A f64 = f64, f97
  3405. nop __LINE__
  3406. }
  3407. { .mfb
  3408. nop __LINE__
  3409. FCALC_A f80 = f80, f113
  3410. nop __LINE__
  3411. }
  3412. { .mfb
  3413. nop __LINE__
  3414. FCALC_B f65 = f65, f96
  3415. nop __LINE__
  3416. }
  3417. { .mfb
  3418. nop __LINE__
  3419. FCALC_B f81 = f81, f112
  3420. nop __LINE__
  3421. }
  3422. ;;
  // .L078 -- finish a tile whose accumulators are (f64,f65) and (f80,f81):
  // subtract them from the packed operand, apply the LN/LT/RN/RT triangular
  // step, store the result to the packed buffer and to C1/C2, then update
  // the A/B/KK bookkeeping.
  // FSUB_A and FMA_A..FMA_D are macros defined outside this chunk; their
  // exact signs are not visible here (presumably conjugation-dependent --
  // TODO confirm against the macro definitions).
  3423. .L078:
  3424. #if defined(LN) || defined(RT)
  // r2 = (KK - 1) for LN or (KK - 2) otherwise, then shifted left by
  // ZBASE_SHIFT; AOFFSET = AORIG + r2 and BOFFSET = B + 2 * r2.
  3425. #ifdef LN
  3426. adds r2 = -1, KK
  3427. #else
  3428. adds r2 = -2, KK
  3429. #endif
  3430. ;;
  3431. shladd r2 = r2, ZBASE_SHIFT, r0
  3432. ;;
  3433. add AOFFSET = r2, AORIG
  3434. shladd BOFFSET = r2, 1, B
  3435. ;;
  3436. #endif
  3437. #if defined(LN) || defined(LT)
  // LN/LT: load four values from BOFFSET (pointer is restored afterwards)
  // and replace each accumulator by (loaded value - accumulator).
  3438. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  3439. ;;
  3440. LDFPD f74, f75 = [BOFFSET]
  3441. adds BOFFSET = -2 * SIZE, BOFFSET
  3442. ;;
  3443. FSUB f64 = f72, f64
  3444. FSUB_A f65 = f73, f65
  3445. FSUB f80 = f74, f80
  3446. FSUB_A f81 = f75, f81
  3447. ;;
  3448. #else
  // RN/RT: same subtraction, but the four values come from AOFFSET and
  // plain FSUB is used for all of them.
  3449. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  3450. ;;
  3451. LDFPD f88, f89 = [AOFFSET]
  3452. adds AOFFSET = -2 * SIZE, AOFFSET
  3453. ;;
  3454. FSUB f64 = f72, f64
  3455. FSUB f65 = f73, f65
  3456. FSUB f80 = f88, f80
  3457. FSUB f81 = f89, f81
  3458. ;;
  3459. #endif
  3460. #ifdef LN
  // LN: scale both pairs by the pair (f120,f121) read from AOFFSET. The
  // FMPY/FMA_C/FMA_D sequence has the shape of a complex multiply.
  // NOTE(review): (f120,f121) is presumably a pre-inverted diagonal entry
  // -- confirm against the packing routine.
  3461. LDFPD f120, f121 = [AOFFSET]
  3462. ;;
  3463. FMPY f32 = f120, f64
  3464. FMPY f33 = f121, f64
  3465. FMPY f34 = f120, f80
  3466. FMPY f35 = f121, f80
  3467. ;;
  3468. FMA_C f64 = f121, f65, f32
  3469. FMA_D f65 = f120, f65, f33
  3470. FMA_C f80 = f121, f81, f34
  3471. FMA_D f81 = f120, f81, f35
  3472. ;;
  3473. #endif
  3474. #ifdef LT
  // LT: identical scaling, with the multiplier pair loaded into (f72,f73).
  3475. LDFPD f72, f73 = [AOFFSET]
  3476. ;;
  3477. FMPY f32 = f72, f64
  3478. FMPY f33 = f73, f64
  3479. FMPY f34 = f72, f80
  3480. FMPY f35 = f73, f80
  3481. ;;
  3482. FMA_C f64 = f73, f65, f32
  3483. FMA_D f65 = f72, f65, f33
  3484. FMA_C f80 = f73, f81, f34
  3485. FMA_D f81 = f72, f81, f35
  3486. ;;
  3487. #endif
  3488. #ifdef RN
  // RN: read the pairs at BOFFSET + 0, + 2 and + 6 (in SIZE units), then
  // restore BOFFSET. Scale (f64,f65) by (f72,f73), update (f80,f81) with
  // (f74,f75) times the new (f64,f65), and finally scale (f80,f81) by
  // (f90,f91).
  3489. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  3490. ;;
  3491. LDFPD f74, f75 = [BOFFSET]
  3492. adds BOFFSET = 4 * SIZE, BOFFSET
  3493. ;;
  3494. LDFPD f90, f91 = [BOFFSET]
  3495. adds BOFFSET = - 6 * SIZE, BOFFSET
  3496. ;;
  3497. FMPY f32 = f72, f64
  3498. FMPY f33 = f73, f64
  3499. ;;
  3500. FMA_C f64 = f73, f65, f32
  3501. FMA_D f65 = f72, f65, f33
  3502. ;;
  3503. FNMA f80 = f74, f64, f80
  3504. FMA_A f81 = f75, f64, f81
  3505. ;;
  3506. FMA_B f80 = f75, f65, f80
  3507. FNMA f81 = f74, f65, f81
  3508. ;;
  3509. FMPY f32 = f90, f80
  3510. FMPY f33 = f91, f80
  3511. ;;
  3512. FMA_C f80 = f91, f81, f32
  3513. FMA_D f81 = f90, f81, f33
  3514. ;;
  3515. #endif
  3516. #ifdef RT
  // RT: the same three steps in the opposite order. Pairs are read at
  // BOFFSET + 6, + 4 and + 0, which leaves BOFFSET back at its start.
  // (f80,f81) is scaled first, then (f64,f65) is updated and scaled.
  3517. adds BOFFSET = 6 * SIZE, BOFFSET
  3518. ;;
  3519. LDFPD f104, f105 = [BOFFSET]
  3520. adds BOFFSET = - 2 * SIZE, BOFFSET
  3521. ;;
  3522. LDFPD f106, f107 = [BOFFSET]
  3523. adds BOFFSET = - 4 * SIZE, BOFFSET
  3524. ;;
  3525. LDFPD f120, f121 = [BOFFSET]
  3526. ;;
  3527. FMPY f32 = f104, f80
  3528. FMPY f33 = f105, f80
  3529. ;;
  3530. FMA_C f80 = f105, f81, f32
  3531. FMA_D f81 = f104, f81, f33
  3532. ;;
  3533. FNMA f64 = f106, f80, f64
  3534. FMA_A f65 = f107, f80, f65
  3535. ;;
  3536. FMA_B f64 = f107, f81, f64
  3537. FNMA f65 = f106, f81, f65
  3538. ;;
  3539. FMPY f32 = f120, f64
  3540. FMPY f33 = f121, f64
  3541. ;;
  3542. FMA_C f64 = f121, f65, f32
  3543. FMA_D f65 = f120, f65, f33
  3544. ;;
  3545. #endif
  3546. #if defined(LN) || defined(LT)
  // Write the four results back to the packed buffer (BOFFSET for LN/LT,
  // AOFFSET otherwise) and rewind the pointer by the four stores.
  3547. STFD [BOFFSET] = f64, SIZE
  3548. ;;
  3549. STFD [BOFFSET] = f65, SIZE
  3550. ;;
  3551. STFD [BOFFSET] = f80, SIZE
  3552. ;;
  3553. STFD [BOFFSET] = f81, SIZE
  3554. ;;
  3555. adds BOFFSET = - 4 * SIZE, BOFFSET
  3556. ;;
  3557. #else
  3558. STFD [AOFFSET] = f64, SIZE
  3559. ;;
  3560. STFD [AOFFSET] = f65, SIZE
  3561. ;;
  3562. STFD [AOFFSET] = f80, SIZE
  3563. ;;
  3564. STFD [AOFFSET] = f81, SIZE
  3565. ;;
  3566. adds AOFFSET = - 4 * SIZE, AOFFSET
  3567. ;;
  3568. #endif
  // LN steps C1/C2 back by 2 * SIZE before storing.
  3569. #ifdef LN
  3570. adds C1 = -2 * SIZE, C1
  3571. adds C2 = -2 * SIZE, C2
  3572. #endif
  3573. ;;
  // Store (f64,f65) through C1 and (f80,f81) through C2; each store
  // post-increments its pointer by SIZE.
  3574. STFD [C1 ] = f64, SIZE
  3575. ;;
  3576. STFD [C1 ] = f65, SIZE
  3577. ;;
  3578. STFD [C2 ] = f80, SIZE
  3579. ;;
  3580. STFD [C2 ] = f81, SIZE
  3581. ;;
  // Clear these eight accumulators (f0 is the constant 0.0 register).
  3582. mov f64 = f0
  3583. mov f65 = f0
  3584. mov f80 = f0
  3585. mov f81 = f0
  3586. mov f96 = f0
  3587. mov f97 = f0
  3588. mov f112 = f0
  3589. mov f113 = f0
  3590. ;;
  // LN: undo the post-increment of the stores above so C1/C2 move backwards.
  3591. #ifdef LN
  3592. adds C1 = -2 * SIZE, C1
  3593. adds C2 = -2 * SIZE, C2
  3594. #endif
  3595. ;;
  // p6 = (I != 1), then I is decremented.
  // NOTE(review): no instruction visible in this section consumes p6.
  3596. cmp.ne p6, p0 = 1, I
  3597. ;;
  3598. adds I = -1, I
  3599. ;;
  // r2 = K << ZBASE_SHIFT and L = K - KK. RT adds r2 to AORIG; LT/RN shift L
  // by ZBASE_SHIFT and advance AOFFSET by L and BOFFSET by 2 * L.
  3600. shladd r2 = K, ZBASE_SHIFT, r0
  3601. ;;
  3602. sub L = K, KK
  3603. ;;
  3604. #ifdef RT
  3605. add AORIG = r2, AORIG
  3606. #endif
  3607. ;;
  3608. #if defined(LT) || defined(RN)
  3609. shladd L = L, ZBASE_SHIFT, r0
  3610. ;;
  3611. add AOFFSET = L, AOFFSET
  3612. shladd BOFFSET = L, 1, BOFFSET
  3613. #endif
  3614. ;;
  // KK moves by +1 for LT and -1 for LN; it is left unchanged otherwise.
  3615. #ifdef LT
  3616. adds KK = 1, KK
  3617. #elif defined LN
  3618. adds KK = -1, KK
  3619. #else
  3620. nop __LINE__
  3621. #endif
  3622. ;;
  // Recompute L from the updated KK: KK for LT/RN, K - KK for LN/RT.
  3623. #if defined(LT) || defined(RN)
  3624. mov L = KK
  3625. #else
  3626. sub L = K, KK
  3627. #endif
  3628. ;;
  // .L089 -- bookkeeping after the preceding column block: update B and KK
  // according to the variant and reset AOFFSET to the start of A.
  3629. .align 16
  3630. .L089:
  // LN: B += 2 * (K << ZBASE_SHIFT).
  3631. #ifdef LN
  3632. shladd KK8 = K, ZBASE_SHIFT, r0
  3633. ;;
  3634. shladd B = KK8, 1, B
  3635. #endif
  // LT/RN: B continues from wherever BOFFSET ended up.
  3636. #if defined(LT) || defined(RN)
  3637. mov B = BOFFSET
  3638. #endif
  // KK moves by +2 for RN and -2 for RT.
  3639. #ifdef RN
  3640. adds KK = 2, KK
  3641. #endif
  3642. #ifdef RT
  3643. adds KK = -2, KK
  3644. #endif
  3645. ;;
  3646. { .mmi
  3647. mov AOFFSET = A
  3648. nop __LINE__
  3649. }
  3650. ;;
  // .L010 -- entry to the 4-column-block loop: J = N >> 2; when J <= 0 there
  // is nothing to do here and control goes to .L999 (defined outside this
  // chunk).
  3651. .align 16
  3652. .L010:
  3653. shr J = N, 2
  3654. ;;
  3655. cmp.ge p6, p0 = 0, J
  3656. (p6) br.cond.dpnt .L999
  3657. ;;
  // .L010x -- per-iteration setup for one block of four columns: derive the
  // four column pointers C1..C4 from C, set I = M >> 2, decrement J,
  // initialise KK / AORIG / AOFFSET / L for the active variant, and branch to
  // .L020 (outside this chunk) when I == 0.
  3658. .L010x:
  3659. #ifdef RT
  // RT moves backwards: B -= K << (2 + ZBASE_SHIFT) and C -= 4 * LDC before
  // the column pointers are derived.
  3660. { .mmi
  3661. shladd r3 = LDC, 2, r0
  3662. nop __LINE__
  3663. shl r2 = K, 2 + ZBASE_SHIFT
  3664. }
  3665. ;;
  3666. { .mmi
  3667. sub B = B, r2
  3668. sub C = C, r3
  3669. nop __LINE__
  3670. }
  3671. ;;
  3672. #endif
  3673. { .mmi
  3674. mov C1 = C // coffset1 = c + 0 * ldc
  3675. add C2 = LDC, C // coffset2 = c + 1 * ldc
  3676. shr I = M, 2
  3677. }
  // KK starts at M + OFFSET for LN and at OFFSET for LT; RN/RT leave it
  // untouched here. LN/RT record A in AORIG, LT/RN in AOFFSET.
  3678. { .mmi
  3679. adds J = -1, J
  3680. #ifdef LN
  3681. add KK = M, OFFSET
  3682. #elif defined LT
  3683. mov KK = OFFSET
  3684. #else
  3685. nop __LINE__
  3686. #endif
  3687. #if defined(LN) || defined(RT)
  3688. mov AORIG = A
  3689. #else
  3690. mov AOFFSET = A
  3691. #endif
  3692. }
  3693. ;;
  3694. ;;
  // L = KK for LT/RN, K - KK for LN/RT.
  3695. { .mmi
  3696. shladd C3 = LDC, 1, C // coffset3 = c + 2 * ldc
  3697. shladd C4 = LDC, 1, C2 // coffset4 = c + 3 * ldc
  3698. #if defined(LT) || defined(RN)
  3699. mov L = KK
  3700. #else
  3701. sub L = K, KK
  3702. #endif
  3703. }
  // p6 = (I == 0): no 4-row tiles, so skip ahead to .L020.
  3704. { .mib
  3705. cmp.eq p6, p7 = 0, I
  3706. #ifndef RT
  3707. shladd C = LDC, 2, C // coffset += 4 * ldc
  3708. #else
  3709. nop __LINE__
  3710. #endif
  3711. (p6) br.cond.dpnt .L020
  3712. }
  3713. ;;
  // .L011 -- set up one pass of the .L012 multiply-accumulate loop:
  //   * p7 = (L != 0) guards the initial operand loads (f32-f39 from AOFFSET,
  //     f48-f55 from BOFFSET);
  //   * accumulators f66-f71, f82-f87, f98-f103 and f114-f119 are cleared
  //     here, using a mix of "mov = f0" and "setf.d = r0"; f64/f65, f80/f81,
  //     f96/f97 and f112/f113 are not written in this section;
  //   * the trip count becomes ((L + 1) >> 1) - 1 in ar.lc, and p12 records
  //     that the incremented L had bit 0 clear (i.e. the original L was odd);
  //   * p3 is set true, and C5..C8 are set to C1..C4 + 4 * SIZE;
  //   * prefetch pointers PREA/PREB/PREC are initialised;
  //   * the loop is skipped (branch to .L018) when the adjusted L is -1.
  3714. .align 16
  3715. .L011:
  3716. { .mmi
  3717. cmp.ne p7, p0 = r0, L
  3718. adds BOFFSET = 0 * SIZE, B
  3719. shl r2 = K, 2 + ZBASE_SHIFT
  3720. }
  3721. { .mfi
  3722. shladd r3 = KK, ZBASE_SHIFT, r0
  3723. mov f118 = f0
  3724. nop __LINE__
  3725. }
  3726. ;;
  3727. #if defined(LT) || defined(RN)
  3728. { .mfb
  3729. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  3730. mov f66 = f0
  3731. nop __LINE__
  3732. }
  3733. { .mmf
  3734. nop __LINE__
  3735. nop __LINE__
  3736. mov f67 = f0
  3737. }
  3738. ;;
  3739. #else
  // LN/RT: start from offset 4 * r3, where r3 = KK << ZBASE_SHIFT:
  // BOFFSET = B + 4 * r3 and AOFFSET = AORIG + 4 * r3. LN first moves AORIG
  // back by r2 = K << (2 + ZBASE_SHIFT).
  3740. { .mfi
  3741. shladd BOFFSET = r3, 2, B
  3742. mov f66 = f0
  3743. #ifdef LN
  3744. sub AORIG = AORIG, r2
  3745. #else
  3746. nop __LINE__
  3747. #endif
  3748. }
  3749. ;;
  3750. { .mfi
  3751. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  3752. mov f67 = f0
  3753. shladd AOFFSET = r3, 2, AORIG
  3754. }
  3755. ;;
  3756. #endif
  3757. ;;
  3758. { .mfi
  3759. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  3760. mov f82 = f0
  3761. nop __LINE__
  3762. }
  3763. { .mfi
  3764. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  3765. mov f83 = f0
  3766. adds PREC = CPREFETCHSIZE * SIZE, C1
  3767. }
  3768. ;;
  3769. { .mfi
  3770. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  3771. mov f98 = f0
  3772. adds L = 1, L
  3773. }
  3774. { .mfi
  3775. (p7) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  3776. mov f99 = f0
  3777. adds C5 = 4 * SIZE, C1
  3778. }
  3779. ;;
  // p12 = (bit 0 of the incremented L is 0).
  3780. { .mfi
  3781. (p7) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  3782. mov f114 = f0
  3783. tbit.z p12, p0 = L, 0
  3784. }
  3785. { .mfi
  3786. (p7) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  3787. mov f115 = f0
  3788. adds C6 = 4 * SIZE, C2
  3789. }
  3790. ;;
  3791. { .mfi
  3792. (p7) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  3793. mov f68 = f0
  3794. shr L = L, 1
  3795. }
  3796. { .mfi
  3797. setf.d f86 = r0
  3798. mov f69 = f0
  3799. adds C7 = 4 * SIZE, C3
  3800. }
  3801. ;;
  3802. { .mfi
  3803. CPREFETCH [PREC], LDC
  3804. mov f84 = f0
  3805. adds L = -1, L
  3806. }
  3807. { .mfi
  3808. setf.d f87 = r0
  3809. mov f85 = f0
  3810. adds C8 = 4 * SIZE, C4
  3811. }
  3812. ;;
  // ar.lc = L; p3 is forced true (r0 == r0).
  3813. { .mfi
  3814. CPREFETCH [PREC], LDC
  3815. mov f100 = f0
  3816. mov ar.lc = L
  3817. }
  3818. { .mfi
  3819. setf.d f102 = r0
  3820. mov f101 = f0
  3821. cmp.eq p3, p0 = r0, r0
  3822. }
  3823. ;;
  3824. { .mfi
  3825. CPREFETCH [PREC], LDC
  3826. mov f116 = f0
  3827. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  3828. }
  3829. { .mfi
  3830. setf.d f103 = r0
  3831. mov f117 = f0
  3832. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  3833. }
  3834. ;;
  // L == -1 means there are no loop trips: go straight to .L018.
  3835. { .mfi
  3836. CPREFETCH [PREC]
  3837. mov f70 = f0
  3838. cmp.eq p6, p0 = -1, L
  3839. }
  3840. { .mfb
  3841. setf.d f119 = r0
  3842. mov f71 = f0
  3843. (p6) br.cond.dpnt .L018
  3844. }
  3845. ;;
  // .L012 -- unrolled multiply-accumulate loop, two k-steps per trip, closed
  // by br.cloop (trip count in ar.lc, loaded before the loop is entered).
  //   * Steps 1-32 run unconditionally: operands f32-f39 (loaded from
  //     AOFFSET) times f48-f55 (loaded from BOFFSET) are accumulated into
  //     f64-f71, f80-f87, f96-f103 and f112-f119.
  //   * Steps 33-64 repeat the pattern with f40-f47 and f56-f63 and are
  //     predicated on p3. p3 is true on entry and, when p12 is set, is
  //     recomputed each trip as (L != 0), so the second k-step is dropped on
  //     the final trip in that case.
  //   * p4 = (L != 0) guards the reloads of f32-f39 / f48-f55 for the next
  //     trip; the (p3) loads in the first half fetch the second step's
  //     operands.
  //   * L is decremented once per trip in step 64.
  // FMA_A and FMA_B are macros defined outside this chunk.
  // The numbered markers and "An * Bm" notes below are the original
  // annotations.
  3846. .align 16
  3847. .L012:
  3848. /* 1 */
  3849. { .mfi
  3850. lfetch.nt1 [PREA], 16 * SIZE
  3851. FMA f64 = f32, f48, f64 // A1 * B1
  3852. nop __LINE__
  3853. }
  3854. { .mfb
  3855. (p12) cmp.ne p3, p0 = 0, L
  3856. FMA_B f65 = f32, f49, f65 // A1 * B2
  3857. nop __LINE__
  3858. }
  3859. ;;
  3860. /* 2 */
  3861. { .mfi
  3862. lfetch.nt1 [PREB], 16 * SIZE
  3863. FMA f80 = f32, f50, f80 // A1 * B3
  3864. nop __LINE__
  3865. }
  3866. { .mfb
  3867. cmp.ne p4, p5 = 0, L
  3868. FMA_B f81 = f32, f51, f81 // A1 * B4
  3869. nop __LINE__
  3870. }
  3871. ;;
  3872. /* 3 */
  3873. { .mfb
  3874. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  3875. FMA f96 = f32, f52, f96 // A1 * B5
  3876. nop __LINE__
  3877. }
  3878. { .mfb
  3879. FMA_B f97 = f32, f53, f97 // A1 * B6
  3880. nop __LINE__
  3881. }
  3882. ;;
  3883. /* 4 */
  3884. { .mfb
  3885. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  3886. FMA f112 = f32, f54, f112 // A1 * B7
  3887. nop __LINE__
  3888. }
  3889. { .mfb
  3890. FMA_B f113 = f32, f55, f113 // A1 * B8
  3891. nop __LINE__
  3892. }
  3893. ;;
  3894. /* 5 */
  3895. { .mfb
  3896. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  3897. FMA f65 = f33, f48, f65 // A2 * B1
  3898. nop __LINE__
  3899. }
  3900. { .mfb
  3901. FMA_A f64 = f33, f49, f64 // A2 * B2
  3902. nop __LINE__
  3903. }
  3904. ;;
  3905. /* 6 */
  3906. { .mfb
  3907. (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  3908. FMA f81 = f33, f50, f81 // A2 * B3
  3909. nop __LINE__
  3910. }
  3911. { .mfb
  3912. FMA_A f80 = f33, f51, f80 // A2 * B4
  3913. nop __LINE__
  3914. }
  3915. ;;
  3916. /* 7 */
  3917. { .mfb
  3918. (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  3919. FMA f97 = f33, f52, f97 // A2 * B5
  3920. nop __LINE__
  3921. }
  3922. { .mfb
  3923. FMA_A f96 = f33, f53, f96 // A2 * B6
  3924. nop __LINE__
  3925. }
  3926. ;;
  3927. /* 8 */
  3928. { .mfb
  3929. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  3930. FMA f113 = f33, f54, f113 // A2 * B7
  3931. nop __LINE__
  3932. }
  3933. { .mfb
  3934. FMA_A f112 = f33, f55, f112 // A2 * B8
  3935. nop __LINE__
  3936. }
  3937. ;;
  3938. /* 9 */
  3939. { .mfb
  3940. (p3) LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  3941. FMA f66 = f34, f48, f66 // A3 * B1
  3942. nop __LINE__
  3943. }
  3944. { .mfb
  3945. FMA_B f67 = f34, f49, f67 // A3 * B2
  3946. nop __LINE__
  3947. }
  3948. ;;
  3949. /* 10 */
  3950. { .mfb
  3951. (p3) LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  3952. FMA f82 = f34, f50, f82 // A3 * B3
  3953. nop __LINE__
  3954. }
  3955. { .mfb
  3956. FMA_B f83 = f34, f51, f83 // A3 * B4
  3957. nop __LINE__
  3958. }
  3959. ;;
  3960. /* 11 */
  3961. { .mfb
  3962. FMA f98 = f34, f52, f98 // A3 * B5
  3963. nop __LINE__
  3964. }
  3965. { .mfb
  3966. nop __LINE__
  3967. FMA_B f99 = f34, f53, f99 // A3 * B6
  3968. nop __LINE__
  3969. }
  3970. ;;
  3971. /* 12 */
  3972. { .mfb
  3973. FMA f114 = f34, f54, f114 // A3 * B7
  3974. nop __LINE__
  3975. }
  3976. { .mfb
  3977. nop __LINE__
  3978. FMA_B f115 = f34, f55, f115 // A3 * B8
  3979. nop __LINE__
  3980. }
  3981. ;;
  3982. /* 13 */
  3983. { .mfb
  3984. nop __LINE__
  3985. FMA f67 = f35, f48, f67 // A4 * B1
  3986. }
  3987. { .mfb
  3988. nop __LINE__
  3989. FMA_A f66 = f35, f49, f66 // A4 * B2
  3990. nop __LINE__
  3991. }
  3992. ;;
  3993. /* 14 */
  3994. { .mfb
  3995. FMA f83 = f35, f50, f83 // A4 * B3
  3996. nop __LINE__
  3997. }
  3998. { .mfb
  3999. nop __LINE__
  4000. FMA_A f82 = f35, f51, f82 // A4 * B4
  4001. nop __LINE__
  4002. }
  4003. ;;
  4004. /* 15 */
  4005. { .mfb
  4006. FMA f99 = f35, f52, f99 // A4 * B5
  4007. nop __LINE__
  4008. }
  4009. { .mfb
  4010. nop __LINE__
  4011. FMA_A f98 = f35, f53, f98 // A4 * B6
  4012. nop __LINE__
  4013. }
  4014. ;;
  4015. /* 16 */
  4016. { .mfb
  4017. FMA f115 = f35, f54, f115 // A4 * B7
  4018. nop __LINE__
  4019. }
  4020. { .mfb
  4021. nop __LINE__
  4022. FMA_A f114 = f35, f55, f114 // A4 * B8
  4023. nop __LINE__
  4024. }
  4025. ;;
  4026. /* 17 */
  4027. { .mfb
  4028. nop __LINE__
  4029. FMA f68 = f36, f48, f68 // A5 * B1
  4030. nop __LINE__
  4031. }
  4032. { .mfb
  4033. nop __LINE__
  4034. FMA_B f69 = f36, f49, f69 // A5 * B2
  4035. nop __LINE__
  4036. }
  4037. ;;
  4038. /* 18 */
  4039. { .mfb
  4040. nop __LINE__
  4041. FMA f84 = f36, f50, f84 // A5 * B3
  4042. nop __LINE__
  4043. }
  4044. { .mfb
  4045. nop __LINE__
  4046. FMA_B f85 = f36, f51, f85 // A5 * B4
  4047. nop __LINE__
  4048. }
  4049. ;;
  4050. /* 19 */
  4051. { .mfb
  4052. nop __LINE__
  4053. FMA f100 = f36, f52, f100 // A5 * B5
  4054. nop __LINE__
  4055. }
  4056. { .mfb
  4057. nop __LINE__
  4058. FMA_B f101 = f36, f53, f101 // A5 * B6
  4059. nop __LINE__
  4060. }
  4061. ;;
  4062. /* 20 */
  4063. { .mfb
  4064. nop __LINE__
  4065. FMA f116 = f36, f54, f116 // A5 * B7
  4066. nop __LINE__
  4067. }
  4068. { .mfb
  4069. nop __LINE__
  4070. FMA_B f117 = f36, f55, f117 // A5 * B8
  4071. nop __LINE__
  4072. }
  4073. ;;
  4074. /* 21 */
  4075. { .mfb
  4076. nop __LINE__
  4077. FMA f69 = f37, f48, f69 // A6 * B1
  4078. nop __LINE__
  4079. }
  4080. { .mfb
  4081. nop __LINE__
  4082. FMA_A f68 = f37, f49, f68 // A6 * B2
  4083. nop __LINE__
  4084. }
  4085. ;;
  4086. /* 22 */
  4087. { .mfb
  4088. nop __LINE__
  4089. FMA f85 = f37, f50, f85 // A6 * B3
  4090. nop __LINE__
  4091. }
  4092. { .mfb
  4093. nop __LINE__
  4094. FMA_A f84 = f37, f51, f84 // A6 * B4
  4095. nop __LINE__
  4096. }
  4097. ;;
  4098. /* 23 */
  4099. { .mfb
  4100. nop __LINE__
  4101. FMA f101 = f37, f52, f101 // A6 * B5
  4102. nop __LINE__
  4103. }
  4104. { .mfb
  4105. nop __LINE__
  4106. FMA_A f100 = f37, f53, f100 // A6 * B6
  4107. nop __LINE__
  4108. }
  4109. ;;
  4110. /* 24 */
  4111. { .mfb
  4112. nop __LINE__
  4113. FMA f117 = f37, f54, f117 // A6 * B7
  4114. nop __LINE__
  4115. }
  4116. { .mfb
  4117. nop __LINE__
  4118. FMA_A f116 = f37, f55, f116 // A6 * B8
  4119. nop __LINE__
  4120. }
  4121. ;;
  4122. /* 25 */
  4123. { .mfb
  4124. nop __LINE__
  4125. FMA f70 = f38, f48, f70 // A7 * B1
  4126. nop __LINE__
  4127. }
  4128. { .mfb
  4129. nop __LINE__
  4130. FMA_B f71 = f38, f49, f71 // A7 * B2
  4131. nop __LINE__
  4132. }
  4133. ;;
  4134. /* 26 */
  4135. { .mfb
  4136. nop __LINE__
  4137. FMA f86 = f38, f50, f86 // A7 * B3
  4138. nop __LINE__
  4139. }
  4140. { .mfb
  4141. nop __LINE__
  4142. FMA_B f87 = f38, f51, f87 // A7 * B4
  4143. nop __LINE__
  4144. }
  4145. ;;
  4146. /* 27 */
  4147. { .mfb
  4148. nop __LINE__
  4149. FMA f102 = f38, f52, f102 // A7 * B5
  4150. nop __LINE__
  4151. }
  4152. { .mfb
  4153. nop __LINE__
  4154. FMA_B f103 = f38, f53, f103 // A7 * B6
  4155. nop __LINE__
  4156. }
  4157. ;;
  4158. /* 28 */
  4159. { .mfb
  4160. nop __LINE__
  4161. FMA f118 = f38, f54, f118 // A7 * B7
  4162. nop __LINE__
  4163. }
  4164. { .mfb
  4165. nop __LINE__
  4166. FMA_B f119 = f38, f55, f119 // A7 * B8
  4167. nop __LINE__
  4168. }
  4169. ;;
  4170. /* 29 */
  4171. { .mfb
  4172. nop __LINE__
  4173. FMA f71 = f39, f48, f71 // A8 * B1
  4174. nop __LINE__
  4175. }
  4176. { .mfb
  4177. nop __LINE__
  4178. FMA_A f70 = f39, f49, f70 // A8 * B2
  4179. nop __LINE__
  4180. }
  4181. ;;
  // From here on, (p4) loads start refilling f32-f39 / f48-f55 for the next
  // trip.
  4182. /* 30 */
  4183. { .mfb
  4184. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  4185. FMA f87 = f39, f50, f87 // A8 * B3
  4186. nop __LINE__
  4187. }
  4188. { .mfb
  4189. nop __LINE__
  4190. FMA_A f86 = f39, f51, f86 // A8 * B4
  4191. nop __LINE__
  4192. }
  4193. ;;
  4194. /* 31 */
  4195. { .mfb
  4196. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  4197. FMA f103 = f39, f52, f103 // A8 * B5
  4198. nop __LINE__
  4199. }
  4200. { .mfb
  4201. nop __LINE__
  4202. FMA_A f102 = f39, f53, f102 // A8 * B6
  4203. nop __LINE__
  4204. }
  4205. ;;
  4206. /* 32 */
  4207. { .mfb
  4208. nop __LINE__
  4209. FMA f119 = f39, f54, f119 // A8 * B7
  4210. nop __LINE__
  4211. }
  4212. { .mfb
  4213. nop __LINE__
  4214. FMA_A f118 = f39, f55, f118 // A8 * B8
  4215. nop __LINE__
  4216. }
  4217. ;;
  // Second k-step: same accumulation pattern on f40-f47 / f56-f63, all under
  // p3.
  4218. /* 33 */
  4219. { .mfb
  4220. nop __LINE__
  4221. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  4222. nop __LINE__
  4223. }
  4224. { .mfb
  4225. nop __LINE__
  4226. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  4227. nop __LINE__
  4228. }
  4229. ;;
  4230. /* 34 */
  4231. { .mfb
  4232. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  4233. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  4234. nop __LINE__
  4235. }
  4236. { .mfb
  4237. nop __LINE__
  4238. (p3) FMA_B f81 = f40, f59, f81 // A1 * B4
  4239. nop __LINE__
  4240. }
  4241. ;;
  4242. /* 35 */
  4243. { .mfb
  4244. (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  4245. (p3) FMA f96 = f40, f60, f96 // A1 * B5
  4246. nop __LINE__
  4247. }
  4248. { .mfb
  4249. nop __LINE__
  4250. (p3) FMA_B f97 = f40, f61, f97 // A1 * B6
  4251. nop __LINE__
  4252. }
  4253. ;;
  4254. /* 36 */
  4255. { .mfb
  4256. (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  4257. (p3) FMA f112 = f40, f62, f112 // A1 * B7
  4258. nop __LINE__
  4259. }
  4260. { .mfb
  4261. nop __LINE__
  4262. (p3) FMA_B f113 = f40, f63, f113 // A1 * B8
  4263. nop __LINE__
  4264. }
  4265. ;;
  4266. /* 37 */
  4267. { .mfb
  4268. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  4269. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  4270. nop __LINE__
  4271. }
  4272. { .mfb
  4273. nop __LINE__
  4274. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  4275. nop __LINE__
  4276. }
  4277. ;;
  4278. /* 38 */
  4279. { .mfb
  4280. (p4) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  4281. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  4282. nop __LINE__
  4283. }
  4284. { .mfb
  4285. nop __LINE__
  4286. (p3) FMA_A f80 = f41, f59, f80 // A2 * B4
  4287. nop __LINE__
  4288. }
  4289. ;;
  4290. /* 39 */
  4291. { .mfb
  4292. (p4) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  4293. (p3) FMA f97 = f41, f60, f97 // A2 * B5
  4294. nop __LINE__
  4295. }
  4296. { .mfb
  4297. nop __LINE__
  4298. (p3) FMA_A f96 = f41, f61, f96 // A2 * B6
  4299. nop __LINE__
  4300. }
  4301. ;;
  4302. /* 40 */
  4303. { .mfb
  4304. nop __LINE__
  4305. (p3) FMA f113 = f41, f62, f113 // A2 * B7
  4306. nop __LINE__
  4307. }
  4308. { .mfb
  4309. nop __LINE__
  4310. (p3) FMA_A f112 = f41, f63, f112 // A2 * B8
  4311. nop __LINE__
  4312. }
  4313. ;;
  4314. /* 41 */
  4315. { .mfb
  4316. nop __LINE__
  4317. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  4318. nop __LINE__
  4319. }
  4320. { .mfb
  4321. nop __LINE__
  4322. (p3) FMA_B f67 = f42, f57, f67 // A3 * B2
  4323. nop __LINE__
  4324. }
  4325. ;;
  4326. /* 42 */
  4327. { .mfb
  4328. nop __LINE__
  4329. (p3) FMA f82 = f42, f58, f82 // A3 * B3
  4330. nop __LINE__
  4331. }
  4332. { .mfb
  4333. nop __LINE__
  4334. (p3) FMA_B f83 = f42, f59, f83 // A3 * B4
  4335. nop __LINE__
  4336. }
  4337. ;;
  4338. /* 43 */
  4339. { .mfb
  4340. nop __LINE__
  4341. (p3) FMA f98 = f42, f60, f98 // A3 * B5
  4342. nop __LINE__
  4343. }
  4344. { .mfb
  4345. nop __LINE__
  4346. (p3) FMA_B f99 = f42, f61, f99 // A3 * B6
  4347. nop __LINE__
  4348. }
  4349. ;;
  4350. /* 44 */
  4351. { .mfb
  4352. nop __LINE__
  4353. (p3) FMA f114 = f42, f62, f114 // A3 * B7
  4354. nop __LINE__
  4355. }
  4356. { .mfb
  4357. nop __LINE__
  4358. (p3) FMA_B f115 = f42, f63, f115 // A3 * B8
  4359. nop __LINE__
  4360. }
  4361. ;;
  4362. /* 45 */
  4363. { .mfb
  4364. nop __LINE__
  4365. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  4366. nop __LINE__
  4367. }
  4368. { .mfb
  4369. nop __LINE__
  4370. (p3) FMA_A f66 = f43, f57, f66 // A4 * B2
  4371. nop __LINE__
  4372. }
  4373. ;;
  4374. /* 46 */
  4375. { .mfb
  4376. nop __LINE__
  4377. (p3) FMA f83 = f43, f58, f83 // A4 * B3
  4378. nop __LINE__
  4379. }
  4380. { .mfb
  4381. nop __LINE__
  4382. (p3) FMA_A f82 = f43, f59, f82 // A4 * B4
  4383. nop __LINE__
  4384. }
  4385. ;;
  4386. /* 47 */
  4387. { .mfb
  4388. nop __LINE__
  4389. (p3) FMA f99 = f43, f60, f99 // A4 * B5
  4390. nop __LINE__
  4391. }
  4392. { .mfb
  4393. nop __LINE__
  4394. (p3) FMA_A f98 = f43, f61, f98 // A4 * B6
  4395. nop __LINE__
  4396. }
  4397. ;;
  4398. /* 48 */
  4399. { .mfb
  4400. nop __LINE__
  4401. (p3) FMA f115 = f43, f62, f115 // A4 * B7
  4402. nop __LINE__
  4403. }
  4404. { .mfb
  4405. nop __LINE__
  4406. (p3) FMA_A f114 = f43, f63, f114 // A4 * B8
  4407. nop __LINE__
  4408. }
  4409. ;;
  4410. /* 49 */
  4411. { .mfb
  4412. nop __LINE__
  4413. (p3) FMA f68 = f44, f56, f68 // A5 * B1
  4414. nop __LINE__
  4415. }
  4416. { .mfb
  4417. nop __LINE__
  4418. (p3) FMA_B f69 = f44, f57, f69 // A5 * B2
  4419. nop __LINE__
  4420. }
  4421. ;;
  4422. /* 50 */
  4423. { .mfb
  4424. nop __LINE__
  4425. (p3) FMA f84 = f44, f58, f84 // A5 * B3
  4426. nop __LINE__
  4427. }
  4428. { .mfb
  4429. nop __LINE__
  4430. (p3) FMA_B f85 = f44, f59, f85 // A5 * B4
  4431. nop __LINE__
  4432. }
  4433. ;;
  4434. /* 51 */
  4435. { .mfb
  4436. nop __LINE__
  4437. (p3) FMA f100 = f44, f60, f100 // A5 * B5
  4438. nop __LINE__
  4439. }
  4440. { .mfb
  4441. nop __LINE__
  4442. (p3) FMA_B f101 = f44, f61, f101 // A5 * B6
  4443. nop __LINE__
  4444. }
  4445. ;;
  4446. /* 52 */
  4447. { .mfb
  4448. nop __LINE__
  4449. (p3) FMA f116 = f44, f62, f116 // A5 * B7
  4450. nop __LINE__
  4451. }
  4452. { .mfb
  4453. nop __LINE__
  4454. (p3) FMA_B f117 = f44, f63, f117 // A5 * B8
  4455. nop __LINE__
  4456. }
  4457. ;;
  4458. /* 53 */
  4459. { .mfb
  4460. nop __LINE__
  4461. (p3) FMA f69 = f45, f56, f69 // A6 * B1
  4462. nop __LINE__
  4463. }
  4464. { .mfb
  4465. nop __LINE__
  4466. (p3) FMA_A f68 = f45, f57, f68 // A6 * B2
  4467. nop __LINE__
  4468. }
  4469. ;;
  4470. /* 54 */
  4471. { .mfb
  4472. nop __LINE__
  4473. (p3) FMA f85 = f45, f58, f85 // A6 * B3
  4474. nop __LINE__
  4475. }
  4476. { .mfb
  4477. nop __LINE__
  4478. (p3) FMA_A f84 = f45, f59, f84 // A6 * B4
  4479. nop __LINE__
  4480. }
  4481. ;;
  4482. /* 55 */
  4483. { .mfb
  4484. nop __LINE__
  4485. (p3) FMA f101 = f45, f60, f101 // A6 * B5
  4486. nop __LINE__
  4487. }
  4488. { .mfb
  4489. nop __LINE__
  4490. (p3) FMA_A f100 = f45, f61, f100 // A6 * B6
  4491. nop __LINE__
  4492. }
  4493. ;;
  4494. /* 56 */
  4495. { .mfb
  4496. nop __LINE__
  4497. (p3) FMA f117 = f45, f62, f117 // A6 * B7
  4498. nop __LINE__
  4499. }
  4500. { .mfb
  4501. nop __LINE__
  4502. (p3) FMA_A f116 = f45, f63, f116 // A6 * B8
  4503. nop __LINE__
  4504. }
  4505. ;;
  4506. /* 57 */
  4507. { .mfb
  4508. nop __LINE__
  4509. (p3) FMA f70 = f46, f56, f70 // A7 * B1
  4510. nop __LINE__
  4511. }
  4512. { .mfb
  4513. nop __LINE__
  4514. (p3) FMA_B f71 = f46, f57, f71 // A7 * B2
  4515. nop __LINE__
  4516. }
  4517. ;;
  4518. /* 58 */
  4519. { .mfb
  4520. nop __LINE__
  4521. (p3) FMA f86 = f46, f58, f86 // A7 * B3
  4522. nop __LINE__
  4523. }
  4524. { .mfb
  4525. nop __LINE__
  4526. (p3) FMA_B f87 = f46, f59, f87 // A7 * B4
  4527. nop __LINE__
  4528. }
  4529. ;;
  4530. /* 59 */
  4531. { .mfb
  4532. nop __LINE__
  4533. (p3) FMA f102 = f46, f60, f102 // A7 * B5
  4534. nop __LINE__
  4535. }
  4536. { .mfb
  4537. nop __LINE__
  4538. (p3) FMA_B f103 = f46, f61, f103 // A7 * B6
  4539. nop __LINE__
  4540. }
  4541. ;;
  4542. /* 60 */
  4543. { .mfb
  4544. nop __LINE__
  4545. (p3) FMA f118 = f46, f62, f118 // A7 * B7
  4546. nop __LINE__
  4547. }
  4548. { .mfb
  4549. nop __LINE__
  4550. (p3) FMA_B f119 = f46, f63, f119 // A7 * B8
  4551. nop __LINE__
  4552. }
  4553. ;;
  4554. /* 61 */
  4555. { .mfb
  4556. nop __LINE__
  4557. (p3) FMA f71 = f47, f56, f71 // A8 * B1
  4558. nop __LINE__
  4559. }
  4560. { .mfb
  4561. nop __LINE__
  4562. (p3) FMA_A f70 = f47, f57, f70 // A8 * B2
  4563. nop __LINE__
  4564. }
  4565. ;;
  4566. /* 62 */
  4567. { .mfb
  4568. nop __LINE__
  4569. (p3) FMA f87 = f47, f58, f87 // A8 * B3
  4570. nop __LINE__
  4571. }
  4572. { .mfb
  4573. nop __LINE__
  4574. (p3) FMA_A f86 = f47, f59, f86 // A8 * B4
  4575. nop __LINE__
  4576. }
  4577. ;;
  4578. /* 63 */
  4579. { .mfb
  4580. nop __LINE__
  4581. (p3) FMA f103 = f47, f60, f103 // A8 * B5
  4582. nop __LINE__
  4583. }
  4584. { .mfb
  4585. nop __LINE__
  4586. (p3) FMA_A f102 = f47, f61, f102 // A8 * B6
  4587. nop __LINE__
  4588. }
  4589. ;;
  // Last step: decrement L and loop back while ar.lc has trips left.
  4590. /* 64 */
  4591. { .mfi
  4592. nop __LINE__
  4593. (p3) FMA f119 = f47, f62, f119 // A8 * B7
  4594. adds L = -1, L
  4595. }
  4596. { .mfb
  4597. nop __LINE__
  4598. (p3) FMA_A f118 = f47, f63, f118 // A8 * B8
  4599. br.cloop.sptk.few .L012
  4600. }
  4601. ;;
  4602. .L018:
  4603. #if defined(LN) || defined(RT)
  4604. #ifdef LN
  4605. adds r2 = -4, KK
  4606. #else
  4607. adds r2 = -4, KK
  4608. #endif
  4609. ;;
  4610. shladd r2 = r2, ZBASE_SHIFT, r0
  4611. ;;
  4612. shladd AOFFSET = r2, 2, AORIG
  4613. shladd BOFFSET = r2, 2, B
  4614. ;;
  4615. #endif
  4616. #if defined(LN) || defined(LT)
  4617. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  4618. ;;
  4619. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  4620. ;;
  4621. LDFPD f76, f77 = [BOFFSET], 2 * SIZE
  4622. ;;
  4623. LDFPD f78, f79 = [BOFFSET], 2 * SIZE
  4624. ;;
  4625. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  4626. ;;
  4627. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  4628. ;;
  4629. LDFPD f92, f93 = [BOFFSET], 2 * SIZE
  4630. ;;
  4631. { .mfi
  4632. LDFPD f94, f95 = [BOFFSET], 2 * SIZE
  4633. FSUB f64 = f72, f64
  4634. nop __LINE__
  4635. }
  4636. { .mfi
  4637. nop __LINE__
  4638. FSUB_A f65 = f73, f65
  4639. nop __LINE__
  4640. }
  4641. ;;
  4642. { .mfi
  4643. LDFPD f104, f105 = [BOFFSET], 2 * SIZE
  4644. FSUB f80 = f74, f80
  4645. nop __LINE__
  4646. }
  4647. { .mfi
  4648. nop __LINE__
  4649. FSUB_A f81 = f75, f81
  4650. nop __LINE__
  4651. }
  4652. ;;
  4653. { .mfi
  4654. LDFPD f106, f107 = [BOFFSET], 2 * SIZE
  4655. FSUB f96 = f76, f96
  4656. nop __LINE__
  4657. }
  4658. { .mfi
  4659. nop __LINE__
  4660. FSUB_A f97 = f77, f97
  4661. nop __LINE__
  4662. }
  4663. ;;
  4664. { .mfi
  4665. LDFPD f108, f109 = [BOFFSET], 2 * SIZE
  4666. FSUB f112 = f78, f112
  4667. nop __LINE__
  4668. }
  4669. { .mfi
  4670. nop __LINE__
  4671. FSUB_A f113 = f79, f113
  4672. nop __LINE__
  4673. }
  4674. ;;
  4675. { .mfi
  4676. LDFPD f110, f111 = [BOFFSET], 2 * SIZE
  4677. FSUB f66 = f88, f66
  4678. nop __LINE__
  4679. }
  4680. { .mfi
  4681. nop __LINE__
  4682. FSUB_A f67 = f89, f67
  4683. nop __LINE__
  4684. }
  4685. ;;
  4686. { .mfi
  4687. LDFPD f120, f121 = [BOFFSET], 2 * SIZE
  4688. FSUB f82 = f90, f82
  4689. nop __LINE__
  4690. }
  4691. { .mfi
  4692. nop __LINE__
  4693. FSUB_A f83 = f91, f83
  4694. nop __LINE__
  4695. }
  4696. ;;
  4697. { .mfi
  4698. LDFPD f122, f123 = [BOFFSET], 2 * SIZE
  4699. FSUB f98 = f92, f98
  4700. nop __LINE__
  4701. }
  4702. { .mfi
  4703. nop __LINE__
  4704. FSUB_A f99 = f93, f99
  4705. nop __LINE__
  4706. }
  4707. ;;
  4708. { .mfi
  4709. LDFPD f124, f125 = [BOFFSET], 2 * SIZE
  4710. FSUB f114 = f94, f114
  4711. nop __LINE__
  4712. }
  4713. { .mfi
  4714. nop __LINE__
  4715. FSUB_A f115 = f95, f115
  4716. nop __LINE__
  4717. }
  4718. ;;
  4719. { .mfi
  4720. LDFPD f126, f127 = [BOFFSET]
  4721. FSUB f68 = f104, f68
  4722. adds BOFFSET = -30 * SIZE, BOFFSET
  4723. }
  4724. { .mfi
  4725. nop __LINE__
  4726. FSUB_A f69 = f105, f69
  4727. #ifdef LN
  4728. adds AOFFSET = 30 * SIZE, AOFFSET
  4729. #else
  4730. nop __LINE__
  4731. #endif
  4732. }
  4733. ;;
  4734. { .mfi
  4735. LDFPD f72, f73 = [AOFFSET]
  4736. FSUB f84 = f106, f84
  4737. #ifdef LN
  4738. adds AOFFSET = - 2 * SIZE, AOFFSET
  4739. #else
  4740. adds AOFFSET = 2 * SIZE, AOFFSET
  4741. #endif
  4742. }
  4743. { .mfi
  4744. nop __LINE__
  4745. FSUB_A f85 = f107, f85
  4746. nop __LINE__
  4747. }
  4748. ;;
  4749. { .mfi
  4750. LDFPD f74, f75 = [AOFFSET]
  4751. FSUB f100 = f108, f100
  4752. #ifdef LN
  4753. adds AOFFSET = - 2 * SIZE, AOFFSET
  4754. #else
  4755. adds AOFFSET = 2 * SIZE, AOFFSET
  4756. #endif
  4757. }
  4758. { .mfi
  4759. nop __LINE__
  4760. FSUB_A f101 = f109, f101
  4761. nop __LINE__
  4762. }
  4763. ;;
  4764. { .mfi
  4765. nop __LINE__
  4766. FSUB f116 = f110, f116
  4767. nop __LINE__
  4768. }
  4769. { .mfi
  4770. nop __LINE__
  4771. FSUB_A f117 = f111, f117
  4772. nop __LINE__
  4773. }
  4774. ;;
  4775. { .mfi
  4776. nop __LINE__
  4777. FSUB f70 = f120, f70
  4778. nop __LINE__
  4779. }
  4780. { .mfi
  4781. nop __LINE__
  4782. FSUB_A f71 = f121, f71
  4783. nop __LINE__
  4784. }
  4785. ;;
  4786. { .mfi
  4787. nop __LINE__
  4788. FSUB f86 = f122, f86
  4789. nop __LINE__
  4790. }
  4791. { .mfi
  4792. nop __LINE__
  4793. FSUB_A f87 = f123, f87
  4794. nop __LINE__
  4795. }
  4796. ;;
  4797. { .mfi
  4798. nop __LINE__
  4799. FSUB f102 = f124, f102
  4800. nop __LINE__
  4801. }
  4802. { .mfi
  4803. nop __LINE__
  4804. FSUB_A f103 = f125, f103
  4805. nop __LINE__
  4806. }
  4807. ;;
  4808. { .mfi
  4809. nop __LINE__
  4810. FSUB f118 = f126, f118
  4811. nop __LINE__
  4812. }
  4813. { .mfi
  4814. nop __LINE__
  4815. FSUB_A f119 = f127, f119
  4816. nop __LINE__
  4817. }
  4818. ;;
  4819. #else
  4820. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  4821. ;;
  4822. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  4823. ;;
  4824. LDFPD f76, f77 = [AOFFSET], 2 * SIZE
  4825. ;;
  4826. LDFPD f78, f79 = [AOFFSET], 2 * SIZE
  4827. ;;
  4828. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  4829. ;;
  4830. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  4831. ;;
  4832. { .mfi
  4833. LDFPD f92, f93 = [AOFFSET], 2 * SIZE
  4834. FSUB f64 = f72, f64
  4835. nop __LINE__
  4836. }
  4837. { .mfi
  4838. nop __LINE__
  4839. FSUB f65 = f73, f65
  4840. nop __LINE__
  4841. }
  4842. ;;
  4843. { .mfi
  4844. LDFPD f94, f95 = [AOFFSET], 2 * SIZE
  4845. FSUB f66 = f74, f66
  4846. nop __LINE__
  4847. }
  4848. { .mfi
  4849. nop __LINE__
  4850. FSUB f67 = f75, f67
  4851. nop __LINE__
  4852. }
  4853. ;;
  4854. { .mfi
  4855. LDFPD f104, f105 = [AOFFSET], 2 * SIZE
  4856. FSUB f68 = f76, f68
  4857. nop __LINE__
  4858. }
  4859. { .mfi
  4860. nop __LINE__
  4861. FSUB f69 = f77, f69
  4862. nop __LINE__
  4863. }
  4864. ;;
  4865. { .mfi
  4866. LDFPD f106, f107 = [AOFFSET], 2 * SIZE
  4867. FSUB f70 = f78, f70
  4868. nop __LINE__
  4869. }
  4870. { .mfi
  4871. nop __LINE__
  4872. FSUB f71 = f79, f71
  4873. nop __LINE__
  4874. }
  4875. ;;
  4876. { .mfi
  4877. LDFPD f108, f109 = [AOFFSET], 2 * SIZE
  4878. FSUB f80 = f88, f80
  4879. nop __LINE__
  4880. }
  4881. { .mfi
  4882. nop __LINE__
  4883. FSUB f81 = f89, f81
  4884. nop __LINE__
  4885. }
  4886. ;;
  4887. { .mfi
  4888. LDFPD f110, f111 = [AOFFSET], 2 * SIZE
  4889. FSUB f82 = f90, f82
  4890. nop __LINE__
  4891. }
  4892. { .mfi
  4893. nop __LINE__
  4894. FSUB f83 = f91, f83
  4895. nop __LINE__
  4896. }
  4897. ;;
  4898. { .mfi
  4899. LDFPD f120, f121 = [AOFFSET], 2 * SIZE
  4900. FSUB f84 = f92, f84
  4901. nop __LINE__
  4902. }
  4903. { .mfi
  4904. nop __LINE__
  4905. FSUB f85 = f93, f85
  4906. nop __LINE__
  4907. }
  4908. ;;
  4909. { .mfi
  4910. LDFPD f122, f123 = [AOFFSET], 2 * SIZE
  4911. FSUB f86 = f94, f86
  4912. nop __LINE__
  4913. }
  4914. { .mfi
  4915. nop __LINE__
  4916. FSUB f87 = f95, f87
  4917. nop __LINE__
  4918. }
  4919. ;;
  4920. { .mfi
  4921. LDFPD f124, f125 = [AOFFSET], 2 * SIZE
  4922. FSUB f96 = f104, f96
  4923. nop __LINE__
  4924. }
  4925. { .mfi
  4926. nop __LINE__
  4927. FSUB f97 = f105, f97
  4928. nop __LINE__
  4929. }
  4930. ;;
  4931. { .mfi
  4932. LDFPD f126, f127 = [AOFFSET]
  4933. FSUB f98 = f106, f98
  4934. adds AOFFSET = -30 * SIZE, AOFFSET
  4935. }
  4936. { .mfi
  4937. nop __LINE__
  4938. FSUB f99 = f107, f99
  4939. #ifdef RT
  4940. adds BOFFSET = 30 * SIZE, BOFFSET
  4941. #else
  4942. nop __LINE__
  4943. #endif
  4944. }
  4945. ;;
  4946. { .mfi
  4947. LDFPD f72, f73 = [BOFFSET]
  4948. FSUB f100 = f108, f100
  4949. #ifdef RN
  4950. adds BOFFSET = 2 * SIZE, BOFFSET
  4951. #else
  4952. adds BOFFSET = - 2 * SIZE, BOFFSET
  4953. #endif
  4954. }
  4955. { .mfi
  4956. nop __LINE__
  4957. FSUB f101 = f109, f101
  4958. nop __LINE__
  4959. }
  4960. ;;
  4961. { .mfi
  4962. LDFPD f74, f75 = [BOFFSET]
  4963. FSUB f102 = f110, f102
  4964. #ifdef RN
  4965. adds BOFFSET = 2 * SIZE, BOFFSET
  4966. #else
  4967. adds BOFFSET = - 2 * SIZE, BOFFSET
  4968. #endif
  4969. }
  4970. { .mfi
  4971. nop __LINE__
  4972. FSUB f103 = f111, f103
  4973. nop __LINE__
  4974. }
  4975. ;;
  4976. { .mfi
  4977. nop __LINE__
  4978. FSUB f112 = f120, f112
  4979. nop __LINE__
  4980. }
  4981. { .mfi
  4982. nop __LINE__
  4983. FSUB f113 = f121, f113
  4984. nop __LINE__
  4985. }
  4986. ;;
  4987. { .mfi
  4988. nop __LINE__
  4989. FSUB f114 = f122, f114
  4990. nop __LINE__
  4991. }
  4992. { .mfi
  4993. nop __LINE__
  4994. FSUB f115 = f123, f115
  4995. nop __LINE__
  4996. }
  4997. ;;
  4998. { .mfi
  4999. nop __LINE__
  5000. FSUB f116 = f124, f116
  5001. nop __LINE__
  5002. }
  5003. { .mfi
  5004. nop __LINE__
  5005. FSUB f117 = f125, f117
  5006. nop __LINE__
  5007. }
  5008. ;;
  5009. { .mfi
  5010. nop __LINE__
  5011. FSUB f118 = f126, f118
  5012. nop __LINE__
  5013. }
  5014. { .mfi
  5015. nop __LINE__
  5016. FSUB f119 = f127, f119
  5017. nop __LINE__
  5018. }
  5019. ;;
  5020. #endif
  5021. #ifdef LN
  /*
   * LN path, step 1 of 4: process the register pairs (f70,f71), (f86,f87),
   * (f102,f103) and (f118,f119).  Judging by the STFD targets in this
   * section, f64-f71 are written to C1, f80-f87 to C2, f96-f103 to C3 and
   * f112-f119 to C4.
   *
   * Each pair (x, y) is combined with the coefficient pair (f72, f73) as
   *     t = FMPY(f72, x);      u = FMPY(f73, x);
   *     x = FMA_C(f73, y, t);  y = FMA_D(f72, y, u);
   * which is the operand pattern of a complex multiply.  FMPY, FMA_C and
   * FMA_D are macros defined outside this chunk (presumably they pick
   * add/subtract according to the conjugation mode -- TODO confirm).
   * f72-f75 are consumed here but loaded before this section.
   *
   * Interleaved with the arithmetic, the remaining coefficients are fetched
   * by walking AOFFSET downwards.  Relative to AOFFSET on entry, in units
   * of SIZE:
   *     f76/f77  @   0    f78/f79   @  -2
   *     f88/f89  @  -6    f90/f91   @  -8    f92/f93 @ -10
   *     f104/f105 @ -16   f106/f107 @ -18
   *     f120/f121 @ -26
   * C1..C4 are each moved back by 2*SIZE before their first store.
   */
  5022. { .mfi
  5023. LDFPD f76, f77 = [AOFFSET]
  5024. FMPY f32 = f72, f70
  5025. adds AOFFSET = - 2 * SIZE, AOFFSET
  5026. }
  5027. { .mfi
  5028. nop __LINE__
  5029. FMPY f36 = f72, f102
  5030. nop __LINE__
  5031. }
  5032. ;;
  5033. { .mfi
  5034. LDFPD f78, f79 = [AOFFSET]
  5035. FMPY f33 = f73, f70
  5036. adds AOFFSET = - 4 * SIZE, AOFFSET
  5037. }
  5038. { .mfi
  5039. nop __LINE__
  5040. FMPY f37 = f73, f102
  5041. nop __LINE__
  5042. }
  5043. ;;
  5044. { .mfi
  5045. LDFPD f88, f89 = [AOFFSET]
  5046. FMPY f34 = f72, f86
  5047. adds AOFFSET = - 2 * SIZE, AOFFSET
  5048. }
  5049. { .mfi
  5050. nop __LINE__
  5051. FMPY f38 = f72, f118
  5052. nop __LINE__
  5053. }
  5054. ;;
  5055. { .mfi
  5056. LDFPD f90, f91 = [AOFFSET]
  5057. FMPY f35 = f73, f86
  5058. adds AOFFSET = - 2 * SIZE, AOFFSET
  5059. }
  5060. { .mfi
  5061. nop __LINE__
  5062. FMPY f39 = f73, f118
  5063. nop __LINE__
  5064. }
  5065. ;;
  5066. { .mfi
  5067. LDFPD f92, f93 = [AOFFSET]
  5068. FMA_C f70 = f73, f71, f32
  5069. adds AOFFSET = - 6 * SIZE, AOFFSET
  5070. }
  5071. { .mfi
  5072. nop __LINE__
  5073. FMA_C f102 = f73, f103, f36
  5074. adds C1 = -2 * SIZE, C1
  5075. }
  5076. ;;
  5077. { .mfi
  5078. LDFPD f104, f105 = [AOFFSET]
  5079. FMA_D f71 = f72, f71, f33
  5080. adds AOFFSET = - 2 * SIZE, AOFFSET
  5081. }
  5082. { .mfi
  5083. nop __LINE__
  5084. FMA_D f103 = f72, f103, f37
  5085. adds C2 = -2 * SIZE, C2
  5086. }
  5087. ;;
  5088. { .mfi
  5089. LDFPD f106, f107 = [AOFFSET]
  5090. FMA_C f86 = f73, f87, f34
  5091. adds AOFFSET = - 8 * SIZE, AOFFSET
  5092. }
  5093. { .mfi
  5094. nop __LINE__
  5095. FMA_C f118 = f73, f119, f38
  5096. adds C3 = -2 * SIZE, C3
  5097. }
  5098. ;;
  /*
   * Both adds below sit in one instruction group (no stop between the two
   * bundles), so BOFFSET2 = BOFFSET + 28*SIZE is formed from the BOFFSET
   * value that has not yet been advanced by 24*SIZE.
   */
  5099. { .mfi
  5100. LDFPD f120, f121 = [AOFFSET]
  5101. FMA_D f87 = f72, f87, f35
  5102. adds BOFFSET2 = 28 * SIZE, BOFFSET
  5103. }
  5104. { .mfi
  5105. nop __LINE__
  5106. FMA_D f119 = f72, f119, f39
  5107. adds BOFFSET = 24 * SIZE, BOFFSET
  5108. }
  5109. ;;
  /*
   * Store the four finished pairs through BOFFSET/BOFFSET2 (three +SIZE
   * steps, then -11*SIZE: net -8*SIZE) and through C1..C4 (+SIZE, then
   * -3*SIZE: net -2*SIZE).  In the F slots, the coefficient pair (f74,f75)
   * times each finished pair is folded into the (f68,f69), (f84,f85),
   * (f100,f101) and (f116,f117) accumulators.  FNMA, FMA_A and FMA_B are
   * macros defined outside this chunk; if FNMA maps to IA-64 fnma it
   * computes dst = addend - a*b -- TODO confirm.
   */
  5110. { .mfi
  5111. STFD [BOFFSET] = f70, SIZE
  5112. FNMA f68 = f74, f70, f68
  5113. adds C4 = -2 * SIZE, C4
  5114. }
  5115. { .mfi
  5116. STFD [BOFFSET2] = f102, SIZE
  5117. FNMA f100 = f74, f102, f100
  5118. nop __LINE__
  5119. }
  5120. ;;
  5121. { .mfi
  5122. STFD [BOFFSET] = f71, SIZE
  5123. FMA_A f69 = f75, f70, f69
  5124. nop __LINE__
  5125. }
  5126. { .mfi
  5127. STFD [BOFFSET2] = f103, SIZE
  5128. FMA_A f101 = f75, f102, f101
  5129. nop __LINE__
  5130. }
  5131. ;;
  5132. { .mfi
  5133. STFD [BOFFSET] = f86, SIZE
  5134. FNMA f84 = f74, f86, f84
  5135. nop __LINE__
  5136. }
  5137. { .mfi
  5138. STFD [BOFFSET2] = f118, SIZE
  5139. FNMA f116 = f74, f118, f116
  5140. nop __LINE__
  5141. }
  5142. ;;
  5143. { .mfi
  5144. STFD [BOFFSET] = f87, -11 * SIZE
  5145. FMA_A f85 = f75, f86, f85
  5146. nop __LINE__
  5147. }
  5148. { .mfi
  5149. STFD [BOFFSET2] = f119, -11 * SIZE
  5150. FMA_A f117 = f75, f118, f117
  5151. nop __LINE__
  5152. }
  5153. ;;
  5154. { .mfi
  5155. STFD [C1 ] = f70, SIZE
  5156. FMA_B f68 = f75, f71, f68
  5157. nop __LINE__
  5158. }
  5159. { .mfi
  5160. STFD [C3 ] = f102, SIZE
  5161. FMA_B f100 = f75, f103, f100
  5162. nop __LINE__
  5163. }
  5164. ;;
  5165. { .mfi
  5166. STFD [C1 ] = f71, -3 * SIZE
  5167. FNMA f69 = f74, f71, f69
  5168. nop __LINE__
  5169. }
  5170. { .mfi
  5171. STFD [C3 ] = f103, -3 * SIZE
  5172. FNMA f101 = f74, f103, f101
  5173. nop __LINE__
  5174. }
  5175. ;;
  5176. { .mfi
  5177. STFD [C2 ] = f86, SIZE
  5178. FMA_B f84 = f75, f87, f84
  5179. nop __LINE__
  5180. }
  5181. { .mfi
  5182. STFD [C4 ] = f118, SIZE
  5183. FMA_B f116 = f75, f119, f116
  5184. nop __LINE__
  5185. }
  5186. ;;
  5187. { .mfi
  5188. STFD [C2 ] = f87, -3 * SIZE
  5189. FNMA f85 = f74, f87, f85
  5190. nop __LINE__
  5191. }
  5192. { .mfi
  5193. STFD [C4 ] = f119, -3 * SIZE
  5194. FNMA f117 = f74, f119, f117
  5195. nop __LINE__
  5196. }
  5197. ;;
  /*
   * Same elimination pattern with the coefficient pair (f76,f77), folded
   * into the (f66,f67), (f82,f83), (f98,f99) and (f114,f115) accumulators.
   */
  5198. { .mfi
  5199. nop __LINE__
  5200. FNMA f66 = f76, f70, f66
  5201. nop __LINE__
  5202. }
  5203. { .mfi
  5204. nop __LINE__
  5205. FNMA f98 = f76, f102, f98
  5206. nop __LINE__
  5207. }
  5208. ;;
  5209. { .mfi
  5210. nop __LINE__
  5211. FMA_A f67 = f77, f70, f67
  5212. nop __LINE__
  5213. }
  5214. { .mfi
  5215. nop __LINE__
  5216. FMA_A f99 = f77, f102, f99
  5217. nop __LINE__
  5218. }
  5219. ;;
  5220. { .mfi
  5221. nop __LINE__
  5222. FNMA f82 = f76, f86, f82
  5223. nop __LINE__
  5224. }
  5225. { .mfi
  5226. nop __LINE__
  5227. FNMA f114 = f76, f118, f114
  5228. nop __LINE__
  5229. }
  5230. ;;
  5231. { .mfi
  5232. nop __LINE__
  5233. FMA_A f83 = f77, f86, f83
  5234. nop __LINE__
  5235. }
  5236. { .mfi
  5237. nop __LINE__
  5238. FMA_A f115 = f77, f118, f115
  5239. nop __LINE__
  5240. }
  5241. ;;
  5242. { .mfi
  5243. nop __LINE__
  5244. FMA_B f66 = f77, f71, f66
  5245. nop __LINE__
  5246. }
  5247. { .mfi
  5248. nop __LINE__
  5249. FMA_B f98 = f77, f103, f98
  5250. nop __LINE__
  5251. }
  5252. ;;
  5253. { .mfi
  5254. nop __LINE__
  5255. FNMA f67 = f76, f71, f67
  5256. nop __LINE__
  5257. }
  5258. { .mfi
  5259. nop __LINE__
  5260. FNMA f99 = f76, f103, f99
  5261. nop __LINE__
  5262. }
  5263. ;;
  5264. { .mfi
  5265. nop __LINE__
  5266. FMA_B f82 = f77, f87, f82
  5267. nop __LINE__
  5268. }
  5269. { .mfi
  5270. nop __LINE__
  5271. FMA_B f114 = f77, f119, f114
  5272. nop __LINE__
  5273. }
  5274. ;;
  5275. { .mfi
  5276. nop __LINE__
  5277. FNMA f83 = f76, f87, f83
  5278. nop __LINE__
  5279. }
  5280. { .mfi
  5281. nop __LINE__
  5282. FNMA f115 = f76, f119, f115
  5283. nop __LINE__
  5284. }
  5285. ;;
  /*
   * Same elimination pattern with the coefficient pair (f78,f79), folded
   * into the (f64,f65), (f80,f81), (f96,f97) and (f112,f113) accumulators.
   */
  5286. { .mfi
  5287. nop __LINE__
  5288. FNMA f64 = f78, f70, f64
  5289. nop __LINE__
  5290. }
  5291. { .mfi
  5292. nop __LINE__
  5293. FNMA f96 = f78, f102, f96
  5294. nop __LINE__
  5295. }
  5296. ;;
  5297. { .mfi
  5298. nop __LINE__
  5299. FMA_A f65 = f79, f70, f65
  5300. nop __LINE__
  5301. }
  5302. { .mfi
  5303. nop __LINE__
  5304. FMA_A f97 = f79, f102, f97
  5305. nop __LINE__
  5306. }
  5307. ;;
  5308. { .mfi
  5309. nop __LINE__
  5310. FNMA f80 = f78, f86, f80
  5311. nop __LINE__
  5312. }
  5313. { .mfi
  5314. nop __LINE__
  5315. FNMA f112 = f78, f118, f112
  5316. nop __LINE__
  5317. }
  5318. ;;
  5319. { .mfi
  5320. nop __LINE__
  5321. FMA_A f81 = f79, f86, f81
  5322. nop __LINE__
  5323. }
  5324. { .mfi
  5325. nop __LINE__
  5326. FMA_A f113 = f79, f118, f113
  5327. nop __LINE__
  5328. }
  5329. ;;
  5330. { .mfi
  5331. nop __LINE__
  5332. FMA_B f64 = f79, f71, f64
  5333. nop __LINE__
  5334. }
  5335. { .mfi
  5336. nop __LINE__
  5337. FMA_B f96 = f79, f103, f96
  5338. nop __LINE__
  5339. }
  5340. ;;
  5341. { .mfi
  5342. nop __LINE__
  5343. FNMA f65 = f78, f71, f65
  5344. nop __LINE__
  5345. }
  5346. { .mfi
  5347. nop __LINE__
  5348. FNMA f97 = f78, f103, f97
  5349. nop __LINE__
  5350. }
  5351. ;;
  5352. { .mfi
  5353. nop __LINE__
  5354. FMA_B f80 = f79, f87, f80
  5355. nop __LINE__
  5356. }
  5357. { .mfi
  5358. nop __LINE__
  5359. FMA_B f112 = f79, f119, f112
  5360. nop __LINE__
  5361. }
  5362. ;;
  5363. { .mfi
  5364. nop __LINE__
  5365. FNMA f81 = f78, f87, f81
  5366. nop __LINE__
  5367. }
  5368. { .mfi
  5369. nop __LINE__
  5370. FNMA f113 = f78, f119, f113
  5371. nop __LINE__
  5372. }
  5373. ;;
  /*
   * LN path, step 2 of 4: combine the pairs (f68,f69), (f84,f85),
   * (f100,f101) and (f116,f117) with the coefficient pair (f88,f89):
   *     t = FMPY(f88, x);      u = FMPY(f89, x);
   *     x = FMA_C(f89, y, t);  y = FMA_D(f88, y, u);
   * (complex-multiply operand pattern; f32-f39 hold the temporaries).
   * FMPY/FMA_C/FMA_D are macros defined outside this chunk.
   */
  5374. { .mfi
  5375. nop __LINE__
  5376. FMPY f32 = f88, f68
  5377. nop __LINE__
  5378. }
  5379. { .mfi
  5380. nop __LINE__
  5381. FMPY f36 = f88, f100
  5382. nop __LINE__
  5383. }
  5384. ;;
  5385. { .mfi
  5386. nop __LINE__
  5387. FMPY f33 = f89, f68
  5388. nop __LINE__
  5389. }
  5390. { .mfi
  5391. nop __LINE__
  5392. FMPY f37 = f89, f100
  5393. nop __LINE__
  5394. }
  5395. ;;
  5396. { .mfi
  5397. nop __LINE__
  5398. FMPY f34 = f88, f84
  5399. nop __LINE__
  5400. }
  5401. { .mfi
  5402. nop __LINE__
  5403. FMPY f38 = f88, f116
  5404. nop __LINE__
  5405. }
  5406. ;;
  5407. { .mfi
  5408. nop __LINE__
  5409. FMPY f35 = f89, f84
  5410. nop __LINE__
  5411. }
  5412. { .mfi
  5413. nop __LINE__
  5414. FMPY f39 = f89, f116
  5415. nop __LINE__
  5416. }
  5417. ;;
  5418. { .mfi
  5419. nop __LINE__
  5420. FMA_C f68 = f89, f69, f32
  5421. nop __LINE__
  5422. }
  5423. { .mfi
  5424. nop __LINE__
  5425. FMA_C f100 = f89, f101, f36
  5426. nop __LINE__
  5427. }
  5428. ;;
  5429. { .mfi
  5430. nop __LINE__
  5431. FMA_D f69 = f88, f69, f33
  5432. nop __LINE__
  5433. }
  5434. { .mfi
  5435. nop __LINE__
  5436. FMA_D f101 = f88, f101, f37
  5437. nop __LINE__
  5438. }
  5439. ;;
  5440. { .mfi
  5441. nop __LINE__
  5442. FMA_C f84 = f89, f85, f34
  5443. nop __LINE__
  5444. }
  5445. { .mfi
  5446. nop __LINE__
  5447. FMA_C f116 = f89, f117, f38
  5448. nop __LINE__
  5449. }
  5450. ;;
  5451. { .mfi
  5452. nop __LINE__
  5453. FMA_D f85 = f88, f85, f35
  5454. nop __LINE__
  5455. }
  5456. { .mfi
  5457. nop __LINE__
  5458. FMA_D f117 = f88, f117, f39
  5459. nop __LINE__
  5460. }
  5461. ;;
  /*
   * Store the four finished pairs through BOFFSET/BOFFSET2 (net -8*SIZE)
   * and C1..C4 (net -2*SIZE each).  In the F slots, the coefficient pair
   * (f90,f91) times each finished pair is folded into the (f66,f67),
   * (f82,f83), (f98,f99) and (f114,f115) accumulators via the externally
   * defined FNMA/FMA_A/FMA_B macros.
   */
  5462. { .mfi
  5463. STFD [BOFFSET] = f68, SIZE
  5464. FNMA f66 = f90, f68, f66
  5465. nop __LINE__
  5466. }
  5467. { .mfi
  5468. STFD [BOFFSET2] = f100, SIZE
  5469. FNMA f98 = f90, f100, f98
  5470. nop __LINE__
  5471. }
  5472. ;;
  5473. { .mfi
  5474. STFD [BOFFSET] = f69, SIZE
  5475. FMA_A f67 = f91, f68, f67
  5476. nop __LINE__
  5477. }
  5478. { .mfi
  5479. STFD [BOFFSET2] = f101, SIZE
  5480. FMA_A f99 = f91, f100, f99
  5481. nop __LINE__
  5482. }
  5483. ;;
  5484. { .mfi
  5485. STFD [BOFFSET] = f84, SIZE
  5486. FNMA f82 = f90, f84, f82
  5487. nop __LINE__
  5488. }
  5489. { .mfi
  5490. STFD [BOFFSET2] = f116, SIZE
  5491. FNMA f114 = f90, f116, f114
  5492. nop __LINE__
  5493. }
  5494. ;;
  5495. { .mfi
  5496. STFD [BOFFSET] = f85, -11 * SIZE
  5497. FMA_A f83 = f91, f84, f83
  5498. nop __LINE__
  5499. }
  5500. { .mfi
  5501. STFD [BOFFSET2] = f117, -11 * SIZE
  5502. FMA_A f115 = f91, f116, f115
  5503. nop __LINE__
  5504. }
  5505. ;;
  5506. { .mfi
  5507. STFD [C1 ] = f68, SIZE
  5508. FMA_B f66 = f91, f69, f66
  5509. nop __LINE__
  5510. }
  5511. { .mfi
  5512. STFD [C3 ] = f100, SIZE
  5513. FMA_B f98 = f91, f101, f98
  5514. nop __LINE__
  5515. }
  5516. ;;
  5517. { .mfi
  5518. STFD [C1 ] = f69, -3 * SIZE
  5519. FNMA f67 = f90, f69, f67
  5520. nop __LINE__
  5521. }
  5522. { .mfi
  5523. STFD [C3 ] = f101, -3 * SIZE
  5524. FNMA f99 = f90, f101, f99
  5525. nop __LINE__
  5526. }
  5527. ;;
  5528. { .mfi
  5529. STFD [C2 ] = f84, SIZE
  5530. FMA_B f82 = f91, f85, f82
  5531. nop __LINE__
  5532. }
  5533. { .mfi
  5534. STFD [C4 ] = f116, SIZE
  5535. FMA_B f114 = f91, f117, f114
  5536. nop __LINE__
  5537. }
  5538. ;;
  5539. { .mfi
  5540. STFD [C2 ] = f85, -3 * SIZE
  5541. FNMA f83 = f90, f85, f83
  5542. nop __LINE__
  5543. }
  5544. { .mfi
  5545. STFD [C4 ] = f117, -3 * SIZE
  5546. FNMA f115 = f90, f117, f115
  5547. nop __LINE__
  5548. }
  5549. ;;
  /*
   * Same elimination pattern with the coefficient pair (f92,f93), folded
   * into the (f64,f65), (f80,f81), (f96,f97) and (f112,f113) accumulators.
   */
  5550. { .mfi
  5551. nop __LINE__
  5552. FNMA f64 = f92, f68, f64
  5553. nop __LINE__
  5554. }
  5555. { .mfi
  5556. nop __LINE__
  5557. FNMA f96 = f92, f100, f96
  5558. nop __LINE__
  5559. }
  5560. ;;
  5561. { .mfi
  5562. nop __LINE__
  5563. FMA_A f65 = f93, f68, f65
  5564. nop __LINE__
  5565. }
  5566. { .mfi
  5567. nop __LINE__
  5568. FMA_A f97 = f93, f100, f97
  5569. nop __LINE__
  5570. }
  5571. ;;
  5572. { .mfi
  5573. nop __LINE__
  5574. FNMA f80 = f92, f84, f80
  5575. nop __LINE__
  5576. }
  5577. { .mfi
  5578. nop __LINE__
  5579. FNMA f112 = f92, f116, f112
  5580. nop __LINE__
  5581. }
  5582. ;;
  5583. { .mfi
  5584. nop __LINE__
  5585. FMA_A f81 = f93, f84, f81
  5586. nop __LINE__
  5587. }
  5588. { .mfi
  5589. nop __LINE__
  5590. FMA_A f113 = f93, f116, f113
  5591. nop __LINE__
  5592. }
  5593. ;;
  5594. { .mfi
  5595. nop __LINE__
  5596. FMA_B f64 = f93, f69, f64
  5597. nop __LINE__
  5598. }
  5599. { .mfi
  5600. nop __LINE__
  5601. FMA_B f96 = f93, f101, f96
  5602. nop __LINE__
  5603. }
  5604. ;;
  5605. { .mfi
  5606. nop __LINE__
  5607. FNMA f65 = f92, f69, f65
  5608. nop __LINE__
  5609. }
  5610. { .mfi
  5611. nop __LINE__
  5612. FNMA f97 = f92, f101, f97
  5613. nop __LINE__
  5614. }
  5615. ;;
  5616. { .mfi
  5617. nop __LINE__
  5618. FMA_B f80 = f93, f85, f80
  5619. nop __LINE__
  5620. }
  5621. { .mfi
  5622. nop __LINE__
  5623. FMA_B f112 = f93, f117, f112
  5624. nop __LINE__
  5625. }
  5626. ;;
  5627. { .mfi
  5628. nop __LINE__
  5629. FNMA f81 = f92, f85, f81
  5630. nop __LINE__
  5631. }
  5632. { .mfi
  5633. nop __LINE__
  5634. FNMA f113 = f92, f117, f113
  5635. nop __LINE__
  5636. }
  5637. ;;
  /*
   * LN path, step 3 of 4: combine the pairs (f66,f67), (f82,f83),
   * (f98,f99) and (f114,f115) with the coefficient pair (f104,f105):
   *     t = FMPY(f104, x);      u = FMPY(f105, x);
   *     x = FMA_C(f105, y, t);  y = FMA_D(f104, y, u);
   * (complex-multiply operand pattern; f32-f39 hold the temporaries).
   * FMPY/FMA_C/FMA_D are macros defined outside this chunk.
   */
  5638. { .mfi
  5639. nop __LINE__
  5640. FMPY f32 = f104, f66
  5641. nop __LINE__
  5642. }
  5643. { .mfi
  5644. nop __LINE__
  5645. FMPY f36 = f104, f98
  5646. nop __LINE__
  5647. }
  5648. ;;
  5649. { .mfi
  5650. nop __LINE__
  5651. FMPY f33 = f105, f66
  5652. nop __LINE__
  5653. }
  5654. { .mfi
  5655. nop __LINE__
  5656. FMPY f37 = f105, f98
  5657. nop __LINE__
  5658. }
  5659. ;;
  5660. { .mfi
  5661. nop __LINE__
  5662. FMPY f34 = f104, f82
  5663. nop __LINE__
  5664. }
  5665. { .mfi
  5666. nop __LINE__
  5667. FMPY f38 = f104, f114
  5668. nop __LINE__
  5669. }
  5670. ;;
  5671. { .mfi
  5672. nop __LINE__
  5673. FMPY f35 = f105, f82
  5674. nop __LINE__
  5675. }
  5676. { .mfi
  5677. nop __LINE__
  5678. FMPY f39 = f105, f114
  5679. nop __LINE__
  5680. }
  5681. ;;
  5682. { .mfi
  5683. nop __LINE__
  5684. FMA_C f66 = f105, f67, f32
  5685. nop __LINE__
  5686. }
  5687. { .mfi
  5688. nop __LINE__
  5689. FMA_C f98 = f105, f99, f36
  5690. nop __LINE__
  5691. }
  5692. ;;
  5693. { .mfi
  5694. nop __LINE__
  5695. FMA_D f67 = f104, f67, f33
  5696. nop __LINE__
  5697. }
  5698. { .mfi
  5699. nop __LINE__
  5700. FMA_D f99 = f104, f99, f37
  5701. nop __LINE__
  5702. }
  5703. ;;
  5704. { .mfi
  5705. nop __LINE__
  5706. FMA_C f82 = f105, f83, f34
  5707. nop __LINE__
  5708. }
  5709. { .mfi
  5710. nop __LINE__
  5711. FMA_C f114 = f105, f115, f38
  5712. nop __LINE__
  5713. }
  5714. ;;
  5715. { .mfi
  5716. nop __LINE__
  5717. FMA_D f83 = f104, f83, f35
  5718. nop __LINE__
  5719. }
  5720. { .mfi
  5721. nop __LINE__
  5722. FMA_D f115 = f104, f115, f39
  5723. nop __LINE__
  5724. }
  5725. ;;
  /*
   * Store the four finished pairs through BOFFSET/BOFFSET2 (net -8*SIZE)
   * and C1..C4 (net -2*SIZE each).  In the F slots, the coefficient pair
   * (f106,f107) times each finished pair is folded into the (f64,f65),
   * (f80,f81), (f96,f97) and (f112,f113) accumulators via the externally
   * defined FNMA/FMA_A/FMA_B macros.
   */
  5726. { .mfi
  5727. STFD [BOFFSET] = f66, SIZE
  5728. FNMA f64 = f106, f66, f64
  5729. nop __LINE__
  5730. }
  5731. { .mfi
  5732. STFD [BOFFSET2] = f98, SIZE
  5733. FNMA f96 = f106, f98, f96
  5734. nop __LINE__
  5735. }
  5736. ;;
  5737. { .mfi
  5738. STFD [BOFFSET] = f67, SIZE
  5739. FMA_A f65 = f107, f66, f65
  5740. nop __LINE__
  5741. }
  5742. { .mfi
  5743. STFD [BOFFSET2] = f99, SIZE
  5744. FMA_A f97 = f107, f98, f97
  5745. nop __LINE__
  5746. }
  5747. ;;
  5748. { .mfi
  5749. STFD [BOFFSET] = f82, SIZE
  5750. FNMA f80 = f106, f82, f80
  5751. nop __LINE__
  5752. }
  5753. { .mfi
  5754. STFD [BOFFSET2] = f114, SIZE
  5755. FNMA f112 = f106, f114, f112
  5756. nop __LINE__
  5757. }
  5758. ;;
  5759. { .mfi
  5760. STFD [BOFFSET] = f83, -11 * SIZE
  5761. FMA_A f81 = f107, f82, f81
  5762. nop __LINE__
  5763. }
  5764. { .mfi
  5765. STFD [BOFFSET2] = f115, -11 * SIZE
  5766. FMA_A f113 = f107, f114, f113
  5767. nop __LINE__
  5768. }
  5769. ;;
  5770. { .mfi
  5771. STFD [C1 ] = f66, SIZE
  5772. FMA_B f64 = f107, f67, f64
  5773. nop __LINE__
  5774. }
  5775. { .mfi
  5776. STFD [C3 ] = f98, SIZE
  5777. FMA_B f96 = f107, f99, f96
  5778. nop __LINE__
  5779. }
  5780. ;;
  5781. { .mfi
  5782. STFD [C1 ] = f67, -3 * SIZE
  5783. FNMA f65 = f106, f67, f65
  5784. nop __LINE__
  5785. }
  5786. { .mfi
  5787. STFD [C3 ] = f99, -3 * SIZE
  5788. FNMA f97 = f106, f99, f97
  5789. nop __LINE__
  5790. }
  5791. ;;
  5792. { .mfi
  5793. STFD [C2 ] = f82, SIZE
  5794. FMA_B f80 = f107, f83, f80
  5795. nop __LINE__
  5796. }
  5797. { .mfi
  5798. STFD [C4 ] = f114, SIZE
  5799. FMA_B f112 = f107, f115, f112
  5800. nop __LINE__
  5801. }
  5802. ;;
  5803. { .mfi
  5804. STFD [C2 ] = f83, -3 * SIZE
  5805. FNMA f81 = f106, f83, f81
  5806. nop __LINE__
  5807. }
  5808. { .mfi
  5809. STFD [C4 ] = f115, -3 * SIZE
  5810. FNMA f113 = f106, f115, f113
  5811. nop __LINE__
  5812. }
  5813. ;;
  /*
   * LN path, step 4 of 4: combine the last pairs (f64,f65), (f80,f81),
   * (f96,f97) and (f112,f113) with the coefficient pair (f120,f121):
   *     t = FMPY(f120, x);      u = FMPY(f121, x);
   *     x = FMA_C(f121, y, t);  y = FMA_D(f120, y, u);
   * (complex-multiply operand pattern; f32-f39 hold the temporaries).
   * No further elimination follows; the section ends with the stores and
   * the loop bookkeeping below.
   */
  5814. { .mfi
  5815. nop __LINE__
  5816. FMPY f32 = f120, f64
  5817. nop __LINE__
  5818. }
  5819. { .mfi
  5820. nop __LINE__
  5821. FMPY f36 = f120, f96
  5822. nop __LINE__
  5823. }
  5824. ;;
  5825. { .mfi
  5826. nop __LINE__
  5827. FMPY f33 = f121, f64
  5828. nop __LINE__
  5829. }
  5830. { .mfi
  5831. nop __LINE__
  5832. FMPY f37 = f121, f96
  5833. nop __LINE__
  5834. }
  5835. ;;
  5836. { .mfi
  5837. nop __LINE__
  5838. FMPY f34 = f120, f80
  5839. nop __LINE__
  5840. }
  5841. { .mfi
  5842. nop __LINE__
  5843. FMPY f38 = f120, f112
  5844. nop __LINE__
  5845. }
  5846. ;;
  5847. { .mfi
  5848. nop __LINE__
  5849. FMPY f35 = f121, f80
  5850. nop __LINE__
  5851. }
  5852. { .mfi
  5853. nop __LINE__
  5854. FMPY f39 = f121, f112
  5855. nop __LINE__
  5856. }
  5857. ;;
  5858. { .mfi
  5859. nop __LINE__
  5860. FMA_C f64 = f121, f65, f32
  5861. nop __LINE__
  5862. }
  5863. { .mfi
  5864. nop __LINE__
  5865. FMA_C f96 = f121, f97, f36
  5866. nop __LINE__
  5867. }
  5868. ;;
  5869. { .mfi
  5870. nop __LINE__
  5871. FMA_D f65 = f120, f65, f33
  5872. nop __LINE__
  5873. }
  5874. { .mfi
  5875. nop __LINE__
  5876. FMA_D f97 = f120, f97, f37
  5877. nop __LINE__
  5878. }
  5879. ;;
  5880. { .mfi
  5881. nop __LINE__
  5882. FMA_C f80 = f121, f81, f34
  5883. nop __LINE__
  5884. }
  5885. { .mfi
  5886. nop __LINE__
  5887. FMA_C f112 = f121, f113, f38
  5888. nop __LINE__
  5889. }
  5890. ;;
  5891. { .mfi
  5892. nop __LINE__
  5893. FMA_D f81 = f120, f81, f35
  5894. nop __LINE__
  5895. }
  5896. { .mfi
  5897. nop __LINE__
  5898. FMA_D f113 = f120, f113, f39
  5899. nop __LINE__
  5900. }
  5901. ;;
  /*
   * Final stores through BOFFSET/BOFFSET2: three +SIZE steps followed by
   * -3*SIZE, so both pointers finish this step where they started it.
   */
  5902. { .mmi
  5903. STFD [BOFFSET] = f64, SIZE
  5904. STFD [BOFFSET2] = f96, SIZE
  5905. nop __LINE__
  5906. }
  5907. ;;
  5908. { .mmi
  5909. STFD [BOFFSET] = f65, SIZE
  5910. STFD [BOFFSET2] = f97, SIZE
  5911. nop __LINE__
  5912. }
  5913. ;;
  5914. { .mmi
  5915. STFD [BOFFSET] = f80, SIZE
  5916. STFD [BOFFSET2] = f112, SIZE
  5917. nop __LINE__
  5918. }
  5919. ;;
  5920. { .mmi
  5921. STFD [BOFFSET] = f81, -3 * SIZE
  5922. STFD [BOFFSET2] = f113, -3 * SIZE
  5923. nop __LINE__
  5924. }
  5925. ;;
  /*
   * Final stores through C1..C4 (+SIZE then -SIZE: no net movement in this
   * step).  Each stored register is then overwritten from f0, which reads
   * as 0.0 on IA-64.  The I slots carry the loop bookkeeping:
   *     KK -= 4;  p6 = (I != 1);  L = K - KK;  I -= 1;
   * and the last bundle branches to .L011 (label outside this chunk) when
   * p6 is set.
   */
  5926. { .mfi
  5927. STFD [C1 ] = f64, SIZE
  5928. mov f64 = f0
  5929. nop __LINE__
  5930. }
  5931. { .mfi
  5932. STFD [C3 ] = f96, SIZE
  5933. mov f96 = f0
  5934. nop __LINE__
  5935. }
  5936. ;;
  5937. { .mfi
  5938. STFD [C1 ] = f65, -1 * SIZE
  5939. mov f65 = f0
  5940. adds KK = -4, KK
  5941. }
  5942. { .mfi
  5943. STFD [C3 ] = f97, -1 * SIZE
  5944. mov f97 = f0
  5945. nop __LINE__
  5946. }
  5947. ;;
  5948. { .mfi
  5949. STFD [C2 ] = f80, SIZE
  5950. mov f80 = f0
  5951. cmp.ne p6, p0 = 1, I
  5952. }
  5953. { .mfi
  5954. STFD [C4 ] = f112, SIZE
  5955. mov f112 = f0
  5956. sub L = K, KK
  5957. }
  5958. ;;
  5959. { .mfi
  5960. STFD [C2 ] = f81, -1 * SIZE
  5961. mov f81 = f0
  5962. adds I = -1, I
  5963. }
  5964. { .mfb
  5965. STFD [C4 ] = f113, -1 * SIZE
  5966. mov f113 = f0
  5967. (p6) br.cond.dptk .L011
  5968. }
  5969. ;;
  5970. #endif
  5971. #ifdef LT
  5972. { .mfi
  5973. LDFPD f76, f77 = [AOFFSET], 2 * SIZE
  5974. FMPY f32 = f72, f64
  5975. nop __LINE__
  5976. }
  5977. { .mfi
  5978. nop __LINE__
  5979. FMPY f36 = f72, f96
  5980. nop __LINE__
  5981. }
  5982. ;;
  5983. { .mfi
  5984. LDFPD f78, f79 = [AOFFSET]
  5985. FMPY f33 = f73, f64
  5986. adds AOFFSET = 4 * SIZE, AOFFSET
  5987. }
  5988. { .mfi
  5989. nop __LINE__
  5990. FMPY f37 = f73, f96
  5991. nop __LINE__
  5992. }
  5993. ;;
  5994. { .mfi
  5995. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  5996. FMPY f34 = f72, f80
  5997. nop __LINE__
  5998. }
  5999. { .mfi
  6000. nop __LINE__
  6001. FMPY f38 = f72, f112
  6002. nop __LINE__
  6003. }
  6004. ;;
  6005. { .mfi
  6006. LDFPD f92, f93 = [AOFFSET], 2 * SIZE
  6007. FMPY f35 = f73, f80
  6008. nop __LINE__
  6009. }
  6010. { .mfi
  6011. nop __LINE__
  6012. FMPY f39 = f73, f112
  6013. nop __LINE__
  6014. }
  6015. ;;
  6016. { .mfi
  6017. LDFPD f94, f95 = [AOFFSET]
  6018. FMA_C f64 = f73, f65, f32
  6019. adds AOFFSET = 6 * SIZE, AOFFSET
  6020. }
  6021. { .mfi
  6022. nop __LINE__
  6023. FMA_C f96 = f73, f97, f36
  6024. nop __LINE__
  6025. }
  6026. ;;
  6027. { .mfi
  6028. LDFPD f108, f109 = [AOFFSET], 2 * SIZE
  6029. FMA_D f65 = f72, f65, f33
  6030. nop __LINE__
  6031. }
  6032. { .mfi
  6033. nop __LINE__
  6034. FMA_D f97 = f72, f97, f37
  6035. nop __LINE__
  6036. }
  6037. ;;
  6038. { .mfi
  6039. LDFPD f110, f111 = [AOFFSET]
  6040. FMA_C f80 = f73, f81, f34
  6041. adds AOFFSET = 8 * SIZE, AOFFSET
  6042. }
  6043. { .mfi
  6044. nop __LINE__
  6045. FMA_C f112 = f73, f113, f38
  6046. nop __LINE__
  6047. }
  6048. ;;
  6049. { .mfi
  6050. LDFPD f126, f127 = [AOFFSET]
  6051. FMA_D f81 = f72, f81, f35
  6052. adds AOFFSET = - 30 * SIZE, AOFFSET
  6053. }
  6054. { .mfi
  6055. nop __LINE__
  6056. FMA_D f113 = f72, f113, f39
  6057. adds BOFFSET2 = 4 * SIZE, BOFFSET
  6058. }
  6059. ;;
  6060. { .mfi
  6061. STFD [BOFFSET] = f64, SIZE
  6062. FNMA f66 = f74, f64, f66
  6063. nop __LINE__
  6064. }
  6065. { .mfi
  6066. STFD [BOFFSET2] = f96, SIZE
  6067. FNMA f98 = f74, f96, f98
  6068. nop __LINE__
  6069. }
  6070. ;;
  6071. { .mfi
  6072. STFD [BOFFSET] = f65, SIZE
  6073. FMA_A f67 = f75, f64, f67
  6074. nop __LINE__
  6075. }
  6076. { .mfi
  6077. STFD [BOFFSET2] = f97, SIZE
  6078. FMA_A f99 = f75, f96, f99
  6079. nop __LINE__
  6080. }
  6081. ;;
  6082. { .mfi
  6083. STFD [BOFFSET] = f80, SIZE
  6084. FNMA f82 = f74, f80, f82
  6085. nop __LINE__
  6086. }
  6087. { .mfi
  6088. STFD [BOFFSET2] = f112, SIZE
  6089. FNMA f114 = f74, f112, f114
  6090. nop __LINE__
  6091. }
  6092. ;;
  6093. { .mfi
  6094. STFD [BOFFSET] = f81, 5 * SIZE
  6095. FMA_A f83 = f75, f80, f83
  6096. nop __LINE__
  6097. }
  6098. { .mfi
  6099. STFD [BOFFSET2] = f113, 5 * SIZE
  6100. FMA_A f115 = f75, f112, f115
  6101. nop __LINE__
  6102. }
  6103. ;;
  6104. { .mfi
  6105. STFD [C1 ] = f64, SIZE
  6106. FMA_B f66 = f75, f65, f66
  6107. nop __LINE__
  6108. }
  6109. { .mfi
  6110. STFD [C3 ] = f96, SIZE
  6111. FMA_B f98 = f75, f97, f98
  6112. nop __LINE__
  6113. }
  6114. ;;
  6115. { .mfi
  6116. STFD [C1 ] = f65, SIZE
  6117. FNMA f67 = f74, f65, f67
  6118. nop __LINE__
  6119. }
  6120. { .mfi
  6121. STFD [C3 ] = f97, SIZE
  6122. FNMA f99 = f74, f97, f99
  6123. nop __LINE__
  6124. }
  6125. ;;
  6126. { .mfi
  6127. STFD [C2 ] = f80, SIZE
  6128. FMA_B f82 = f75, f81, f82
  6129. nop __LINE__
  6130. }
  6131. { .mfi
  6132. STFD [C4 ] = f112, SIZE
  6133. FMA_B f114 = f75, f113, f114
  6134. nop __LINE__
  6135. }
  6136. ;;
  6137. { .mfi
  6138. STFD [C2 ] = f81, SIZE
  6139. FNMA f83 = f74, f81, f83
  6140. nop __LINE__
  6141. }
  6142. { .mfi
  6143. STFD [C4 ] = f113, SIZE
  6144. FNMA f115 = f74, f113, f115
  6145. nop __LINE__
  6146. }
  6147. ;;
  6148. { .mfi
  6149. nop __LINE__
  6150. FNMA f68 = f76, f64, f68
  6151. nop __LINE__
  6152. }
  6153. { .mfi
  6154. nop __LINE__
  6155. FNMA f100 = f76, f96, f100
  6156. nop __LINE__
  6157. }
  6158. ;;
  6159. { .mfi
  6160. nop __LINE__
  6161. FMA_A f69 = f77, f64, f69
  6162. nop __LINE__
  6163. }
  6164. { .mfi
  6165. nop __LINE__
  6166. FMA_A f101 = f77, f96, f101
  6167. nop __LINE__
  6168. }
  6169. ;;
  6170. { .mfi
  6171. nop __LINE__
  6172. FNMA f84 = f76, f80, f84
  6173. nop __LINE__
  6174. }
  6175. { .mfi
  6176. nop __LINE__
  6177. FNMA f116 = f76, f112, f116
  6178. nop __LINE__
  6179. }
  6180. ;;
  6181. { .mfi
  6182. nop __LINE__
  6183. FMA_A f85 = f77, f80, f85
  6184. nop __LINE__
  6185. }
  6186. { .mfi
  6187. nop __LINE__
  6188. FMA_A f117 = f77, f112, f117
  6189. nop __LINE__
  6190. }
  6191. ;;
  6192. { .mfi
  6193. nop __LINE__
  6194. FMA_B f68 = f77, f65, f68
  6195. nop __LINE__
  6196. }
  6197. { .mfi
  6198. nop __LINE__
  6199. FMA_B f100 = f77, f97, f100
  6200. nop __LINE__
  6201. }
  6202. ;;
  6203. { .mfi
  6204. nop __LINE__
  6205. FNMA f69 = f76, f65, f69
  6206. nop __LINE__
  6207. }
  6208. { .mfi
  6209. nop __LINE__
  6210. FNMA f101 = f76, f97, f101
  6211. nop __LINE__
  6212. }
  6213. ;;
  6214. { .mfi
  6215. nop __LINE__
  6216. FMA_B f84 = f77, f81, f84
  6217. nop __LINE__
  6218. }
  6219. { .mfi
  6220. nop __LINE__
  6221. FMA_B f116 = f77, f113, f116
  6222. nop __LINE__
  6223. }
  6224. ;;
  6225. { .mfi
  6226. nop __LINE__
  6227. FNMA f85 = f76, f81, f85
  6228. nop __LINE__
  6229. }
  6230. { .mfi
  6231. nop __LINE__
  6232. FNMA f117 = f76, f113, f117
  6233. nop __LINE__
  6234. }
  6235. ;;
  6236. { .mfi
  6237. nop __LINE__
  6238. FNMA f70 = f78, f64, f70
  6239. nop __LINE__
  6240. }
  6241. { .mfi
  6242. nop __LINE__
  6243. FNMA f102 = f78, f96, f102
  6244. nop __LINE__
  6245. }
  6246. ;;
  6247. { .mfi
  6248. nop __LINE__
  6249. FMA_A f71 = f79, f64, f71
  6250. nop __LINE__
  6251. }
  6252. { .mfi
  6253. nop __LINE__
  6254. FMA_A f103 = f79, f96, f103
  6255. nop __LINE__
  6256. }
  6257. ;;
  6258. { .mfi
  6259. nop __LINE__
  6260. FNMA f86 = f78, f80, f86
  6261. nop __LINE__
  6262. }
  6263. { .mfi
  6264. nop __LINE__
  6265. FNMA f118 = f78, f112, f118
  6266. nop __LINE__
  6267. }
  6268. ;;
  6269. { .mfi
  6270. nop __LINE__
  6271. FMA_A f87 = f79, f80, f87
  6272. nop __LINE__
  6273. }
  6274. { .mfi
  6275. nop __LINE__
  6276. FMA_A f119 = f79, f112, f119
  6277. nop __LINE__
  6278. }
  6279. ;;
  6280. { .mfi
  6281. nop __LINE__
  6282. FMA_B f70 = f79, f65, f70
  6283. nop __LINE__
  6284. }
  6285. { .mfi
  6286. nop __LINE__
  6287. FMA_B f102 = f79, f97, f102
  6288. nop __LINE__
  6289. }
  6290. ;;
  6291. { .mfi
  6292. nop __LINE__
  6293. FNMA f71 = f78, f65, f71
  6294. nop __LINE__
  6295. }
  6296. { .mfi
  6297. nop __LINE__
  6298. FNMA f103 = f78, f97, f103
  6299. nop __LINE__
  6300. }
  6301. ;;
  6302. { .mfi
  6303. nop __LINE__
  6304. FMA_B f86 = f79, f81, f86
  6305. nop __LINE__
  6306. }
  6307. { .mfi
  6308. nop __LINE__
  6309. FMA_B f118 = f79, f113, f118
  6310. nop __LINE__
  6311. }
  6312. ;;
  6313. { .mfi
  6314. nop __LINE__
  6315. FNMA f87 = f78, f81, f87
  6316. nop __LINE__
  6317. }
  6318. { .mfi
  6319. nop __LINE__
  6320. FNMA f119 = f78, f113, f119
  6321. nop __LINE__
  6322. }
  6323. ;;
  6324. { .mfi
  6325. nop __LINE__
  6326. FMPY f32 = f90, f66
  6327. nop __LINE__
  6328. }
  6329. { .mfi
  6330. nop __LINE__
  6331. FMPY f36 = f90, f98
  6332. nop __LINE__
  6333. }
  6334. ;;
  6335. { .mfi
  6336. nop __LINE__
  6337. FMPY f33 = f91, f66
  6338. nop __LINE__
  6339. }
  6340. { .mfi
  6341. nop __LINE__
  6342. FMPY f37 = f91, f98
  6343. nop __LINE__
  6344. }
  6345. ;;
  6346. { .mfi
  6347. nop __LINE__
  6348. FMPY f34 = f90, f82
  6349. nop __LINE__
  6350. }
  6351. { .mfi
  6352. nop __LINE__
  6353. FMPY f38 = f90, f114
  6354. nop __LINE__
  6355. }
  6356. ;;
  6357. { .mfi
  6358. nop __LINE__
  6359. FMPY f35 = f91, f82
  6360. nop __LINE__
  6361. }
  6362. { .mfi
  6363. nop __LINE__
  6364. FMPY f39 = f91, f114
  6365. nop __LINE__
  6366. }
  6367. ;;
  6368. { .mfi
  6369. nop __LINE__
  6370. FMA_C f66 = f91, f67, f32
  6371. nop __LINE__
  6372. }
  6373. { .mfi
  6374. nop __LINE__
  6375. FMA_C f98 = f91, f99, f36
  6376. nop __LINE__
  6377. }
  6378. ;;
  6379. { .mfi
  6380. nop __LINE__
  6381. FMA_D f67 = f90, f67, f33
  6382. nop __LINE__
  6383. }
  6384. { .mfi
  6385. nop __LINE__
  6386. FMA_D f99 = f90, f99, f37
  6387. nop __LINE__
  6388. }
  6389. ;;
  6390. { .mfi
  6391. nop __LINE__
  6392. FMA_C f82 = f91, f83, f34
  6393. nop __LINE__
  6394. }
  6395. { .mfi
  6396. nop __LINE__
  6397. FMA_C f114 = f91, f115, f38
  6398. nop __LINE__
  6399. }
  6400. ;;
  6401. { .mfi
  6402. nop __LINE__
  6403. FMA_D f83 = f90, f83, f35
  6404. nop __LINE__
  6405. }
  6406. { .mfi
  6407. nop __LINE__
  6408. FMA_D f115 = f90, f115, f39
  6409. nop __LINE__
  6410. }
  6411. ;;
  6412. { .mfi
  6413. STFD [BOFFSET] = f66, SIZE
  6414. FNMA f68 = f92, f66, f68
  6415. nop __LINE__
  6416. }
  6417. { .mfi
  6418. STFD [BOFFSET2] = f98, SIZE
  6419. FNMA f100 = f92, f98, f100
  6420. nop __LINE__
  6421. }
  6422. ;;
  6423. { .mfi
  6424. STFD [BOFFSET] = f67, SIZE
  6425. FMA_A f69 = f93, f66, f69
  6426. nop __LINE__
  6427. }
  6428. { .mfi
  6429. STFD [BOFFSET2] = f99, SIZE
  6430. FMA_A f101 = f93, f98, f101
  6431. nop __LINE__
  6432. }
  6433. ;;
  6434. { .mfi
  6435. STFD [BOFFSET] = f82, SIZE
  6436. FNMA f84 = f92, f82, f84
  6437. nop __LINE__
  6438. }
  6439. { .mfi
  6440. STFD [BOFFSET2] = f114, SIZE
  6441. FNMA f116 = f92, f114, f116
  6442. nop __LINE__
  6443. }
  6444. ;;
  6445. { .mfi
  6446. STFD [BOFFSET] = f83, 5 * SIZE
  6447. FMA_A f85 = f93, f82, f85
  6448. nop __LINE__
  6449. }
  6450. { .mfi
  6451. STFD [BOFFSET2] = f115, 5 * SIZE
  6452. FMA_A f117 = f93, f114, f117
  6453. nop __LINE__
  6454. }
  6455. ;;
  6456. { .mfi
  6457. STFD [C1 ] = f66, SIZE
  6458. FMA_B f68 = f93, f67, f68
  6459. nop __LINE__
  6460. }
  6461. { .mfi
  6462. STFD [C3 ] = f98, SIZE
  6463. FMA_B f100 = f93, f99, f100
  6464. nop __LINE__
  6465. }
  6466. ;;
  6467. { .mfi
  6468. STFD [C1 ] = f67, SIZE
  6469. FNMA f69 = f92, f67, f69
  6470. nop __LINE__
  6471. }
  6472. { .mfi
  6473. STFD [C3 ] = f99, SIZE
  6474. FNMA f101 = f92, f99, f101
  6475. nop __LINE__
  6476. }
  6477. ;;
  6478. { .mfi
  6479. STFD [C2 ] = f82, SIZE
  6480. FMA_B f84 = f93, f83, f84
  6481. nop __LINE__
  6482. }
  6483. { .mfi
  6484. STFD [C4 ] = f114, SIZE
  6485. FMA_B f116 = f93, f115, f116
  6486. nop __LINE__
  6487. }
  6488. ;;
  6489. { .mfi
  6490. STFD [C2 ] = f83, SIZE
  6491. FNMA f85 = f92, f83, f85
  6492. nop __LINE__
  6493. }
  6494. { .mfi
  6495. STFD [C4 ] = f115, SIZE
  6496. FNMA f117 = f92, f115, f117
  6497. nop __LINE__
  6498. }
  6499. ;;
  6500. { .mfi
  6501. nop __LINE__
  6502. FNMA f70 = f94, f66, f70
  6503. nop __LINE__
  6504. }
  6505. { .mfi
  6506. nop __LINE__
  6507. FNMA f102 = f94, f98, f102
  6508. nop __LINE__
  6509. }
  6510. ;;
  6511. { .mfi
  6512. nop __LINE__
  6513. FMA_A f71 = f95, f66, f71
  6514. nop __LINE__
  6515. }
  6516. { .mfi
  6517. nop __LINE__
  6518. FMA_A f103 = f95, f98, f103
  6519. nop __LINE__
  6520. }
  6521. ;;
  6522. { .mfi
  6523. nop __LINE__
  6524. FNMA f86 = f94, f82, f86
  6525. nop __LINE__
  6526. }
  6527. { .mfi
  6528. nop __LINE__
  6529. FNMA f118 = f94, f114, f118
  6530. nop __LINE__
  6531. }
  6532. ;;
  6533. { .mfi
  6534. nop __LINE__
  6535. FMA_A f87 = f95, f82, f87
  6536. nop __LINE__
  6537. }
  6538. { .mfi
  6539. nop __LINE__
  6540. FMA_A f119 = f95, f114, f119
  6541. nop __LINE__
  6542. }
  6543. ;;
  6544. { .mfi
  6545. nop __LINE__
  6546. FMA_B f70 = f95, f67, f70
  6547. nop __LINE__
  6548. }
  6549. { .mfi
  6550. nop __LINE__
  6551. FMA_B f102 = f95, f99, f102
  6552. nop __LINE__
  6553. }
  6554. ;;
  6555. { .mfi
  6556. nop __LINE__
  6557. FNMA f71 = f94, f67, f71
  6558. nop __LINE__
  6559. }
  6560. { .mfi
  6561. nop __LINE__
  6562. FNMA f103 = f94, f99, f103
  6563. nop __LINE__
  6564. }
  6565. ;;
  6566. { .mfi
  6567. nop __LINE__
  6568. FMA_B f86 = f95, f83, f86
  6569. nop __LINE__
  6570. }
  6571. { .mfi
  6572. nop __LINE__
  6573. FMA_B f118 = f95, f115, f118
  6574. nop __LINE__
  6575. }
  6576. ;;
  6577. { .mfi
  6578. nop __LINE__
  6579. FNMA f87 = f94, f83, f87
  6580. nop __LINE__
  6581. }
  6582. { .mfi
  6583. nop __LINE__
  6584. FNMA f119 = f94, f115, f119
  6585. nop __LINE__
  6586. }
  6587. ;;
  6588. { .mfi
  6589. nop __LINE__
  6590. FMPY f32 = f108, f68
  6591. nop __LINE__
  6592. }
  6593. { .mfi
  6594. nop __LINE__
  6595. FMPY f36 = f108, f100
  6596. nop __LINE__
  6597. }
  6598. { .mfi
  6599. nop __LINE__
  6600. FMPY f33 = f109, f68
  6601. nop __LINE__
  6602. }
  6603. { .mfi
  6604. nop __LINE__
  6605. FMPY f37 = f109, f100
  6606. nop __LINE__
  6607. }
  6608. { .mfi
  6609. nop __LINE__
  6610. FMPY f34 = f108, f84
  6611. nop __LINE__
  6612. }
  6613. { .mfi
  6614. nop __LINE__
  6615. FMPY f38 = f108, f116
  6616. nop __LINE__
  6617. }
  6618. { .mfi
  6619. nop __LINE__
  6620. FMPY f35 = f109, f84
  6621. nop __LINE__
  6622. }
  6623. { .mfi
  6624. nop __LINE__
  6625. FMPY f39 = f109, f116
  6626. nop __LINE__
  6627. }
  6628. ;;
  6629. { .mfi
  6630. nop __LINE__
  6631. FMA_C f68 = f109, f69, f32
  6632. nop __LINE__
  6633. }
  6634. { .mfi
  6635. nop __LINE__
  6636. FMA_C f100 = f109, f101, f36
  6637. nop __LINE__
  6638. }
  6639. { .mfi
  6640. nop __LINE__
  6641. FMA_D f69 = f108, f69, f33
  6642. nop __LINE__
  6643. }
  6644. { .mfi
  6645. nop __LINE__
  6646. FMA_D f101 = f108, f101, f37
  6647. nop __LINE__
  6648. }
  6649. { .mfi
  6650. nop __LINE__
  6651. FMA_C f84 = f109, f85, f34
  6652. nop __LINE__
  6653. }
  6654. { .mfi
  6655. nop __LINE__
  6656. FMA_C f116 = f109, f117, f38
  6657. nop __LINE__
  6658. }
  6659. { .mfi
  6660. nop __LINE__
  6661. FMA_D f85 = f108, f85, f35
  6662. nop __LINE__
  6663. }
  6664. { .mfi
  6665. nop __LINE__
  6666. FMA_D f117 = f108, f117, f39
  6667. nop __LINE__
  6668. }
  6669. ;;
  6670. { .mfi
  6671. STFD [BOFFSET] = f68, SIZE
  6672. FNMA f70 = f110, f68, f70
  6673. nop __LINE__
  6674. }
  6675. { .mfi
  6676. STFD [BOFFSET2] = f100, SIZE
  6677. FNMA f102 = f110, f100, f102
  6678. nop __LINE__
  6679. }
  6680. ;;
  6681. { .mfi
  6682. STFD [BOFFSET] = f69, SIZE
  6683. FMA_A f71 = f111, f68, f71
  6684. nop __LINE__
  6685. }
  6686. { .mfi
  6687. STFD [BOFFSET2] = f101, SIZE
  6688. FMA_A f103 = f111, f100, f103
  6689. nop __LINE__
  6690. }
  6691. ;;
  6692. { .mfi
  6693. STFD [BOFFSET] = f84, SIZE
  6694. FNMA f86 = f110, f84, f86
  6695. nop __LINE__
  6696. }
  6697. { .mfi
  6698. STFD [BOFFSET2] = f116, SIZE
  6699. FNMA f118 = f110, f116, f118
  6700. nop __LINE__
  6701. }
  6702. ;;
  6703. { .mfi
  6704. STFD [BOFFSET] = f85, 5 * SIZE
  6705. FMA_A f87 = f111, f84, f87
  6706. nop __LINE__
  6707. }
  6708. { .mfi
  6709. STFD [BOFFSET2] = f117, 5 * SIZE
  6710. FMA_A f119 = f111, f116, f119
  6711. nop __LINE__
  6712. }
  6713. ;;
  6714. { .mfi
  6715. STFD [C1 ] = f68, SIZE
  6716. FMA_B f70 = f111, f69, f70
  6717. nop __LINE__
  6718. }
  6719. { .mfi
  6720. STFD [C3 ] = f100, SIZE
  6721. FMA_B f102 = f111, f101, f102
  6722. nop __LINE__
  6723. }
  6724. ;;
  6725. { .mfi
  6726. STFD [C1 ] = f69, SIZE
  6727. FNMA f71 = f110, f69, f71
  6728. nop __LINE__
  6729. }
  6730. { .mfi
  6731. STFD [C3 ] = f101, SIZE
  6732. FNMA f103 = f110, f101, f103
  6733. nop __LINE__
  6734. }
  6735. ;;
  6736. { .mfi
  6737. STFD [C2 ] = f84, SIZE
  6738. FMA_B f86 = f111, f85, f86
  6739. nop __LINE__
  6740. }
  6741. { .mfi
  6742. STFD [C4 ] = f116, SIZE
  6743. FMA_B f118 = f111, f117, f118
  6744. nop __LINE__
  6745. }
  6746. ;;
  6747. { .mfi
  6748. STFD [C2 ] = f85, SIZE
  6749. FNMA f87 = f110, f85, f87
  6750. nop __LINE__
  6751. }
  6752. { .mfi
  6753. STFD [C4 ] = f117, SIZE
  6754. FNMA f119 = f110, f117, f119
  6755. nop __LINE__
  6756. }
  6757. ;;
  6758. { .mfi
  6759. nop __LINE__
  6760. FMPY f32 = f126, f70
  6761. nop __LINE__
  6762. }
  6763. { .mfi
  6764. nop __LINE__
  6765. FMPY f36 = f126, f102
  6766. nop __LINE__
  6767. }
  6768. ;;
  6769. { .mfi
  6770. nop __LINE__
  6771. FMPY f33 = f127, f70
  6772. nop __LINE__
  6773. }
  6774. { .mfi
  6775. nop __LINE__
  6776. FMPY f37 = f127, f102
  6777. nop __LINE__
  6778. }
  6779. ;;
  6780. { .mfi
  6781. nop __LINE__
  6782. FMPY f34 = f126, f86
  6783. nop __LINE__
  6784. }
  6785. { .mfi
  6786. nop __LINE__
  6787. FMPY f38 = f126, f118
  6788. nop __LINE__
  6789. }
  6790. ;;
  6791. { .mfi
  6792. nop __LINE__
  6793. FMPY f35 = f127, f86
  6794. nop __LINE__
  6795. }
  6796. { .mfi
  6797. nop __LINE__
  6798. FMPY f39 = f127, f118
  6799. nop __LINE__
  6800. }
  6801. ;;
  6802. { .mfi
  6803. nop __LINE__
  6804. FMA_C f70 = f127, f71, f32
  6805. nop __LINE__
  6806. }
  6807. { .mfi
  6808. nop __LINE__
  6809. FMA_C f102 = f127, f103, f36
  6810. nop __LINE__
  6811. }
  6812. ;;
  6813. { .mfi
  6814. nop __LINE__
  6815. FMA_D f71 = f126, f71, f33
  6816. nop __LINE__
  6817. }
  6818. { .mfi
  6819. nop __LINE__
  6820. FMA_D f103 = f126, f103, f37
  6821. nop __LINE__
  6822. }
  6823. ;;
  6824. { .mfi
  6825. nop __LINE__
  6826. FMA_C f86 = f127, f87, f34
  6827. nop __LINE__
  6828. }
  6829. { .mfi
  6830. nop __LINE__
  6831. FMA_C f118 = f127, f119, f38
  6832. nop __LINE__
  6833. }
  6834. ;;
  6835. { .mfi
  6836. nop __LINE__
  6837. FMA_D f87 = f126, f87, f35
  6838. nop __LINE__
  6839. }
  6840. { .mfi
  6841. nop __LINE__
  6842. FMA_D f119 = f126, f119, f39
  6843. nop __LINE__
  6844. }
  6845. ;;
  6846. { .mmi
  6847. STFD [BOFFSET] = f70, SIZE
  6848. STFD [BOFFSET2] = f102, SIZE
  6849. nop __LINE__
  6850. }
  6851. ;;
  6852. { .mmi
  6853. STFD [BOFFSET] = f71, SIZE
  6854. STFD [BOFFSET2] = f103, SIZE
  6855. sub r2 = K, KK
  6856. }
  6857. ;;
  6858. { .mmi
  6859. STFD [BOFFSET] = f86, SIZE
  6860. STFD [BOFFSET2] = f118, SIZE
  6861. adds KK = 4, KK
  6862. }
  6863. ;;
  6864. { .mmi
  6865. STFD [BOFFSET] = f87, -27 * SIZE
  6866. STFD [BOFFSET2] = f119
  6867. shladd r2 = r2, ZBASE_SHIFT, r0
  6868. }
  6869. ;;
  6870. { .mfi
  6871. STFD [C1 ] = f70, SIZE
  6872. mov f64 = f0
  6873. shladd AOFFSET = r2, 2, AOFFSET
  6874. }
  6875. { .mfi
  6876. STFD [C3 ] = f102, SIZE
  6877. mov f65 = f0
  6878. shladd BOFFSET = r2, 2, BOFFSET
  6879. }
  6880. ;;
  6881. { .mfi
  6882. STFD [C1 ] = f71, SIZE
  6883. mov f80 = f0
  6884. mov L = KK
  6885. }
  6886. { .mfi
  6887. STFD [C3 ] = f103, SIZE
  6888. mov f81 = f0
  6889. nop __LINE__
  6890. }
  6891. ;;
  6892. { .mfi
  6893. STFD [C2 ] = f86, SIZE
  6894. mov f96 = f0
  6895. cmp.ne p6, p0 = 1, I
  6896. }
  6897. { .mfi
  6898. STFD [C4 ] = f118, SIZE
  6899. mov f97 = f0
  6900. nop __LINE__
  6901. }
  6902. ;;
  6903. { .mfi
  6904. STFD [C2 ] = f87, SIZE
  6905. mov f112 = f0
  6906. adds I = -1, I
  6907. }
  6908. { .mfb
  6909. STFD [C4 ] = f119, SIZE
  6910. mov f113 = f0
  6911. (p6) br.cond.dptk .L011
  6912. }
  6913. ;;
  6914. #endif
  6915. #ifdef RN
  /*
   * RN section, step 1 of 4: scale the four complex values
   * (f64,f65) (f66,f67) (f68,f69) (f70,f71) by the coefficient pair
   * (f72,f73), and fetch the remaining coefficients through BOFFSET.
   *
   * f72..f75 are not loaded anywhere in this section; presumably they were
   * loaded from the start of the same packed block earlier (TODO confirm).
   *
   * Scaling pattern per value (re, im):
   *     t0 = FMPY(f72, re)          t1 = FMPY(f73, re)
   *     re = FMA_C(f73, im, t0)     im = FMA_D(f72, im, t1)
   * This has the shape of a complex multiply.  FMPY/FMA_C/FMA_D are macros
   * defined outside this chunk; presumably they choose the signs for the
   * conjugated / non-conjugated variants (verify against their definitions).
   *
   * Loads, as offsets in SIZE units from BOFFSET on entry to this section:
   *     +0  f76,f77     +2  f78,f79
   *     +6  f90,f91     +8  f92,f93     +10 f94,f95
   *     +16 f108,f109   +18 f110,f111
   *     +26 f126,f127
   * The final "adds BOFFSET = - 30 * SIZE" leaves BOFFSET 4 * SIZE below its
   * value on entry to this section.
   *
   * Where a load and an "adds BOFFSET" share a bundle, the load uses the
   * pointer value from before the add (no stop separates them).
   * The "nop __LINE__" slots only pad the bundles.
   */
  6916. { .mfi
  6917. LDFPD f76, f77 = [BOFFSET], 2 * SIZE
  6918. FMPY f32 = f72, f64
  6919. nop __LINE__
  6920. }
  6921. { .mfi
  6922. nop __LINE__
  6923. FMPY f36 = f72, f68
  6924. nop __LINE__
  6925. }
  6926. ;;
  6927. { .mfi
  6928. LDFPD f78, f79 = [BOFFSET]
  6929. FMPY f33 = f73, f64
  6930. adds BOFFSET = 4 * SIZE, BOFFSET
  6931. }
  6932. { .mfi
  6933. nop __LINE__
  6934. FMPY f37 = f73, f68
  6935. nop __LINE__
  6936. }
  6937. ;;
  6938. { .mfi
  6939. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  6940. FMPY f34 = f72, f66
  6941. nop __LINE__
  6942. }
  6943. { .mfi
  6944. nop __LINE__
  6945. FMPY f38 = f72, f70
  6946. nop __LINE__
  6947. }
  6948. ;;
  6949. { .mfi
  6950. LDFPD f92, f93 = [BOFFSET], 2 * SIZE
  6951. FMPY f35 = f73, f66
  6952. nop __LINE__
  6953. }
  6954. { .mfi
  6955. nop __LINE__
  6956. FMPY f39 = f73, f70
  6957. nop __LINE__
  6958. }
  6959. ;;
  /* Second half of the scaling: combine the products with the other part. */
  6960. { .mfi
  6961. LDFPD f94, f95 = [BOFFSET]
  6962. FMA_C f64 = f73, f65, f32
  6963. adds BOFFSET = 6 * SIZE, BOFFSET
  6964. }
  6965. { .mfi
  6966. nop __LINE__
  6967. FMA_C f68 = f73, f69, f36
  6968. nop __LINE__
  6969. }
  6970. ;;
  6971. { .mfi
  6972. LDFPD f108, f109 = [BOFFSET], 2 * SIZE
  6973. FMA_D f65 = f72, f65, f33
  6974. nop __LINE__
  6975. }
  6976. { .mfi
  6977. nop __LINE__
  6978. FMA_D f69 = f72, f69, f37
  6979. nop __LINE__
  6980. }
  6981. ;;
  6982. { .mfi
  6983. LDFPD f110, f111 = [BOFFSET]
  6984. FMA_C f66 = f73, f67, f34
  6985. adds BOFFSET = 8 * SIZE, BOFFSET
  6986. }
  6987. { .mfi
  6988. nop __LINE__
  6989. FMA_C f70 = f73, f71, f38
  6990. nop __LINE__
  6991. }
  6992. ;;
  /* Last coefficient pair; rewind BOFFSET and set up the second store
     pointer AOFFSET2 = AOFFSET + 4 * SIZE used by the stores that follow. */
  6993. { .mfi
  6994. LDFPD f126, f127 = [BOFFSET]
  6995. FMA_D f67 = f72, f67, f35
  6996. adds BOFFSET = - 30 * SIZE, BOFFSET
  6997. }
  6998. { .mfi
  6999. nop __LINE__
  7000. FMA_D f71 = f72, f71, f39
  7001. adds AOFFSET2 = 4 * SIZE, AOFFSET
  7002. }
  7003. ;;
  /*
   * RN section, step 1 (continued): write out the scaled values f64..f71
   * and subtract their contribution from the three remaining register
   * groups.
   *
   * Stores:
   *     f64..f67 -> [AOFFSET]  and [C1]
   *     f68..f71 -> [AOFFSET2] and [C5]
   * Each pointer is post-incremented SIZE, SIZE, SIZE, 5 * SIZE, i.e. by
   * 8 * SIZE in total.
   *
   * Updates (coefficient pair -> registers updated):
   *     (f74,f75) -> f80..f87
   *     (f76,f77) -> f96..f103
   *     (f78,f79) -> f112..f119
   * Pattern per updated value (re, im) with source value (x_re, x_im) and
   * coefficient (c0, c1):
   *     re = FNMA (c0, x_re, re)     im = FMA_A(c1, x_re, im)
   *     re = FMA_B(c1, x_im, re)     im = FNMA (c0, x_im, im)
   * This has the shape of a complex "y -= c * x".  FNMA/FMA_A/FMA_B are
   * macros defined outside this chunk, so the exact signs are presumably
   * chosen there per conjugation mode (verify against their definitions).
   */
  7004. { .mfi
  7005. STFD [AOFFSET] = f64, SIZE
  7006. FNMA f80 = f74, f64, f80
  7007. nop __LINE__
  7008. }
  7009. { .mfi
  7010. STFD [AOFFSET2] = f68, SIZE
  7011. FNMA f84 = f74, f68, f84
  7012. nop __LINE__
  7013. }
  7014. ;;
  7015. { .mfi
  7016. STFD [AOFFSET] = f65, SIZE
  7017. FMA_A f81 = f75, f64, f81
  7018. nop __LINE__
  7019. }
  7020. { .mfi
  7021. STFD [AOFFSET2] = f69, SIZE
  7022. FMA_A f85 = f75, f68, f85
  7023. nop __LINE__
  7024. }
  7025. ;;
  7026. { .mfi
  7027. STFD [AOFFSET] = f66, SIZE
  7028. FNMA f82 = f74, f66, f82
  7029. nop __LINE__
  7030. }
  7031. { .mfi
  7032. STFD [AOFFSET2] = f70, SIZE
  7033. FNMA f86 = f74, f70, f86
  7034. nop __LINE__
  7035. }
  7036. ;;
  7037. { .mfi
  7038. STFD [AOFFSET] = f67, 5 * SIZE
  7039. FMA_A f83 = f75, f66, f83
  7040. nop __LINE__
  7041. }
  7042. { .mfi
  7043. STFD [AOFFSET2] = f71, 5 * SIZE
  7044. FMA_A f87 = f75, f70, f87
  7045. nop __LINE__
  7046. }
  7047. ;;
  /* Same eight values again, now written through the C1 / C5 pointers. */
  7048. { .mfi
  7049. STFD [C1 ] = f64, SIZE
  7050. FMA_B f80 = f75, f65, f80
  7051. nop __LINE__
  7052. }
  7053. { .mfi
  7054. STFD [C5 ] = f68, SIZE
  7055. FMA_B f84 = f75, f69, f84
  7056. nop __LINE__
  7057. }
  7058. ;;
  7059. { .mfi
  7060. STFD [C1 ] = f65, SIZE
  7061. FNMA f81 = f74, f65, f81
  7062. nop __LINE__
  7063. }
  7064. { .mfi
  7065. STFD [C5 ] = f69, SIZE
  7066. FNMA f85 = f74, f69, f85
  7067. nop __LINE__
  7068. }
  7069. ;;
  7070. { .mfi
  7071. STFD [C1 ] = f66, SIZE
  7072. FMA_B f82 = f75, f67, f82
  7073. nop __LINE__
  7074. }
  7075. { .mfi
  7076. STFD [C5 ] = f70, SIZE
  7077. FMA_B f86 = f75, f71, f86
  7078. nop __LINE__
  7079. }
  7080. ;;
  7081. { .mfi
  7082. STFD [C1 ] = f67, 5 * SIZE
  7083. FNMA f83 = f74, f67, f83
  7084. nop __LINE__
  7085. }
  7086. { .mfi
  7087. STFD [C5 ] = f71, 5 * SIZE
  7088. FNMA f87 = f74, f71, f87
  7089. nop __LINE__
  7090. }
  7091. ;;
  /* Update f96..f103 with coefficient pair (f76,f77). */
  7092. { .mfi
  7093. nop __LINE__
  7094. FNMA f96 = f76, f64, f96
  7095. nop __LINE__
  7096. }
  7097. { .mfi
  7098. nop __LINE__
  7099. FNMA f100 = f76, f68, f100
  7100. nop __LINE__
  7101. }
  7102. ;;
  7103. { .mfi
  7104. nop __LINE__
  7105. FMA_A f97 = f77, f64, f97
  7106. nop __LINE__
  7107. }
  7108. { .mfi
  7109. nop __LINE__
  7110. FMA_A f101 = f77, f68, f101
  7111. nop __LINE__
  7112. }
  7113. ;;
  7114. { .mfi
  7115. nop __LINE__
  7116. FNMA f98 = f76, f66, f98
  7117. nop __LINE__
  7118. }
  7119. { .mfi
  7120. nop __LINE__
  7121. FNMA f102 = f76, f70, f102
  7122. nop __LINE__
  7123. }
  7124. ;;
  7125. { .mfi
  7126. nop __LINE__
  7127. FMA_A f99 = f77, f66, f99
  7128. nop __LINE__
  7129. }
  7130. { .mfi
  7131. nop __LINE__
  7132. FMA_A f103 = f77, f70, f103
  7133. nop __LINE__
  7134. }
  7135. ;;
  7136. { .mfi
  7137. nop __LINE__
  7138. FMA_B f96 = f77, f65, f96
  7139. nop __LINE__
  7140. }
  7141. { .mfi
  7142. nop __LINE__
  7143. FMA_B f100 = f77, f69, f100
  7144. nop __LINE__
  7145. }
  7146. ;;
  7147. { .mfi
  7148. nop __LINE__
  7149. FNMA f97 = f76, f65, f97
  7150. nop __LINE__
  7151. }
  7152. { .mfi
  7153. nop __LINE__
  7154. FNMA f101 = f76, f69, f101
  7155. nop __LINE__
  7156. }
  7157. ;;
  7158. { .mfi
  7159. nop __LINE__
  7160. FMA_B f98 = f77, f67, f98
  7161. nop __LINE__
  7162. }
  7163. { .mfi
  7164. nop __LINE__
  7165. FMA_B f102 = f77, f71, f102
  7166. nop __LINE__
  7167. }
  7168. ;;
  7169. { .mfi
  7170. nop __LINE__
  7171. FNMA f99 = f76, f67, f99
  7172. nop __LINE__
  7173. }
  7174. { .mfi
  7175. nop __LINE__
  7176. FNMA f103 = f76, f71, f103
  7177. nop __LINE__
  7178. }
  7179. ;;
  /* Update f112..f119 with coefficient pair (f78,f79). */
  7180. { .mfi
  7181. nop __LINE__
  7182. FNMA f112 = f78, f64, f112
  7183. nop __LINE__
  7184. }
  7185. { .mfi
  7186. nop __LINE__
  7187. FNMA f116 = f78, f68, f116
  7188. nop __LINE__
  7189. }
  7190. ;;
  7191. { .mfi
  7192. nop __LINE__
  7193. FMA_A f113 = f79, f64, f113
  7194. nop __LINE__
  7195. }
  7196. { .mfi
  7197. nop __LINE__
  7198. FMA_A f117 = f79, f68, f117
  7199. nop __LINE__
  7200. }
  7201. ;;
  7202. { .mfi
  7203. nop __LINE__
  7204. FNMA f114 = f78, f66, f114
  7205. nop __LINE__
  7206. }
  7207. { .mfi
  7208. nop __LINE__
  7209. FNMA f118 = f78, f70, f118
  7210. nop __LINE__
  7211. }
  7212. ;;
  7213. { .mfi
  7214. nop __LINE__
  7215. FMA_A f115 = f79, f66, f115
  7216. nop __LINE__
  7217. }
  7218. { .mfi
  7219. nop __LINE__
  7220. FMA_A f119 = f79, f70, f119
  7221. nop __LINE__
  7222. }
  7223. ;;
  7224. { .mfi
  7225. nop __LINE__
  7226. FMA_B f112 = f79, f65, f112
  7227. nop __LINE__
  7228. }
  7229. { .mfi
  7230. nop __LINE__
  7231. FMA_B f116 = f79, f69, f116
  7232. nop __LINE__
  7233. }
  7234. ;;
  7235. { .mfi
  7236. nop __LINE__
  7237. FNMA f113 = f78, f65, f113
  7238. nop __LINE__
  7239. }
  7240. { .mfi
  7241. nop __LINE__
  7242. FNMA f117 = f78, f69, f117
  7243. nop __LINE__
  7244. }
  7245. ;;
  7246. { .mfi
  7247. nop __LINE__
  7248. FMA_B f114 = f79, f67, f114
  7249. nop __LINE__
  7250. }
  7251. { .mfi
  7252. nop __LINE__
  7253. FMA_B f118 = f79, f71, f118
  7254. nop __LINE__
  7255. }
  7256. ;;
  7257. { .mfi
  7258. nop __LINE__
  7259. FNMA f115 = f78, f67, f115
  7260. nop __LINE__
  7261. }
  7262. { .mfi
  7263. nop __LINE__
  7264. FNMA f119 = f78, f71, f119
  7265. nop __LINE__
  7266. }
  7267. ;;
  /*
   * RN section, step 2 of 4: scale f80..f87 by the coefficient pair
   * (f90,f91), store them, and subtract their contribution from the two
   * remaining register groups.
   *
   * Scaling pattern per value (re, im):
   *     t0 = FMPY(f90, re)          t1 = FMPY(f91, re)
   *     re = FMA_C(f91, im, t0)     im = FMA_D(f90, im, t1)
   * (shape of a complex multiply; FMPY/FMA_C/FMA_D are macros defined
   * outside this chunk, signs presumably depend on conjugation mode).
   *
   * Unlike step 1, the eight FMPYs are issued with no stop between them,
   * and so are the eight FMA_C/FMA_D that follow.
   */
  7268. { .mfi
  7269. nop __LINE__
  7270. FMPY f32 = f90, f80
  7271. nop __LINE__
  7272. }
  7273. { .mfi
  7274. nop __LINE__
  7275. FMPY f36 = f90, f84
  7276. nop __LINE__
  7277. }
  7278. { .mfi
  7279. nop __LINE__
  7280. FMPY f33 = f91, f80
  7281. nop __LINE__
  7282. }
  7283. { .mfi
  7284. nop __LINE__
  7285. FMPY f37 = f91, f84
  7286. nop __LINE__
  7287. }
  7288. { .mfi
  7289. nop __LINE__
  7290. FMPY f34 = f90, f82
  7291. nop __LINE__
  7292. }
  7293. { .mfi
  7294. nop __LINE__
  7295. FMPY f38 = f90, f86
  7296. nop __LINE__
  7297. }
  7298. { .mfi
  7299. nop __LINE__
  7300. FMPY f35 = f91, f82
  7301. nop __LINE__
  7302. }
  7303. { .mfi
  7304. nop __LINE__
  7305. FMPY f39 = f91, f86
  7306. nop __LINE__
  7307. }
  7308. ;;
  /* NOTE(review): within this group each FMA_C reads the odd register
     (e.g. f81) that a later FMA_D in the same group overwrites; this relies
     on the FMA_C seeing the old value - confirm against the IA-64
     instruction-group dependency rules. */
  7309. { .mfi
  7310. nop __LINE__
  7311. FMA_C f80 = f91, f81, f32
  7312. nop __LINE__
  7313. }
  7314. { .mfi
  7315. nop __LINE__
  7316. FMA_C f84 = f91, f85, f36
  7317. nop __LINE__
  7318. }
  7319. { .mfi
  7320. nop __LINE__
  7321. FMA_D f81 = f90, f81, f33
  7322. nop __LINE__
  7323. }
  7324. { .mfi
  7325. nop __LINE__
  7326. FMA_D f85 = f90, f85, f37
  7327. nop __LINE__
  7328. }
  7329. { .mfi
  7330. nop __LINE__
  7331. FMA_C f82 = f91, f83, f34
  7332. nop __LINE__
  7333. }
  7334. { .mfi
  7335. nop __LINE__
  7336. FMA_C f86 = f91, f87, f38
  7337. nop __LINE__
  7338. }
  7339. { .mfi
  7340. nop __LINE__
  7341. FMA_D f83 = f90, f83, f35
  7342. nop __LINE__
  7343. }
  7344. { .mfi
  7345. nop __LINE__
  7346. FMA_D f87 = f90, f87, f39
  7347. nop __LINE__
  7348. }
  7349. ;;
  /*
   * Store f80..f83 -> [AOFFSET], f84..f87 -> [AOFFSET2] (each pointer
   * advances 8 * SIZE in total), interleaved with the update of f96..f103
   * by coefficient pair (f92,f93):
   *     re = FNMA (c0, x_re, re)     im = FMA_A(c1, x_re, im)
   *     re = FMA_B(c1, x_im, re)     im = FNMA (c0, x_im, im)
   */
  7350. { .mfi
  7351. STFD [AOFFSET] = f80, SIZE
  7352. FNMA f96 = f92, f80, f96
  7353. nop __LINE__
  7354. }
  7355. { .mfi
  7356. STFD [AOFFSET2] = f84, SIZE
  7357. FNMA f100 = f92, f84, f100
  7358. nop __LINE__
  7359. }
  7360. ;;
  7361. { .mfi
  7362. STFD [AOFFSET] = f81, SIZE
  7363. FMA_A f97 = f93, f80, f97
  7364. nop __LINE__
  7365. }
  7366. { .mfi
  7367. STFD [AOFFSET2] = f85, SIZE
  7368. FMA_A f101 = f93, f84, f101
  7369. nop __LINE__
  7370. }
  7371. ;;
  7372. { .mfi
  7373. STFD [AOFFSET] = f82, SIZE
  7374. FNMA f98 = f92, f82, f98
  7375. nop __LINE__
  7376. }
  7377. { .mfi
  7378. STFD [AOFFSET2] = f86, SIZE
  7379. FNMA f102 = f92, f86, f102
  7380. nop __LINE__
  7381. }
  7382. ;;
  7383. { .mfi
  7384. STFD [AOFFSET] = f83, 5 * SIZE
  7385. FMA_A f99 = f93, f82, f99
  7386. nop __LINE__
  7387. }
  7388. { .mfi
  7389. STFD [AOFFSET2] = f87, 5 * SIZE
  7390. FMA_A f103 = f93, f86, f103
  7391. nop __LINE__
  7392. }
  7393. ;;
  /* Same eight values through C2 / C6 (each advances 8 * SIZE in total). */
  7394. { .mfi
  7395. STFD [C2 ] = f80, SIZE
  7396. FMA_B f96 = f93, f81, f96
  7397. nop __LINE__
  7398. }
  7399. { .mfi
  7400. STFD [C6 ] = f84, SIZE
  7401. FMA_B f100 = f93, f85, f100
  7402. nop __LINE__
  7403. }
  7404. ;;
  7405. { .mfi
  7406. STFD [C2 ] = f81, SIZE
  7407. FNMA f97 = f92, f81, f97
  7408. nop __LINE__
  7409. }
  7410. { .mfi
  7411. STFD [C6 ] = f85, SIZE
  7412. FNMA f101 = f92, f85, f101
  7413. nop __LINE__
  7414. }
  7415. ;;
  7416. { .mfi
  7417. STFD [C2 ] = f82, SIZE
  7418. FMA_B f98 = f93, f83, f98
  7419. nop __LINE__
  7420. }
  7421. { .mfi
  7422. STFD [C6 ] = f86, SIZE
  7423. FMA_B f102 = f93, f87, f102
  7424. nop __LINE__
  7425. }
  7426. ;;
  7427. { .mfi
  7428. STFD [C2 ] = f83, 5 * SIZE
  7429. FNMA f99 = f92, f83, f99
  7430. nop __LINE__
  7431. }
  7432. { .mfi
  7433. STFD [C6 ] = f87, 5 * SIZE
  7434. FNMA f103 = f92, f87, f103
  7435. nop __LINE__
  7436. }
  7437. ;;
  /* Update f112..f119 with coefficient pair (f94,f95). */
  7438. { .mfi
  7439. nop __LINE__
  7440. FNMA f112 = f94, f80, f112
  7441. nop __LINE__
  7442. }
  7443. { .mfi
  7444. nop __LINE__
  7445. FNMA f116 = f94, f84, f116
  7446. nop __LINE__
  7447. }
  7448. ;;
  7449. { .mfi
  7450. nop __LINE__
  7451. FMA_A f113 = f95, f80, f113
  7452. nop __LINE__
  7453. }
  7454. { .mfi
  7455. nop __LINE__
  7456. FMA_A f117 = f95, f84, f117
  7457. nop __LINE__
  7458. }
  7459. ;;
  7460. { .mfi
  7461. nop __LINE__
  7462. FNMA f114 = f94, f82, f114
  7463. nop __LINE__
  7464. }
  7465. { .mfi
  7466. nop __LINE__
  7467. FNMA f118 = f94, f86, f118
  7468. nop __LINE__
  7469. }
  7470. ;;
  7471. { .mfi
  7472. nop __LINE__
  7473. FMA_A f115 = f95, f82, f115
  7474. nop __LINE__
  7475. }
  7476. { .mfi
  7477. nop __LINE__
  7478. FMA_A f119 = f95, f86, f119
  7479. nop __LINE__
  7480. }
  7481. ;;
  7482. { .mfi
  7483. nop __LINE__
  7484. FMA_B f112 = f95, f81, f112
  7485. nop __LINE__
  7486. }
  7487. { .mfi
  7488. nop __LINE__
  7489. FMA_B f116 = f95, f85, f116
  7490. nop __LINE__
  7491. }
  7492. ;;
  7493. { .mfi
  7494. nop __LINE__
  7495. FNMA f113 = f94, f81, f113
  7496. nop __LINE__
  7497. }
  7498. { .mfi
  7499. nop __LINE__
  7500. FNMA f117 = f94, f85, f117
  7501. nop __LINE__
  7502. }
  7503. ;;
  7504. { .mfi
  7505. nop __LINE__
  7506. FMA_B f114 = f95, f83, f114
  7507. nop __LINE__
  7508. }
  7509. { .mfi
  7510. nop __LINE__
  7511. FMA_B f118 = f95, f87, f118
  7512. nop __LINE__
  7513. }
  7514. ;;
  7515. { .mfi
  7516. nop __LINE__
  7517. FNMA f115 = f94, f83, f115
  7518. nop __LINE__
  7519. }
  7520. { .mfi
  7521. nop __LINE__
  7522. FNMA f119 = f94, f87, f119
  7523. nop __LINE__
  7524. }
  7525. ;;
  /*
   * RN section, step 3 of 4: scale f96..f103 by the coefficient pair
   * (f108,f109), store them, and subtract their contribution from the last
   * register group f112..f119.
   *
   * Scaling pattern per value (re, im):
   *     t0 = FMPY(f108, re)          t1 = FMPY(f109, re)
   *     re = FMA_C(f109, im, t0)     im = FMA_D(f108, im, t1)
   * (shape of a complex multiply; FMPY/FMA_C/FMA_D are macros defined
   * outside this chunk, signs presumably depend on conjugation mode).
   */
  7526. { .mfi
  7527. nop __LINE__
  7528. FMPY f32 = f108, f96
  7529. nop __LINE__
  7530. }
  7531. { .mfi
  7532. nop __LINE__
  7533. FMPY f36 = f108, f100
  7534. nop __LINE__
  7535. }
  7536. ;;
  7537. { .mfi
  7538. nop __LINE__
  7539. FMPY f33 = f109, f96
  7540. nop __LINE__
  7541. }
  7542. { .mfi
  7543. nop __LINE__
  7544. FMPY f37 = f109, f100
  7545. nop __LINE__
  7546. }
  7547. ;;
  7548. { .mfi
  7549. nop __LINE__
  7550. FMPY f34 = f108, f98
  7551. nop __LINE__
  7552. }
  7553. { .mfi
  7554. nop __LINE__
  7555. FMPY f38 = f108, f102
  7556. nop __LINE__
  7557. }
  7558. ;;
  7559. { .mfi
  7560. nop __LINE__
  7561. FMPY f35 = f109, f98
  7562. nop __LINE__
  7563. }
  7564. { .mfi
  7565. nop __LINE__
  7566. FMPY f39 = f109, f102
  7567. nop __LINE__
  7568. }
  7569. ;;
  7570. { .mfi
  7571. nop __LINE__
  7572. FMA_C f96 = f109, f97, f32
  7573. nop __LINE__
  7574. }
  7575. { .mfi
  7576. nop __LINE__
  7577. FMA_C f100 = f109, f101, f36
  7578. nop __LINE__
  7579. }
  7580. ;;
  7581. { .mfi
  7582. nop __LINE__
  7583. FMA_D f97 = f108, f97, f33
  7584. nop __LINE__
  7585. }
  7586. { .mfi
  7587. nop __LINE__
  7588. FMA_D f101 = f108, f101, f37
  7589. nop __LINE__
  7590. }
  7591. ;;
  7592. { .mfi
  7593. nop __LINE__
  7594. FMA_C f98 = f109, f99, f34
  7595. nop __LINE__
  7596. }
  7597. { .mfi
  7598. nop __LINE__
  7599. FMA_C f102 = f109, f103, f38
  7600. nop __LINE__
  7601. }
  7602. ;;
  7603. { .mfi
  7604. nop __LINE__
  7605. FMA_D f99 = f108, f99, f35
  7606. nop __LINE__
  7607. }
  7608. { .mfi
  7609. nop __LINE__
  7610. FMA_D f103 = f108, f103, f39
  7611. nop __LINE__
  7612. }
  7613. ;;
  /*
   * Store f96..f99 -> [AOFFSET], f100..f103 -> [AOFFSET2] (each pointer
   * advances 8 * SIZE in total), interleaved with the update of f112..f119
   * by coefficient pair (f110,f111):
   *     re = FNMA (c0, x_re, re)     im = FMA_A(c1, x_re, im)
   *     re = FMA_B(c1, x_im, re)     im = FNMA (c0, x_im, im)
   */
  7614. { .mfi
  7615. STFD [AOFFSET] = f96, SIZE
  7616. FNMA f112 = f110, f96, f112
  7617. nop __LINE__
  7618. }
  7619. { .mfi
  7620. STFD [AOFFSET2] = f100, SIZE
  7621. FNMA f116 = f110, f100, f116
  7622. nop __LINE__
  7623. }
  7624. ;;
  7625. { .mfi
  7626. STFD [AOFFSET] = f97, SIZE
  7627. FMA_A f113 = f111, f96, f113
  7628. nop __LINE__
  7629. }
  7630. { .mfi
  7631. STFD [AOFFSET2] = f101, SIZE
  7632. FMA_A f117 = f111, f100, f117
  7633. nop __LINE__
  7634. }
  7635. ;;
  7636. { .mfi
  7637. STFD [AOFFSET] = f98, SIZE
  7638. FNMA f114 = f110, f98, f114
  7639. nop __LINE__
  7640. }
  7641. { .mfi
  7642. STFD [AOFFSET2] = f102, SIZE
  7643. FNMA f118 = f110, f102, f118
  7644. nop __LINE__
  7645. }
  7646. ;;
  7647. { .mfi
  7648. STFD [AOFFSET] = f99, 5 * SIZE
  7649. FMA_A f115 = f111, f98, f115
  7650. nop __LINE__
  7651. }
  7652. { .mfi
  7653. STFD [AOFFSET2] = f103, 5 * SIZE
  7654. FMA_A f119 = f111, f102, f119
  7655. nop __LINE__
  7656. }
  7657. ;;
  /* Same eight values through C3 / C7 (each advances 8 * SIZE in total). */
  7658. { .mfi
  7659. STFD [C3 ] = f96, SIZE
  7660. FMA_B f112 = f111, f97, f112
  7661. nop __LINE__
  7662. }
  7663. { .mfi
  7664. STFD [C7 ] = f100, SIZE
  7665. FMA_B f116 = f111, f101, f116
  7666. nop __LINE__
  7667. }
  7668. ;;
  7669. { .mfi
  7670. STFD [C3 ] = f97, SIZE
  7671. FNMA f113 = f110, f97, f113
  7672. nop __LINE__
  7673. }
  7674. { .mfi
  7675. STFD [C7 ] = f101, SIZE
  7676. FNMA f117 = f110, f101, f117
  7677. nop __LINE__
  7678. }
  7679. ;;
  7680. { .mfi
  7681. STFD [C3 ] = f98, SIZE
  7682. FMA_B f114 = f111, f99, f114
  7683. nop __LINE__
  7684. }
  7685. { .mfi
  7686. STFD [C7 ] = f102, SIZE
  7687. FMA_B f118 = f111, f103, f118
  7688. nop __LINE__
  7689. }
  7690. ;;
  7691. { .mfi
  7692. STFD [C3 ] = f99, 5 * SIZE
  7693. FNMA f115 = f110, f99, f115
  7694. nop __LINE__
  7695. }
  7696. { .mfi
  7697. STFD [C7 ] = f103, 5 * SIZE
  7698. FNMA f119 = f110, f103, f119
  7699. nop __LINE__
  7700. }
  7701. ;;
  /*
   * RN section, step 4 of 4: scale f112..f119 by the coefficient pair
   * (f126,f127), store them, then advance the pointers and loop.
   *
   * Scaling pattern per value (re, im):
   *     t0 = FMPY(f126, re)          t1 = FMPY(f127, re)
   *     re = FMA_C(f127, im, t0)     im = FMA_D(f126, im, t1)
   * (shape of a complex multiply; FMPY/FMA_C/FMA_D are macros defined
   * outside this chunk, signs presumably depend on conjugation mode).
   */
  7702. { .mfi
  7703. nop __LINE__
  7704. FMPY f32 = f126, f112
  7705. nop __LINE__
  7706. }
  7707. { .mfi
  7708. nop __LINE__
  7709. FMPY f36 = f126, f116
  7710. nop __LINE__
  7711. }
  7712. ;;
  7713. { .mfi
  7714. nop __LINE__
  7715. FMPY f33 = f127, f112
  7716. nop __LINE__
  7717. }
  7718. { .mfi
  7719. nop __LINE__
  7720. FMPY f37 = f127, f116
  7721. nop __LINE__
  7722. }
  7723. ;;
  7724. { .mfi
  7725. nop __LINE__
  7726. FMPY f34 = f126, f114
  7727. nop __LINE__
  7728. }
  7729. { .mfi
  7730. nop __LINE__
  7731. FMPY f38 = f126, f118
  7732. nop __LINE__
  7733. }
  7734. ;;
  7735. { .mfi
  7736. nop __LINE__
  7737. FMPY f35 = f127, f114
  7738. nop __LINE__
  7739. }
  7740. { .mfi
  7741. nop __LINE__
  7742. FMPY f39 = f127, f118
  7743. nop __LINE__
  7744. }
  7745. ;;
  7746. { .mfi
  7747. nop __LINE__
  7748. FMA_C f112 = f127, f113, f32
  7749. nop __LINE__
  7750. }
  7751. { .mfi
  7752. nop __LINE__
  7753. FMA_C f116 = f127, f117, f36
  7754. nop __LINE__
  7755. }
  7756. ;;
  7757. { .mfi
  7758. nop __LINE__
  7759. FMA_D f113 = f126, f113, f33
  7760. nop __LINE__
  7761. }
  7762. { .mfi
  7763. nop __LINE__
  7764. FMA_D f117 = f126, f117, f37
  7765. nop __LINE__
  7766. }
  7767. ;;
  7768. { .mfi
  7769. nop __LINE__
  7770. FMA_C f114 = f127, f115, f34
  7771. nop __LINE__
  7772. }
  7773. { .mfi
  7774. nop __LINE__
  7775. FMA_C f118 = f127, f119, f38
  7776. nop __LINE__
  7777. }
  7778. ;;
  7779. { .mfi
  7780. nop __LINE__
  7781. FMA_D f115 = f126, f115, f35
  7782. nop __LINE__
  7783. }
  7784. { .mfi
  7785. nop __LINE__
  7786. FMA_D f119 = f126, f119, f39
  7787. nop __LINE__
  7788. }
  7789. ;;
  /*
   * Store f112..f115 -> [AOFFSET], f116..f119 -> [AOFFSET2].  The integer
   * slots compute, in parallel:
   *     r2 = (K - KK) << ZBASE_SHIFT
   *     L  = KK
   * The last AOFFSET store post-increments by -27 * SIZE; with the earlier
   * stores in this section (8 + 8 + 8 + 3 SIZE units) that returns AOFFSET
   * to the value it had when the section was entered.
   */
  7790. { .mmi
  7791. STFD [AOFFSET] = f112, SIZE
  7792. STFD [AOFFSET2] = f116, SIZE
  7793. sub r2 = K, KK
  7794. }
  7795. ;;
  7796. { .mmi
  7797. STFD [AOFFSET] = f113, SIZE
  7798. STFD [AOFFSET2] = f117, SIZE
  7799. mov L = KK
  7800. }
  7801. ;;
  7802. { .mmi
  7803. STFD [AOFFSET] = f114, SIZE
  7804. STFD [AOFFSET2] = f118, SIZE
  7805. shladd r2 = r2, ZBASE_SHIFT, r0
  7806. }
  7807. ;;
  7808. { .mmi
  7809. STFD [AOFFSET] = f115, -27 * SIZE
  7810. STFD [AOFFSET2] = f119
  7811. nop __LINE__
  7812. }
  7813. ;;
  /*
   * Store the same eight values through C4 / C8 (each advances 8 * SIZE in
   * total) while:
   *   - BOFFSET and AOFFSET are each advanced by (r2 << 2);
   *   - f64, f65, f80, f81, f96, f97, f112, f113 are reset from f0
   *     (only these eight are cleared here);
   *   - p6 = (I != 1) is evaluated before I is decremented, and the
   *     predicated branch returns to .L011 while p6 is set.
   */
  7814. { .mfi
  7815. STFD [C4 ] = f112, SIZE
  7816. mov f64 = f0
  7817. shladd BOFFSET = r2, 2, BOFFSET
  7818. }
  7819. { .mfi
  7820. STFD [C8 ] = f116, SIZE
  7821. mov f65 = f0
  7822. shladd AOFFSET = r2, 2, AOFFSET
  7823. }
  7824. ;;
  7825. { .mfi
  7826. STFD [C4 ] = f113, SIZE
  7827. mov f80 = f0
  7828. cmp.ne p6, p0 = 1, I
  7829. }
  7830. { .mfi
  7831. STFD [C8 ] = f117, SIZE
  7832. mov f81 = f0
  7833. nop __LINE__
  7834. }
  7835. ;;
  7836. { .mfi
  7837. STFD [C4 ] = f114, SIZE
  7838. mov f96 = f0
  7839. adds I = -1, I
  7840. }
  7841. { .mfi
  7842. STFD [C8 ] = f118, SIZE
  7843. mov f97 = f0
  7844. nop __LINE__
  7845. }
  7846. ;;
  7847. { .mfi
  7848. STFD [C4 ] = f115, 5 * SIZE
  7849. mov f112 = f0
  7850. nop __LINE__
  7851. }
  7852. { .mfb
  7853. STFD [C8 ] = f119, 5 * SIZE
  7854. mov f113 = f0
  7855. (p6) br.cond.dptk .L011
  7856. }
  7857. #endif
  7858. #ifdef RT
  7859. { .mfi
  7860. LDFPD f76, f77 = [BOFFSET]
  7861. FMPY f32 = f72, f112
  7862. adds BOFFSET = - 2 * SIZE, BOFFSET
  7863. }
  7864. { .mfi
  7865. nop __LINE__
  7866. FMPY f36 = f72, f116
  7867. nop __LINE__
  7868. }
  7869. ;;
  7870. { .mfi
  7871. LDFPD f78, f79 = [BOFFSET]
  7872. FMPY f33 = f73, f112
  7873. adds BOFFSET = - 4 * SIZE, BOFFSET
  7874. }
  7875. { .mfi
  7876. nop __LINE__
  7877. FMPY f37 = f73, f116
  7878. nop __LINE__
  7879. }
  7880. ;;
  7881. { .mfi
  7882. LDFPD f88, f89 = [BOFFSET]
  7883. FMPY f34 = f72, f114
  7884. adds BOFFSET = - 2 * SIZE, BOFFSET
  7885. }
  7886. { .mfi
  7887. nop __LINE__
  7888. FMPY f38 = f72, f118
  7889. nop __LINE__
  7890. }
  7891. ;;
  7892. { .mfi
  7893. LDFPD f90, f91 = [BOFFSET]
  7894. FMPY f35 = f73, f114
  7895. adds BOFFSET = - 2 * SIZE, BOFFSET
  7896. }
  7897. { .mfi
  7898. nop __LINE__
  7899. FMPY f39 = f73, f118
  7900. nop __LINE__
  7901. }
  7902. ;;
  7903. { .mfi
  7904. LDFPD f92, f93 = [BOFFSET]
  7905. FMA_C f112 = f73, f113, f32
  7906. adds BOFFSET = - 6 * SIZE, BOFFSET
  7907. }
  7908. { .mfi
  7909. nop __LINE__
  7910. FMA_C f116 = f73, f117, f36
  7911. nop __LINE__
  7912. }
  7913. ;;
  7914. { .mfi
  7915. LDFPD f104, f105 = [BOFFSET]
  7916. FMA_D f113 = f72, f113, f33
  7917. adds BOFFSET = - 2 * SIZE, BOFFSET
  7918. }
  7919. { .mfi
  7920. nop __LINE__
  7921. FMA_D f117 = f72, f117, f37
  7922. nop __LINE__
  7923. }
  7924. ;;
  7925. { .mfi
  7926. LDFPD f106, f107 = [BOFFSET]
  7927. FMA_C f114 = f73, f115, f34
  7928. adds BOFFSET = - 8 * SIZE, BOFFSET
  7929. }
  7930. { .mfi
  7931. nop __LINE__
  7932. FMA_C f118 = f73, f119, f38
  7933. nop __LINE__
  7934. }
  7935. ;;
  7936. { .mfi
  7937. LDFPD f120, f121 = [BOFFSET]
  7938. FMA_D f115 = f72, f115, f35
  7939. adds AOFFSET2 = 28 * SIZE, AOFFSET
  7940. }
  7941. { .mfi
  7942. nop __LINE__
  7943. FMA_D f119 = f72, f119, f39
  7944. adds AOFFSET = 24 * SIZE, AOFFSET
  7945. }
  7946. ;;
  7947. { .mfi
  7948. STFD [AOFFSET] = f112, SIZE
  7949. FNMA f96 = f74, f112, f96
  7950. nop __LINE__
  7951. }
  7952. { .mfi
  7953. STFD [AOFFSET2] = f116, SIZE
  7954. FNMA f100 = f74, f116, f100
  7955. nop __LINE__
  7956. }
  7957. ;;
  7958. { .mfi
  7959. STFD [AOFFSET] = f113, SIZE
  7960. FMA_A f97 = f75, f112, f97
  7961. nop __LINE__
  7962. }
  7963. { .mfi
  7964. STFD [AOFFSET2] = f117, SIZE
  7965. FMA_A f101 = f75, f116, f101
  7966. nop __LINE__
  7967. }
  7968. ;;
  7969. { .mfi
  7970. STFD [AOFFSET] = f114, SIZE
  7971. FNMA f98 = f74, f114, f98
  7972. nop __LINE__
  7973. }
  7974. { .mfi
  7975. STFD [AOFFSET2] = f118, SIZE
  7976. FNMA f102 = f74, f118, f102
  7977. nop __LINE__
  7978. }
  7979. ;;
  7980. { .mfi
  7981. STFD [AOFFSET] = f115, -11 * SIZE
  7982. FMA_A f99 = f75, f114, f99
  7983. nop __LINE__
  7984. }
  7985. { .mfi
  7986. STFD [AOFFSET2] = f119, -11 * SIZE
  7987. FMA_A f103 = f75, f118, f103
  7988. nop __LINE__
  7989. }
  7990. ;;
  7991. { .mfi
  7992. STFD [C4 ] = f112, SIZE
  7993. FMA_B f96 = f75, f113, f96
  7994. nop __LINE__
  7995. }
  7996. { .mfi
  7997. STFD [C8 ] = f116, SIZE
  7998. FMA_B f100 = f75, f117, f100
  7999. nop __LINE__
  8000. }
  8001. ;;
  8002. { .mfi
  8003. STFD [C4 ] = f113, SIZE
  8004. FNMA f97 = f74, f113, f97
  8005. nop __LINE__
  8006. }
  8007. { .mfi
  8008. STFD [C8 ] = f117, SIZE
  8009. FNMA f101 = f74, f117, f101
  8010. nop __LINE__
  8011. }
  8012. ;;
  8013. { .mfi
  8014. STFD [C4 ] = f114, SIZE
  8015. FMA_B f98 = f75, f115, f98
  8016. nop __LINE__
  8017. }
  8018. { .mfi
  8019. STFD [C8 ] = f118, SIZE
  8020. FMA_B f102 = f75, f119, f102
  8021. nop __LINE__
  8022. }
  8023. ;;
  8024. { .mfi
  8025. STFD [C4 ] = f115, 5 * SIZE
  8026. FNMA f99 = f74, f115, f99
  8027. nop __LINE__
  8028. }
  8029. { .mfi
  8030. STFD [C8 ] = f119, 5 * SIZE
  8031. FNMA f103 = f74, f119, f103
  8032. nop __LINE__
  8033. }
  8034. ;;
  8035. { .mfi
  8036. nop __LINE__
  8037. FNMA f80 = f76, f112, f80
  8038. nop __LINE__
  8039. }
  8040. { .mfi
  8041. nop __LINE__
  8042. FNMA f84 = f76, f116, f84
  8043. nop __LINE__
  8044. }
  8045. ;;
  8046. { .mfi
  8047. nop __LINE__
  8048. FMA_A f81 = f77, f112, f81
  8049. nop __LINE__
  8050. }
  8051. { .mfi
  8052. nop __LINE__
  8053. FMA_A f85 = f77, f116, f85
  8054. nop __LINE__
  8055. }
  8056. ;;
  8057. { .mfi
  8058. nop __LINE__
  8059. FNMA f82 = f76, f114, f82
  8060. nop __LINE__
  8061. }
  8062. { .mfi
  8063. nop __LINE__
  8064. FNMA f86 = f76, f118, f86
  8065. nop __LINE__
  8066. }
  8067. ;;
  8068. { .mfi
  8069. nop __LINE__
  8070. FMA_A f83 = f77, f114, f83
  8071. nop __LINE__
  8072. }
  8073. { .mfi
  8074. nop __LINE__
  8075. FMA_A f87 = f77, f118, f87
  8076. nop __LINE__
  8077. }
  8078. ;;
  8079. { .mfi
  8080. nop __LINE__
  8081. FMA_B f80 = f77, f113, f80
  8082. nop __LINE__
  8083. }
  8084. { .mfi
  8085. nop __LINE__
  8086. FMA_B f84 = f77, f117, f84
  8087. nop __LINE__
  8088. }
  8089. ;;
  8090. { .mfi
  8091. nop __LINE__
  8092. FNMA f81 = f76, f113, f81
  8093. nop __LINE__
  8094. }
  8095. { .mfi
  8096. nop __LINE__
  8097. FNMA f85 = f76, f117, f85
  8098. nop __LINE__
  8099. }
  8100. ;;
  8101. { .mfi
  8102. nop __LINE__
  8103. FMA_B f82 = f77, f115, f82
  8104. nop __LINE__
  8105. }
  8106. { .mfi
  8107. nop __LINE__
  8108. FMA_B f86 = f77, f119, f86
  8109. nop __LINE__
  8110. }
  8111. ;;
  8112. { .mfi
  8113. nop __LINE__
  8114. FNMA f83 = f76, f115, f83
  8115. nop __LINE__
  8116. }
  8117. { .mfi
  8118. nop __LINE__
  8119. FNMA f87 = f76, f119, f87
  8120. nop __LINE__
  8121. }
  8122. ;;
  8123. { .mfi
  8124. nop __LINE__
  8125. FNMA f64 = f78, f112, f64
  8126. nop __LINE__
  8127. }
  8128. { .mfi
  8129. nop __LINE__
  8130. FNMA f68 = f78, f116, f68
  8131. nop __LINE__
  8132. }
  8133. ;;
  8134. { .mfi
  8135. nop __LINE__
  8136. FMA_A f65 = f79, f112, f65
  8137. nop __LINE__
  8138. }
  8139. { .mfi
  8140. nop __LINE__
  8141. FMA_A f69 = f79, f116, f69
  8142. nop __LINE__
  8143. }
  8144. ;;
  8145. { .mfi
  8146. nop __LINE__
  8147. FNMA f66 = f78, f114, f66
  8148. nop __LINE__
  8149. }
  8150. { .mfi
  8151. nop __LINE__
  8152. FNMA f70 = f78, f118, f70
  8153. nop __LINE__
  8154. }
  8155. ;;
  8156. { .mfi
  8157. nop __LINE__
  8158. FMA_A f67 = f79, f114, f67
  8159. nop __LINE__
  8160. }
  8161. { .mfi
  8162. nop __LINE__
  8163. FMA_A f71 = f79, f118, f71
  8164. nop __LINE__
  8165. }
  8166. ;;
  8167. { .mfi
  8168. nop __LINE__
  8169. FMA_B f64 = f79, f113, f64
  8170. nop __LINE__
  8171. }
  8172. { .mfi
  8173. nop __LINE__
  8174. FMA_B f68 = f79, f117, f68
  8175. nop __LINE__
  8176. }
  8177. ;;
  8178. { .mfi
  8179. nop __LINE__
  8180. FNMA f65 = f78, f113, f65
  8181. nop __LINE__
  8182. }
  8183. { .mfi
  8184. nop __LINE__
  8185. FNMA f69 = f78, f117, f69
  8186. nop __LINE__
  8187. }
  8188. ;;
  8189. { .mfi
  8190. nop __LINE__
  8191. FMA_B f66 = f79, f115, f66
  8192. nop __LINE__
  8193. }
  8194. { .mfi
  8195. nop __LINE__
  8196. FMA_B f70 = f79, f119, f70
  8197. nop __LINE__
  8198. }
  8199. ;;
  8200. { .mfi
  8201. nop __LINE__
  8202. FNMA f67 = f78, f115, f67
  8203. nop __LINE__
  8204. }
  8205. { .mfi
  8206. nop __LINE__
  8207. FNMA f71 = f78, f119, f71
  8208. nop __LINE__
  8209. }
  8210. ;;
  8211. { .mfi
  8212. nop __LINE__
  8213. FMPY f32 = f88, f96
  8214. nop __LINE__
  8215. }
  8216. { .mfi
  8217. nop __LINE__
  8218. FMPY f36 = f88, f100
  8219. nop __LINE__
  8220. }
  8221. ;;
  8222. { .mfi
  8223. nop __LINE__
  8224. FMPY f33 = f89, f96
  8225. nop __LINE__
  8226. }
  8227. { .mfi
  8228. nop __LINE__
  8229. FMPY f37 = f89, f100
  8230. nop __LINE__
  8231. }
  8232. ;;
  8233. { .mfi
  8234. nop __LINE__
  8235. FMPY f34 = f88, f98
  8236. nop __LINE__
  8237. }
  8238. { .mfi
  8239. nop __LINE__
  8240. FMPY f38 = f88, f102
  8241. nop __LINE__
  8242. }
  8243. ;;
  8244. { .mfi
  8245. nop __LINE__
  8246. FMPY f35 = f89, f98
  8247. nop __LINE__
  8248. }
  8249. { .mfi
  8250. nop __LINE__
  8251. FMPY f39 = f89, f102
  8252. nop __LINE__
  8253. }
  8254. ;;
  8255. { .mfi
  8256. nop __LINE__
  8257. FMA_C f96 = f89, f97, f32
  8258. nop __LINE__
  8259. }
  8260. { .mfi
  8261. nop __LINE__
  8262. FMA_C f100 = f89, f101, f36
  8263. nop __LINE__
  8264. }
  8265. ;;
  8266. { .mfi
  8267. nop __LINE__
  8268. FMA_D f97 = f88, f97, f33
  8269. nop __LINE__
  8270. }
  8271. { .mfi
  8272. nop __LINE__
  8273. FMA_D f101 = f88, f101, f37
  8274. nop __LINE__
  8275. }
  8276. ;;
  8277. { .mfi
  8278. nop __LINE__
  8279. FMA_C f98 = f89, f99, f34
  8280. nop __LINE__
  8281. }
  8282. { .mfi
  8283. nop __LINE__
  8284. FMA_C f102 = f89, f103, f38
  8285. nop __LINE__
  8286. }
  8287. ;;
  8288. { .mfi
  8289. nop __LINE__
  8290. FMA_D f99 = f88, f99, f35
  8291. nop __LINE__
  8292. }
  8293. { .mfi
  8294. nop __LINE__
  8295. FMA_D f103 = f88, f103, f39
  8296. nop __LINE__
  8297. }
  8298. ;;
  8299. { .mfi
  8300. STFD [AOFFSET] = f96, SIZE
  8301. FNMA f80 = f90, f96, f80
  8302. nop __LINE__
  8303. }
  8304. { .mfi
  8305. STFD [AOFFSET2] = f100, SIZE
  8306. FNMA f84 = f90, f100, f84
  8307. nop __LINE__
  8308. }
  8309. ;;
  8310. { .mfi
  8311. STFD [AOFFSET] = f97, SIZE
  8312. FMA_A f81 = f91, f96, f81
  8313. nop __LINE__
  8314. }
  8315. { .mfi
  8316. STFD [AOFFSET2] = f101, SIZE
  8317. FMA_A f85 = f91, f100, f85
  8318. nop __LINE__
  8319. }
  8320. ;;
  8321. { .mfi
  8322. STFD [AOFFSET] = f98, SIZE
  8323. FNMA f82 = f90, f98, f82
  8324. nop __LINE__
  8325. }
  8326. { .mfi
  8327. STFD [AOFFSET2] = f102, SIZE
  8328. FNMA f86 = f90, f102, f86
  8329. nop __LINE__
  8330. }
  8331. ;;
  8332. { .mfi
  8333. STFD [AOFFSET] = f99, -11 * SIZE
  8334. FMA_A f83 = f91, f98, f83
  8335. nop __LINE__
  8336. }
  8337. { .mfi
  8338. STFD [AOFFSET2] = f103, -11 * SIZE
  8339. FMA_A f87 = f91, f102, f87
  8340. nop __LINE__
  8341. }
  8342. ;;
  8343. { .mfi
  8344. STFD [C3 ] = f96, SIZE
  8345. FMA_B f80 = f91, f97, f80
  8346. nop __LINE__
  8347. }
  8348. { .mfi
  8349. STFD [C7 ] = f100, SIZE
  8350. FMA_B f84 = f91, f101, f84
  8351. nop __LINE__
  8352. }
  8353. ;;
  8354. { .mfi
  8355. STFD [C3 ] = f97, SIZE
  8356. FNMA f81 = f90, f97, f81
  8357. nop __LINE__
  8358. }
  8359. { .mfi
  8360. STFD [C7 ] = f101, SIZE
  8361. FNMA f85 = f90, f101, f85
  8362. nop __LINE__
  8363. }
  8364. ;;
  8365. { .mfi
  8366. STFD [C3 ] = f98, SIZE
  8367. FMA_B f82 = f91, f99, f82
  8368. nop __LINE__
  8369. }
  8370. { .mfi
  8371. STFD [C7 ] = f102, SIZE
  8372. FMA_B f86 = f91, f103, f86
  8373. nop __LINE__
  8374. }
  8375. ;;
  8376. { .mfi
  8377. STFD [C3 ] = f99, 5 * SIZE
  8378. FNMA f83 = f90, f99, f83
  8379. nop __LINE__
  8380. }
  8381. { .mfi
  8382. STFD [C7 ] = f103, 5 * SIZE
  8383. FNMA f87 = f90, f103, f87
  8384. nop __LINE__
  8385. }
  8386. ;;
  8387. { .mfi
  8388. nop __LINE__
  8389. FNMA f64 = f92, f96, f64
  8390. nop __LINE__
  8391. }
  8392. { .mfi
  8393. nop __LINE__
  8394. FNMA f68 = f92, f100, f68
  8395. nop __LINE__
  8396. }
  8397. ;;
  8398. { .mfi
  8399. nop __LINE__
  8400. FMA_A f65 = f93, f96, f65
  8401. nop __LINE__
  8402. }
  8403. { .mfi
  8404. nop __LINE__
  8405. FMA_A f69 = f93, f100, f69
  8406. nop __LINE__
  8407. }
  8408. ;;
  8409. { .mfi
  8410. nop __LINE__
  8411. FNMA f66 = f92, f98, f66
  8412. nop __LINE__
  8413. }
  8414. { .mfi
  8415. nop __LINE__
  8416. FNMA f70 = f92, f102, f70
  8417. nop __LINE__
  8418. }
  8419. ;;
  8420. { .mfi
  8421. nop __LINE__
  8422. FMA_A f67 = f93, f98, f67
  8423. nop __LINE__
  8424. }
  8425. { .mfi
  8426. nop __LINE__
  8427. FMA_A f71 = f93, f102, f71
  8428. nop __LINE__
  8429. }
  8430. ;;
  8431. { .mfi
  8432. nop __LINE__
  8433. FMA_B f64 = f93, f97, f64
  8434. nop __LINE__
  8435. }
  8436. { .mfi
  8437. nop __LINE__
  8438. FMA_B f68 = f93, f101, f68
  8439. nop __LINE__
  8440. }
  8441. ;;
  8442. { .mfi
  8443. nop __LINE__
  8444. FNMA f65 = f92, f97, f65
  8445. nop __LINE__
  8446. }
  8447. { .mfi
  8448. nop __LINE__
  8449. FNMA f69 = f92, f101, f69
  8450. nop __LINE__
  8451. }
  8452. ;;
  8453. { .mfi
  8454. nop __LINE__
  8455. FMA_B f66 = f93, f99, f66
  8456. nop __LINE__
  8457. }
  8458. { .mfi
  8459. nop __LINE__
  8460. FMA_B f70 = f93, f103, f70
  8461. nop __LINE__
  8462. }
  8463. ;;
  8464. { .mfi
  8465. nop __LINE__
  8466. FNMA f67 = f92, f99, f67
  8467. nop __LINE__
  8468. }
  8469. { .mfi
  8470. nop __LINE__
  8471. FNMA f71 = f92, f103, f71
  8472. nop __LINE__
  8473. }
  8474. ;;
  8475. { .mfi
  8476. nop __LINE__
  8477. FMPY f32 = f104, f80
  8478. nop __LINE__
  8479. }
  8480. { .mfi
  8481. nop __LINE__
  8482. FMPY f36 = f104, f84
  8483. nop __LINE__
  8484. }
  8485. ;;
  8486. { .mfi
  8487. nop __LINE__
  8488. FMPY f33 = f105, f80
  8489. nop __LINE__
  8490. }
  8491. { .mfi
  8492. nop __LINE__
  8493. FMPY f37 = f105, f84
  8494. nop __LINE__
  8495. }
  8496. ;;
  8497. { .mfi
  8498. nop __LINE__
  8499. FMPY f34 = f104, f82
  8500. nop __LINE__
  8501. }
  8502. { .mfi
  8503. nop __LINE__
  8504. FMPY f38 = f104, f86
  8505. nop __LINE__
  8506. }
  8507. ;;
  8508. { .mfi
  8509. nop __LINE__
  8510. FMPY f35 = f105, f82
  8511. nop __LINE__
  8512. }
  8513. { .mfi
  8514. nop __LINE__
  8515. FMPY f39 = f105, f86
  8516. nop __LINE__
  8517. }
  8518. ;;
  8519. { .mfi
  8520. nop __LINE__
  8521. FMA_C f80 = f105, f81, f32
  8522. nop __LINE__
  8523. }
  8524. { .mfi
  8525. nop __LINE__
  8526. FMA_C f84 = f105, f85, f36
  8527. nop __LINE__
  8528. }
  8529. ;;
  8530. { .mfi
  8531. nop __LINE__
  8532. FMA_D f81 = f104, f81, f33
  8533. nop __LINE__
  8534. }
  8535. { .mfi
  8536. nop __LINE__
  8537. FMA_D f85 = f104, f85, f37
  8538. nop __LINE__
  8539. }
  8540. ;;
  8541. { .mfi
  8542. nop __LINE__
  8543. FMA_C f82 = f105, f83, f34
  8544. nop __LINE__
  8545. }
  8546. { .mfi
  8547. nop __LINE__
  8548. FMA_C f86 = f105, f87, f38
  8549. nop __LINE__
  8550. }
  8551. ;;
  8552. { .mfi
  8553. nop __LINE__
  8554. FMA_D f83 = f104, f83, f35
  8555. nop __LINE__
  8556. }
  8557. { .mfi
  8558. nop __LINE__
  8559. FMA_D f87 = f104, f87, f39
  8560. nop __LINE__
  8561. }
  8562. ;;
  8563. { .mfi
  8564. STFD [AOFFSET] = f80, SIZE
  8565. FNMA f64 = f106, f80, f64
  8566. nop __LINE__
  8567. }
  8568. { .mfi
  8569. STFD [AOFFSET2] = f84, SIZE
  8570. FNMA f68 = f106, f84, f68
  8571. nop __LINE__
  8572. }
  8573. ;;
  8574. { .mfi
  8575. STFD [AOFFSET] = f81, SIZE
  8576. FMA_A f65 = f107, f80, f65
  8577. nop __LINE__
  8578. }
  8579. { .mfi
  8580. STFD [AOFFSET2] = f85, SIZE
  8581. FMA_A f69 = f107, f84, f69
  8582. nop __LINE__
  8583. }
  8584. ;;
  8585. { .mfi
  8586. STFD [AOFFSET] = f82, SIZE
  8587. FNMA f66 = f106, f82, f66
  8588. nop __LINE__
  8589. }
  8590. { .mfi
  8591. STFD [AOFFSET2] = f86, SIZE
  8592. FNMA f70 = f106, f86, f70
  8593. nop __LINE__
  8594. }
  8595. ;;
  8596. { .mfi
  8597. STFD [AOFFSET] = f83, -11 * SIZE
  8598. FMA_A f67 = f107, f82, f67
  8599. nop __LINE__
  8600. }
  8601. { .mfi
  8602. STFD [AOFFSET2] = f87, -11 * SIZE
  8603. FMA_A f71 = f107, f86, f71
  8604. nop __LINE__
  8605. }
  8606. ;;
  8607. { .mfi
  8608. STFD [C2 ] = f80, SIZE
  8609. FMA_B f64 = f107, f81, f64
  8610. nop __LINE__
  8611. }
  8612. { .mfi
  8613. STFD [C6 ] = f84, SIZE
  8614. FMA_B f68 = f107, f85, f68
  8615. nop __LINE__
  8616. }
  8617. ;;
  8618. { .mfi
  8619. STFD [C2 ] = f81, SIZE
  8620. FNMA f65 = f106, f81, f65
  8621. nop __LINE__
  8622. }
  8623. { .mfi
  8624. STFD [C6 ] = f85, SIZE
  8625. FNMA f69 = f106, f85, f69
  8626. nop __LINE__
  8627. }
  8628. ;;
  8629. { .mfi
  8630. STFD [C2 ] = f82, SIZE
  8631. FMA_B f66 = f107, f83, f66
  8632. nop __LINE__
  8633. }
  8634. { .mfi
  8635. STFD [C6 ] = f86, SIZE
  8636. FMA_B f70 = f107, f87, f70
  8637. nop __LINE__
  8638. }
  8639. ;;
  8640. { .mfi
  8641. STFD [C2 ] = f83, 5 * SIZE
  8642. FNMA f67 = f106, f83, f67
  8643. nop __LINE__
  8644. }
  8645. { .mfi
  8646. STFD [C6 ] = f87, 5 * SIZE
  8647. FNMA f71 = f106, f87, f71
  8648. nop __LINE__
  8649. }
  8650. ;;
  8651. { .mfi
  8652. nop __LINE__
  8653. FMPY f32 = f120, f64
  8654. nop __LINE__
  8655. }
  8656. { .mfi
  8657. nop __LINE__
  8658. FMPY f36 = f120, f68
  8659. nop __LINE__
  8660. }
  8661. ;;
  8662. { .mfi
  8663. nop __LINE__
  8664. FMPY f33 = f121, f64
  8665. nop __LINE__
  8666. }
  8667. { .mfi
  8668. nop __LINE__
  8669. FMPY f37 = f121, f68
  8670. nop __LINE__
  8671. }
  8672. ;;
  8673. { .mfi
  8674. nop __LINE__
  8675. FMPY f34 = f120, f66
  8676. nop __LINE__
  8677. }
  8678. { .mfi
  8679. nop __LINE__
  8680. FMPY f38 = f120, f70
  8681. nop __LINE__
  8682. }
  8683. ;;
  8684. { .mfi
  8685. nop __LINE__
  8686. FMPY f35 = f121, f66
  8687. nop __LINE__
  8688. }
  8689. { .mfi
  8690. nop __LINE__
  8691. FMPY f39 = f121, f70
  8692. nop __LINE__
  8693. }
  8694. ;;
  8695. { .mfi
  8696. nop __LINE__
  8697. FMA_C f64 = f121, f65, f32
  8698. nop __LINE__
  8699. }
  8700. { .mfi
  8701. nop __LINE__
  8702. FMA_C f68 = f121, f69, f36
  8703. nop __LINE__
  8704. }
  8705. ;;
  8706. { .mfi
  8707. nop __LINE__
  8708. FMA_D f65 = f120, f65, f33
  8709. nop __LINE__
  8710. }
  8711. { .mfi
  8712. nop __LINE__
  8713. FMA_D f69 = f120, f69, f37
  8714. nop __LINE__
  8715. }
  8716. ;;
  8717. { .mfi
  8718. nop __LINE__
  8719. FMA_C f66 = f121, f67, f34
  8720. nop __LINE__
  8721. }
  8722. { .mfi
  8723. nop __LINE__
  8724. FMA_C f70 = f121, f71, f38
  8725. nop __LINE__
  8726. }
  8727. ;;
  8728. { .mfi
  8729. nop __LINE__
  8730. FMA_D f67 = f120, f67, f35
  8731. nop __LINE__
  8732. }
  8733. { .mfi
  8734. nop __LINE__
  8735. FMA_D f71 = f120, f71, f39
  8736. nop __LINE__
  8737. }
  8738. ;;
  8739. { .mmi
  8740. STFD [AOFFSET] = f64, SIZE
  8741. STFD [AOFFSET2] = f68, SIZE
  8742. shladd r2 = K, ZBASE_SHIFT, r0
  8743. }
  8744. ;;
  8745. { .mmi
  8746. STFD [AOFFSET] = f65, SIZE
  8747. STFD [AOFFSET2] = f69, SIZE
  8748. shladd AORIG = r2, 2, AORIG
  8749. }
  8750. ;;
  8751. { .mmi
  8752. STFD [AOFFSET] = f66, SIZE
  8753. STFD [AOFFSET2] = f70, SIZE
  8754. nop __LINE__
  8755. }
  8756. ;;
  8757. { .mmi
  8758. STFD [AOFFSET] = f67, -3 * SIZE
  8759. STFD [AOFFSET2] = f71
  8760. nop __LINE__
  8761. }
  8762. ;;
  8763. { .mfi
  8764. STFD [C1 ] = f64, SIZE
  8765. mov f64 = f0
  8766. cmp.ne p6, p0 = 1, I
  8767. }
  8768. { .mfi
  8769. STFD [C5 ] = f68, SIZE
  8770. mov f81 = f0
  8771. nop __LINE__
  8772. }
  8773. ;;
  8774. { .mfi
  8775. STFD [C1 ] = f65, SIZE
  8776. mov f65 = f0
  8777. nop __LINE__
  8778. }
  8779. { .mfi
  8780. STFD [C5 ] = f69, SIZE
  8781. mov f96 = f0
  8782. nop __LINE__
  8783. }
  8784. ;;
  8785. { .mfi
  8786. STFD [C1 ] = f66, SIZE
  8787. mov f80 = f0
  8788. sub L = K, KK
  8789. }
  8790. { .mfi
  8791. STFD [C5 ] = f70, SIZE
  8792. mov f97 = f0
  8793. nop __LINE__
  8794. }
  8795. ;;
  8796. { .mfi
  8797. STFD [C1 ] = f67, 5 * SIZE
  8798. mov f112 = f0
  8799. adds I = -1, I
  8800. }
  8801. { .mfb
  8802. STFD [C5 ] = f71, 5 * SIZE
  8803. mov f113 = f0
  8804. (p6) br.cond.dptk .L011
  8805. }
  8806. ;;
  8807. #endif
  // .L020: set-up for the .L022 loop.  Runs only when bit 1 of M is set;
  // otherwise control goes to .L030.
  //  - L is KK (LT/RN) or K - KK (other variants), then turned into a
  //    br.cloop count for a loop that performs two steps per pass.
  //  - BOFFSET (and, outside LT/RN, AOFFSET) is positioned, and the first
  //    A operands (f32..f35) and B operands (f48..f55) are preloaded when
  //    L != 0.
  //  - f66/f67, f82/f83, f98/f99 and f114/f115 are cleared by copying f0.
  // NOTE(review): .L022 also accumulates into f64/f65, f80/f81, f96/f97 and
  // f112/f113, which are not cleared here.  The fall-through code just above
  // this label zeroes them; confirm every other path into .L020 does too.
  8808. .L020:
  8809. { .mib
  // Raw step count before conversion to a loop count.
  8810. #if defined(LT) || defined(RN)
  8811. mov L = KK
  8812. #else
  8813. sub L = K, KK
  8814. #endif
  // p6 = (bit 1 of M is 0), p7 = its complement; leave when p6 is set.
  8815. tbit.z p6, p7 = M, 1
  8816. (p6) br.cond.dptk .L030
  8817. }
  8818. ;;
  8819. { .mmi
  // p7 is redefined here as (L != 0); it guards every preload below.
  8820. cmp.ne p7, p0 = r0, L
  8821. adds BOFFSET = 0 * SIZE, B
  // r2 = K << (1 + ZBASE_SHIFT); within this block only the LN path uses it.
  8822. shl r2 = K, 1 + ZBASE_SHIFT
  8823. }
  8824. { .mmi
  // r3 = KK << ZBASE_SHIFT; used by the non-LT/RN path to offset B and AORIG.
  8825. shladd r3 = KK, ZBASE_SHIFT, r0
  8826. nop __LINE__
  8827. nop __LINE__
  8828. }
  8829. ;;
  8830. #if defined(LT) || defined(RN)
  // LT/RN: BOFFSET stays at B and AOFFSET is used as found on entry
  // (this path does not write it).
  8831. { .mfb
  8832. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  8833. mov f66 = f0
  8834. nop __LINE__
  8835. }
  8836. { .mmf
  8837. nop __LINE__
  8838. nop __LINE__
  8839. mov f67 = f0
  8840. }
  8841. ;;
  8842. #else
  8843. { .mfi
  // BOFFSET = B + (r3 << 2)
  8844. shladd BOFFSET = r3, 2, B
  8845. mov f66 = f0
  8846. #ifdef LN
  // LN only: move AORIG back by r2 before AOFFSET is derived from it.
  8847. sub AORIG = AORIG, r2
  8848. #else
  8849. nop __LINE__
  8850. #endif
  8851. }
  8852. ;;
  8853. { .mfi
  8854. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  8855. mov f67 = f0
  // AOFFSET = AORIG + (r3 << 1)
  8856. shladd AOFFSET = r3, 1, AORIG
  8857. }
  8858. ;;
  8859. #endif
  8860. ;;
  // Loop count: L <- ((L + 1) >> 1) - 1.
  // p12 is set when bit 0 of (L + 1) is clear, i.e. the raw count was odd;
  // .L022 uses it to drop the second half of the pass on which L reaches 0.
  8861. adds L = 1, L
  8862. ;;
  8863. { .mfi
  8864. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  8865. mov f82 = f0
  // Reads L before the shr in the next bundle (same group) rewrites it.
  8866. tbit.z p12, p0 = L, 0
  8867. }
  8868. { .mfi
  8869. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  8870. mov f83 = f0
  8871. shr L = L, 1
  8872. }
  8873. ;;
  8874. { .mfi
  8875. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  8876. mov f98 = f0
  8877. adds L = -1, L
  8878. }
  8879. { .mfi
  8880. (p7) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  8881. mov f99 = f0
  // p3 starts out true (r0 == r0); it predicates the second half of .L022.
  8882. cmp.eq p3, p0 = r0, r0
  8883. }
  8884. ;;
  8885. { .mfi
  8886. (p7) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  8887. mov f114 = f0
  8888. mov ar.lc = L
  8889. }
  8890. { .mfi
  // A-side prefetch pointer; the B-side one (PREB) is set inside .L022.
  8891. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  8892. mov f115 = f0
  8893. nop __LINE__
  8894. }
  8895. ;;
  // L == -1 means the raw count was 0: skip the loop entirely.
  8896. cmp.eq p6, p0 = -1, L
  8897. (p6) br.cond.dpnt .L028
  8898. ;;
  // .L022: multiply-accumulate loop, two steps per pass.  The pass count is
  // held in ar.lc (br.cloop at the bottom); L is decremented alongside so
  // that predicates can test it.
  //   first half  (unpredicated): A = f32..f35, B = f48..f55
  //   second half (under p3):     A = f40..f43, B = f56..f63
  //   accumulators: f64-f67, f80-f83, f96-f99, f112-f115
  // p3 stays true unless p12 is set; with p12 set, p3 = (L != 0), so the
  // second half and its operand loads are skipped on the pass where L == 0.
  // p4 = (L != 0) guards the reload of f32..f35 / f48..f55 for the next pass;
  // each reload is placed after the last first-half use of the registers it
  // overwrites.
  // FMA, FMA_A and FMA_B are macros defined outside this chunk; presumably
  // they select add vs. subtract per conjugation variant -- TODO confirm.
  8899. .align 16
  8900. .L022:
  8901. { .mfi
  8902. lfetch.nt1 [PREA], 8 * SIZE
  8903. FMA f64 = f32, f48, f64 // A1 * B1
  // PREB is re-derived from the current BOFFSET on every pass.
  8904. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  8905. }
  8906. { .mfi
  8907. nop __LINE__
  8908. FMA_B f65 = f32, f49, f65 // A1 * B2
  // Only when p12 is set: p3 = (L != 0).
  8909. (p12) cmp.ne p3, p0 = 0, L
  8910. }
  8911. ;;
  8912. { .mfi
  8913. lfetch.nt1 [PREB], 16 * SIZE
  8914. FMA f80 = f32, f50, f80 // A1 * B3
  // p4 = (L != 0), p5 = its complement (p5 is not used inside this loop).
  8915. cmp.ne p4, p5 = 0, L
  8916. }
  8917. { .mfb
  8918. nop __LINE__
  8919. FMA_B f81 = f32, f51, f81 // A1 * B4
  8920. nop __LINE__
  8921. }
  8922. ;;
  // The p3-guarded loads below fetch the operands for the second half.
  8923. { .mfb
  8924. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  8925. FMA f96 = f32, f52, f96 // A1 * B5
  8926. nop __LINE__
  8927. }
  8928. { .mfb
  8929. nop __LINE__
  8930. FMA_B f97 = f32, f53, f97 // A1 * B6
  8931. nop __LINE__
  8932. }
  8933. ;;
  8934. { .mfb
  8935. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  8936. FMA f112 = f32, f54, f112 // A1 * B7
  8937. nop __LINE__
  8938. }
  8939. { .mfb
  8940. nop __LINE__
  8941. FMA_B f113 = f32, f55, f113 // A1 * B8
  8942. nop __LINE__
  8943. }
  8944. ;;
  8945. { .mfb
  8946. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  8947. FMA f65 = f33, f48, f65 // A2 * B1
  8948. nop __LINE__
  8949. }
  8950. { .mfb
  8951. nop __LINE__
  8952. FMA_A f64 = f33, f49, f64 // A2 * B2
  8953. nop __LINE__
  8954. }
  8955. ;;
  8956. { .mfb
  8957. (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  8958. FMA f81 = f33, f50, f81 // A2 * B3
  8959. nop __LINE__
  8960. }
  8961. { .mfb
  8962. nop __LINE__
  8963. FMA_A f80 = f33, f51, f80 // A2 * B4
  8964. nop __LINE__
  8965. }
  8966. ;;
  8967. { .mfb
  8968. (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  8969. FMA f97 = f33, f52, f97 // A2 * B5
  8970. nop __LINE__
  8971. }
  8972. { .mfb
  8973. nop __LINE__
  8974. FMA_A f96 = f33, f53, f96 // A2 * B6
  8975. nop __LINE__
  8976. }
  8977. ;;
  8978. { .mfb
  8979. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  8980. FMA f113 = f33, f54, f113 // A2 * B7
  8981. nop __LINE__
  8982. }
  8983. { .mfb
  8984. nop __LINE__
  8985. FMA_A f112 = f33, f55, f112 // A2 * B8
  8986. nop __LINE__
  8987. }
  8988. ;;
  8989. { .mfb
  8990. nop __LINE__
  8991. FMA f66 = f34, f48, f66 // A3 * B1
  8992. nop __LINE__
  8993. }
  8994. { .mfb
  8995. nop __LINE__
  8996. FMA_B f67 = f34, f49, f67 // A3 * B2
  8997. nop __LINE__
  8998. }
  8999. ;;
  9000. { .mfb
  9001. nop __LINE__
  9002. FMA f82 = f34, f50, f82 // A3 * B3
  9003. nop __LINE__
  9004. }
  9005. { .mfb
  9006. nop __LINE__
  9007. FMA_B f83 = f34, f51, f83 // A3 * B4
  9008. nop __LINE__
  9009. }
  9010. ;;
  9011. { .mfb
  9012. nop __LINE__
  9013. FMA f98 = f34, f52, f98 // A3 * B5
  9014. nop __LINE__
  9015. }
  9016. { .mfb
  9017. nop __LINE__
  9018. FMA_B f99 = f34, f53, f99 // A3 * B6
  9019. nop __LINE__
  9020. }
  9021. ;;
  9022. { .mfb
  9023. nop __LINE__
  9024. FMA f114 = f34, f54, f114 // A3 * B7
  9025. nop __LINE__
  9026. }
  9027. { .mfb
  9028. nop __LINE__
  9029. FMA_B f115 = f34, f55, f115 // A3 * B8
  9030. nop __LINE__
  9031. }
  9032. ;;
  9033. { .mfb
  9034. nop __LINE__
  9035. FMA f67 = f35, f48, f67 // A4 * B1
  9036. nop __LINE__
  9037. }
  9038. { .mfb
  9039. nop __LINE__
  9040. FMA_A f66 = f35, f49, f66 // A4 * B2
  9041. nop __LINE__
  9042. }
  9043. ;;
  9044. { .mfb
  9045. nop __LINE__
  9046. FMA f83 = f35, f50, f83 // A4 * B3
  9047. nop __LINE__
  9048. }
  9049. { .mfb
  9050. nop __LINE__
  9051. FMA_A f82 = f35, f51, f82 // A4 * B4
  9052. nop __LINE__
  9053. }
  9054. ;;
  // From here the p4-guarded loads refill the first-half operands for the
  // next pass while the remaining first-half products are issued.
  9055. { .mfb
  9056. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  9057. FMA f99 = f35, f52, f99 // A4 * B5
  9058. nop __LINE__
  9059. }
  9060. { .mfb
  9061. nop __LINE__
  9062. FMA_A f98 = f35, f53, f98 // A4 * B6
  9063. nop __LINE__
  9064. }
  9065. ;;
  9066. { .mfb
  9067. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  9068. FMA f115 = f35, f54, f115 // A4 * B7
  9069. nop __LINE__
  9070. }
  9071. { .mfb
  9072. nop __LINE__
  9073. FMA_A f114 = f35, f55, f114 // A4 * B8
  9074. nop __LINE__
  9075. }
  9076. ;;
  // Second half: the same product pattern on f40..f43 x f56..f63, all under p3.
  9077. { .mfb
  9078. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  9079. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  9080. nop __LINE__
  9081. }
  9082. { .mfb
  9083. nop __LINE__
  9084. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  9085. nop __LINE__
  9086. }
  9087. ;;
  9088. { .mfb
  9089. (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  9090. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  9091. nop __LINE__
  9092. }
  9093. { .mfb
  9094. nop __LINE__
  9095. (p3) FMA_B f81 = f40, f59, f81 // A1 * B4
  9096. nop __LINE__
  9097. }
  9098. ;;
  9099. { .mfb
  9100. nop __LINE__
  9101. (p3) FMA f96 = f40, f60, f96 // A1 * B5
  9102. nop __LINE__
  9103. }
  9104. { .mfb
  9105. nop __LINE__
  9106. (p3) FMA_B f97 = f40, f61, f97 // A1 * B6
  9107. nop __LINE__
  9108. }
  9109. ;;
  9110. { .mfb
  9111. nop __LINE__
  9112. (p3) FMA f112 = f40, f62, f112 // A1 * B7
  9113. nop __LINE__
  9114. }
  9115. { .mfb
  9116. nop __LINE__
  9117. (p3) FMA_B f113 = f40, f63, f113 // A1 * B8
  9118. nop __LINE__
  9119. }
  9120. ;;
  // NOTE(review): the next four bundles form a single instruction group.
  // Unlike every other pair in this loop, the 2nd and 4th bundles list only
  // two instructions for the three-slot .mfb template (no leading nop), and
  // there is no ";;" between the A2*B1/B2 pair and the A2*B3/B4 pair.  The
  // four FMAs write distinct registers and none reads another's result, so
  // no dependency is visible in the group -- confirm the layout is
  // intentional and that the assembler pads the short bundles as expected.
  9121. { .mfb
  9122. (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  9123. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  9124. nop __LINE__
  9125. }
  9126. { .mfb
  9127. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  9128. nop __LINE__
  9129. }
  9130. { .mfb
  9131. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  9132. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  9133. nop __LINE__
  9134. }
  9135. { .mfb
  9136. (p3) FMA_A f80 = f41, f59, f80 // A2 * B4
  9137. nop __LINE__
  9138. }
  9139. ;;
  9140. { .mfb
  9141. nop __LINE__
  9142. (p3) FMA f97 = f41, f60, f97 // A2 * B5
  9143. nop __LINE__
  9144. }
  9145. { .mfb
  9146. nop __LINE__
  9147. (p3) FMA_A f96 = f41, f61, f96 // A2 * B6
  9148. nop __LINE__
  9149. }
  9150. ;;
  9151. { .mfb
  9152. nop __LINE__
  9153. (p3) FMA f113 = f41, f62, f113 // A2 * B7
  9154. nop __LINE__
  9155. }
  9156. { .mfb
  9157. nop __LINE__
  9158. (p3) FMA_A f112 = f41, f63, f112 // A2 * B8
  9159. nop __LINE__
  9160. }
  9161. ;;
  9162. { .mfb
  9163. nop __LINE__
  9164. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  9165. nop __LINE__
  9166. }
  9167. { .mfb
  9168. nop __LINE__
  9169. (p3) FMA_B f67 = f42, f57, f67 // A3 * B2
  9170. nop __LINE__
  9171. }
  9172. ;;
  9173. { .mfb
  9174. nop __LINE__
  9175. (p3) FMA f82 = f42, f58, f82 // A3 * B3
  9176. nop __LINE__
  9177. }
  9178. { .mfb
  9179. nop __LINE__
  9180. (p3) FMA_B f83 = f42, f59, f83 // A3 * B4
  9181. nop __LINE__
  9182. }
  9183. ;;
  9184. { .mfb
  9185. nop __LINE__
  9186. (p3) FMA f98 = f42, f60, f98 // A3 * B5
  9187. nop __LINE__
  9188. }
  9189. { .mfb
  9190. nop __LINE__
  9191. (p3) FMA_B f99 = f42, f61, f99 // A3 * B6
  9192. nop __LINE__
  9193. }
  9194. ;;
  9195. { .mfb
  9196. nop __LINE__
  9197. (p3) FMA f114 = f42, f62, f114 // A3 * B7
  9198. nop __LINE__
  9199. }
  9200. { .mfb
  9201. nop __LINE__
  9202. (p3) FMA_B f115 = f42, f63, f115 // A3 * B8
  9203. nop __LINE__
  9204. }
  9205. ;;
  9206. { .mfb
  9207. nop __LINE__
  9208. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  9209. nop __LINE__
  9210. }
  9211. { .mfb
  9212. nop __LINE__
  9213. (p3) FMA_A f66 = f43, f57, f66 // A4 * B2
  9214. nop __LINE__
  9215. }
  9216. ;;
  9217. { .mfb
  9218. nop __LINE__
  9219. (p3) FMA f83 = f43, f58, f83 // A4 * B3
  9220. nop __LINE__
  9221. }
  9222. { .mfb
  9223. nop __LINE__
  9224. (p3) FMA_A f82 = f43, f59, f82 // A4 * B4
  9225. nop __LINE__
  9226. }
  9227. ;;
  9228. { .mfb
  9229. nop __LINE__
  9230. (p3) FMA f99 = f43, f60, f99 // A4 * B5
  9231. nop __LINE__
  9232. }
  9233. { .mfb
  9234. nop __LINE__
  9235. (p3) FMA_A f98 = f43, f61, f98 // A4 * B6
  9236. nop __LINE__
  9237. }
  9238. ;;
  9239. { .mfi
  9240. nop __LINE__
  9241. (p3) FMA f115 = f43, f62, f115 // A4 * B7
  // Software copy of the remaining pass count, read by the p3/p4 compares
  // at the top of the next pass.
  9242. adds L = -1, L
  9243. }
  9244. { .mfb
  9245. nop __LINE__
  9246. (p3) FMA_A f114 = f43, f63, f114 // A4 * B8
  // Counted loop: repeats while ar.lc (loaded from L before .L022) is nonzero.
  9247. br.cloop.sptk.few .L022
  9248. }
  9249. ;;
  9250. .L028:
  9251. #if defined(LN) || defined(RT)
  9252. #ifdef LN
  9253. adds r2 = -2, KK
  9254. #else
  9255. adds r2 = -4, KK
  9256. #endif
  9257. ;;
  9258. shladd r2 = r2, ZBASE_SHIFT, r0
  9259. ;;
  9260. shladd AOFFSET = r2, 1, AORIG
  9261. shladd BOFFSET = r2, 2, B
  9262. ;;
  9263. #endif
  9264. #if defined(LN) || defined(LT)
  9265. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  9266. ;;
  9267. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  9268. ;;
  9269. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  9270. ;;
  9271. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  9272. ;;
  9273. LDFPD f104, f105 = [BOFFSET], 2 * SIZE
  9274. ;;
  9275. LDFPD f106, f107 = [BOFFSET], 2 * SIZE
  9276. ;;
  9277. { .mfi
  9278. LDFPD f120, f121 = [BOFFSET], 2 * SIZE
  9279. FSUB f64 = f72, f64
  9280. nop __LINE__
  9281. }
  9282. { .mfi
  9283. nop __LINE__
  9284. FSUB_A f65 = f73, f65
  9285. nop __LINE__
  9286. }
  9287. ;;
  9288. { .mfi
  9289. LDFPD f122, f123 = [BOFFSET]
  9290. FSUB f80 = f74, f80
  9291. adds BOFFSET = -14 * SIZE, BOFFSET
  9292. }
  9293. { .mfi
  9294. nop __LINE__
  9295. FSUB_A f81 = f75, f81
  9296. nop __LINE__
  9297. }
  9298. ;;
  9299. { .mfi
  9300. nop __LINE__
  9301. FSUB f96 = f88, f96
  9302. nop __LINE__
  9303. }
  9304. { .mfi
  9305. nop __LINE__
  9306. FSUB_A f97 = f89, f97
  9307. nop __LINE__
  9308. }
  9309. ;;
  9310. { .mfi
  9311. nop __LINE__
  9312. FSUB f112 = f90, f112
  9313. nop __LINE__
  9314. }
  9315. { .mfi
  9316. nop __LINE__
  9317. FSUB_A f113 = f91, f113
  9318. nop __LINE__
  9319. }
  9320. ;;
  9321. { .mfi
  9322. nop __LINE__
  9323. FSUB f66 = f104, f66
  9324. nop __LINE__
  9325. }
  9326. { .mfi
  9327. nop __LINE__
  9328. FSUB_A f67 = f105, f67
  9329. nop __LINE__
  9330. }
  9331. ;;
  9332. { .mfi
  9333. nop __LINE__
  9334. FSUB f82 = f106, f82
  9335. nop __LINE__
  9336. }
  9337. { .mfi
  9338. nop __LINE__
  9339. FSUB_A f83 = f107, f83
  9340. nop __LINE__
  9341. }
  9342. ;;
  9343. { .mfi
  9344. nop __LINE__
  9345. FSUB f98 = f120, f98
  9346. nop __LINE__
  9347. }
  9348. { .mfi
  9349. nop __LINE__
  9350. FSUB_A f99 = f121, f99
  9351. nop __LINE__
  9352. }
  9353. ;;
  9354. { .mfi
  9355. nop __LINE__
  9356. FSUB f114 = f122, f114
  9357. nop __LINE__
  9358. }
  9359. { .mfi
  9360. nop __LINE__
  9361. FSUB_A f115 = f123, f115
  9362. nop __LINE__
  9363. }
  9364. ;;
  9365. #else
  9366. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  9367. ;;
  9368. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  9369. ;;
  9370. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  9371. ;;
  9372. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  9373. ;;
  9374. LDFPD f104, f105 = [AOFFSET], 2 * SIZE
  9375. ;;
  9376. LDFPD f106, f107 = [AOFFSET], 2 * SIZE
  9377. ;;
  9378. { .mfi
  9379. LDFPD f120, f121 = [AOFFSET], 2 * SIZE
  9380. FSUB f64 = f72, f64
  9381. nop __LINE__
  9382. }
  9383. { .mfi
  9384. nop __LINE__
  9385. FSUB f65 = f73, f65
  9386. nop __LINE__
  9387. }
  9388. ;;
  9389. { .mfi
  9390. LDFPD f122, f123 = [AOFFSET]
  9391. FSUB f66 = f74, f66
  9392. adds AOFFSET = -14 * SIZE, AOFFSET
  9393. }
  9394. { .mfi
  9395. nop __LINE__
  9396. FSUB f67 = f75, f67
  9397. nop __LINE__
  9398. }
  9399. ;;
  9400. { .mfi
  9401. nop __LINE__
  9402. FSUB f80 = f88, f80
  9403. nop __LINE__
  9404. }
  9405. { .mfi
  9406. nop __LINE__
  9407. FSUB f81 = f89, f81
  9408. nop __LINE__
  9409. }
  9410. ;;
  9411. { .mfi
  9412. nop __LINE__
  9413. FSUB f82 = f90, f82
  9414. nop __LINE__
  9415. }
  9416. { .mfi
  9417. nop __LINE__
  9418. FSUB f83 = f91, f83
  9419. nop __LINE__
  9420. }
  9421. ;;
  9422. { .mfi
  9423. nop __LINE__
  9424. FSUB f96 = f104, f96
  9425. nop __LINE__
  9426. }
  9427. { .mfi
  9428. nop __LINE__
  9429. FSUB f97 = f105, f97
  9430. nop __LINE__
  9431. }
  9432. ;;
  9433. { .mfi
  9434. nop __LINE__
  9435. FSUB f98 = f106, f98
  9436. nop __LINE__
  9437. }
  9438. { .mfi
  9439. nop __LINE__
  9440. FSUB f99 = f107, f99
  9441. nop __LINE__
  9442. }
  9443. ;;
  9444. { .mfi
  9445. nop __LINE__
  9446. FSUB f112 = f120, f112
  9447. nop __LINE__
  9448. }
  9449. { .mfi
  9450. nop __LINE__
  9451. FSUB f113 = f121, f113
  9452. nop __LINE__
  9453. }
  9454. ;;
  9455. { .mfi
  9456. nop __LINE__
  9457. FSUB f114 = f122, f114
  9458. nop __LINE__
  9459. }
  9460. { .mfi
  9461. nop __LINE__
  9462. FSUB f115 = f123, f115
  9463. nop __LINE__
  9464. }
  9465. ;;
  9466. #endif
  9467. #ifdef LN
  9468. adds AOFFSET = 6 * SIZE, AOFFSET
  9469. ;;
  9470. LDFPD f104, f105 = [AOFFSET]
  9471. adds AOFFSET = - 2 * SIZE, AOFFSET
  9472. ;;
  9473. LDFPD f106, f107 = [AOFFSET]
  9474. adds AOFFSET = - 4 * SIZE, AOFFSET
  9475. ;;
  9476. LDFPD f120, f121 = [AOFFSET]
  9477. ;;
  9478. FMPY f32 = f104, f66
  9479. FMPY f33 = f105, f66
  9480. FMPY f34 = f104, f82
  9481. FMPY f35 = f105, f82
  9482. FMPY f36 = f104, f98
  9483. FMPY f37 = f105, f98
  9484. FMPY f38 = f104, f114
  9485. FMPY f39 = f105, f114
  9486. ;;
  9487. FMA_C f66 = f105, f67, f32
  9488. FMA_D f67 = f104, f67, f33
  9489. FMA_C f82 = f105, f83, f34
  9490. FMA_D f83 = f104, f83, f35
  9491. FMA_C f98 = f105, f99, f36
  9492. FMA_D f99 = f104, f99, f37
  9493. FMA_C f114 = f105, f115, f38
  9494. FMA_D f115 = f104, f115, f39
  9495. ;;
  9496. FNMA f64 = f106, f66, f64
  9497. FMA_A f65 = f107, f66, f65
  9498. FNMA f80 = f106, f82, f80
  9499. FMA_A f81 = f107, f82, f81
  9500. FNMA f96 = f106, f98, f96
  9501. FMA_A f97 = f107, f98, f97
  9502. FNMA f112 = f106, f114, f112
  9503. FMA_A f113 = f107, f114, f113
  9504. ;;
  9505. FMA_B f64 = f107, f67, f64
  9506. FNMA f65 = f106, f67, f65
  9507. FMA_B f80 = f107, f83, f80
  9508. FNMA f81 = f106, f83, f81
  9509. FMA_B f96 = f107, f99, f96
  9510. FNMA f97 = f106, f99, f97
  9511. FMA_B f112 = f107, f115, f112
  9512. FNMA f113 = f106, f115, f113
  9513. ;;
  9514. FMPY f32 = f120, f64
  9515. FMPY f33 = f121, f64
  9516. FMPY f34 = f120, f80
  9517. FMPY f35 = f121, f80
  9518. FMPY f36 = f120, f96
  9519. FMPY f37 = f121, f96
  9520. FMPY f38 = f120, f112
  9521. FMPY f39 = f121, f112
  9522. ;;
  9523. FMA_C f64 = f121, f65, f32
  9524. FMA_D f65 = f120, f65, f33
  9525. FMA_C f80 = f121, f81, f34
  9526. FMA_D f81 = f120, f81, f35
  9527. FMA_C f96 = f121, f97, f36
  9528. FMA_D f97 = f120, f97, f37
  9529. FMA_C f112 = f121, f113, f38
  9530. FMA_D f113 = f120, f113, f39
  9531. ;;
  9532. #endif
  9533. #ifdef LT
  9534. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  9535. ;;
  9536. LDFPD f74, f75 = [AOFFSET]
  9537. adds AOFFSET = 4 * SIZE, AOFFSET
  9538. ;;
  9539. LDFPD f90, f91 = [AOFFSET]
  9540. adds AOFFSET = - 6 * SIZE, AOFFSET
  9541. ;;
  9542. FMPY f32 = f72, f64
  9543. FMPY f33 = f73, f64
  9544. FMPY f34 = f72, f80
  9545. FMPY f35 = f73, f80
  9546. FMPY f36 = f72, f96
  9547. FMPY f37 = f73, f96
  9548. FMPY f38 = f72, f112
  9549. FMPY f39 = f73, f112
  9550. ;;
  9551. FMA_C f64 = f73, f65, f32
  9552. FMA_D f65 = f72, f65, f33
  9553. FMA_C f80 = f73, f81, f34
  9554. FMA_D f81 = f72, f81, f35
  9555. FMA_C f96 = f73, f97, f36
  9556. FMA_D f97 = f72, f97, f37
  9557. FMA_C f112 = f73, f113, f38
  9558. FMA_D f113 = f72, f113, f39
  9559. ;;
  9560. FNMA f66 = f74, f64, f66
  9561. FMA_A f67 = f75, f64, f67
  9562. FNMA f82 = f74, f80, f82
  9563. FMA_A f83 = f75, f80, f83
  9564. FNMA f98 = f74, f96, f98
  9565. FMA_A f99 = f75, f96, f99
  9566. FNMA f114 = f74, f112, f114
  9567. FMA_A f115 = f75, f112, f115
  9568. ;;
  9569. FMA_B f66 = f75, f65, f66
  9570. FNMA f67 = f74, f65, f67
  9571. FMA_B f82 = f75, f81, f82
  9572. FNMA f83 = f74, f81, f83
  9573. FMA_B f98 = f75, f97, f98
  9574. FNMA f99 = f74, f97, f99
  9575. FMA_B f114 = f75, f113, f114
  9576. FNMA f115 = f74, f113, f115
  9577. ;;
  9578. FMPY f32 = f90, f66
  9579. FMPY f33 = f91, f66
  9580. FMPY f34 = f90, f82
  9581. FMPY f35 = f91, f82
  9582. FMPY f36 = f90, f98
  9583. FMPY f37 = f91, f98
  9584. FMPY f38 = f90, f114
  9585. FMPY f39 = f91, f114
  9586. ;;
  9587. FMA_C f66 = f91, f67, f32
  9588. FMA_D f67 = f90, f67, f33
  9589. FMA_C f82 = f91, f83, f34
  9590. FMA_D f83 = f90, f83, f35
  9591. FMA_C f98 = f91, f99, f36
  9592. FMA_D f99 = f90, f99, f37
  9593. FMA_C f114 = f91, f115, f38
  9594. FMA_D f115 = f90, f115, f39
  9595. ;;
  9596. #endif
  9597. #ifdef RN
  9598. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  9599. ;;
  9600. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  9601. ;;
  9602. LDFPD f76, f77 = [BOFFSET], 2 * SIZE
  9603. ;;
  9604. LDFPD f78, f79 = [BOFFSET]
  9605. adds BOFFSET = 4 * SIZE, BOFFSET
  9606. ;;
  9607. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  9608. ;;
  9609. LDFPD f92, f93 = [BOFFSET], 2 * SIZE
  9610. ;;
  9611. LDFPD f94, f95 = [BOFFSET]
  9612. adds BOFFSET = 6 * SIZE, BOFFSET
  9613. ;;
  9614. LDFPD f108, f109 = [BOFFSET], 2 * SIZE
  9615. ;;
  9616. LDFPD f110, f111 = [BOFFSET]
  9617. adds BOFFSET = 8 * SIZE, BOFFSET
  9618. ;;
  9619. LDFPD f126, f127 = [BOFFSET]
  9620. adds BOFFSET = - 30 * SIZE, BOFFSET
  9621. ;;
  9622. FMPY f32 = f72, f64
  9623. FMPY f33 = f73, f64
  9624. FMPY f34 = f72, f66
  9625. FMPY f35 = f73, f66
  9626. ;;
  9627. FMA_C f64 = f73, f65, f32
  9628. FMA_D f65 = f72, f65, f33
  9629. FMA_C f66 = f73, f67, f34
  9630. FMA_D f67 = f72, f67, f35
  9631. ;;
  9632. FNMA f80 = f74, f64, f80
  9633. FMA_A f81 = f75, f64, f81
  9634. FNMA f82 = f74, f66, f82
  9635. FMA_A f83 = f75, f66, f83
  9636. ;;
  9637. FMA_B f80 = f75, f65, f80
  9638. FNMA f81 = f74, f65, f81
  9639. FMA_B f82 = f75, f67, f82
  9640. FNMA f83 = f74, f67, f83
  9641. ;;
  9642. FNMA f96 = f76, f64, f96
  9643. FMA_A f97 = f77, f64, f97
  9644. FNMA f98 = f76, f66, f98
  9645. FMA_A f99 = f77, f66, f99
  9646. ;;
  9647. FMA_B f96 = f77, f65, f96
  9648. FNMA f97 = f76, f65, f97
  9649. FMA_B f98 = f77, f67, f98
  9650. FNMA f99 = f76, f67, f99
  9651. ;;
  9652. FNMA f112 = f78, f64, f112
  9653. FMA_A f113 = f79, f64, f113
  9654. FNMA f114 = f78, f66, f114
  9655. FMA_A f115 = f79, f66, f115
  9656. ;;
  9657. FMA_B f112 = f79, f65, f112
  9658. FNMA f113 = f78, f65, f113
  9659. FMA_B f114 = f79, f67, f114
  9660. FNMA f115 = f78, f67, f115
  9661. ;;
  9662. FMPY f32 = f90, f80
  9663. FMPY f33 = f91, f80
  9664. FMPY f34 = f90, f82
  9665. FMPY f35 = f91, f82
  9666. ;;
  9667. FMA_C f80 = f91, f81, f32
  9668. FMA_D f81 = f90, f81, f33
  9669. FMA_C f82 = f91, f83, f34
  9670. FMA_D f83 = f90, f83, f35
  9671. ;;
  9672. FNMA f96 = f92, f80, f96
  9673. FMA_A f97 = f93, f80, f97
  9674. FNMA f98 = f92, f82, f98
  9675. FMA_A f99 = f93, f82, f99
  9676. ;;
  9677. FMA_B f96 = f93, f81, f96
  9678. FNMA f97 = f92, f81, f97
  9679. FMA_B f98 = f93, f83, f98
  9680. FNMA f99 = f92, f83, f99
  9681. ;;
  9682. FNMA f112 = f94, f80, f112
  9683. FMA_A f113 = f95, f80, f113
  9684. FNMA f114 = f94, f82, f114
  9685. FMA_A f115 = f95, f82, f115
  9686. ;;
  9687. FMA_B f112 = f95, f81, f112
  9688. FNMA f113 = f94, f81, f113
  9689. FMA_B f114 = f95, f83, f114
  9690. FNMA f115 = f94, f83, f115
  9691. ;;
  9692. FMPY f32 = f108, f96
  9693. FMPY f33 = f109, f96
  9694. FMPY f34 = f108, f98
  9695. FMPY f35 = f109, f98
  9696. ;;
  9697. FMA_C f96 = f109, f97, f32
  9698. FMA_D f97 = f108, f97, f33
  9699. FMA_C f98 = f109, f99, f34
  9700. FMA_D f99 = f108, f99, f35
  9701. ;;
  9702. FNMA f112 = f110, f96, f112
  9703. FMA_A f113 = f111, f96, f113
  9704. FNMA f114 = f110, f98, f114
  9705. FMA_A f115 = f111, f98, f115
  9706. ;;
  9707. FMA_B f112 = f111, f97, f112
  9708. FNMA f113 = f110, f97, f113
  9709. FMA_B f114 = f111, f99, f114
  9710. FNMA f115 = f110, f99, f115
  9711. ;;
  9712. FMPY f32 = f126, f112
  9713. FMPY f33 = f127, f112
  9714. FMPY f34 = f126, f114
  9715. FMPY f35 = f127, f114
  9716. ;;
  9717. FMA_C f112 = f127, f113, f32
  9718. FMA_D f113 = f126, f113, f33
  9719. FMA_C f114 = f127, f115, f34
  9720. FMA_D f115 = f126, f115, f35
  9721. ;;
  9722. #endif
  9723. #ifdef RT
  9724. adds BOFFSET = 30 * SIZE, BOFFSET
  9725. ;;
  9726. LDFPD f72, f73 = [BOFFSET]
  9727. adds BOFFSET = - 2 * SIZE, BOFFSET
  9728. ;;
  9729. LDFPD f74, f75 = [BOFFSET]
  9730. adds BOFFSET = - 2 * SIZE, BOFFSET
  9731. ;;
  9732. LDFPD f76, f77 = [BOFFSET]
  9733. adds BOFFSET = - 2 * SIZE, BOFFSET
  9734. ;;
  9735. LDFPD f78, f79 = [BOFFSET]
  9736. adds BOFFSET = - 4 * SIZE, BOFFSET
  9737. ;;
  9738. LDFPD f88, f89 = [BOFFSET]
  9739. adds BOFFSET = - 2 * SIZE, BOFFSET
  9740. ;;
  9741. LDFPD f90, f91 = [BOFFSET]
  9742. adds BOFFSET = - 2 * SIZE, BOFFSET
  9743. ;;
  9744. LDFPD f92, f93 = [BOFFSET]
  9745. adds BOFFSET = - 6 * SIZE, BOFFSET
  9746. ;;
  9747. LDFPD f104, f105 = [BOFFSET]
  9748. adds BOFFSET = - 2 * SIZE, BOFFSET
  9749. ;;
  9750. LDFPD f106, f107 = [BOFFSET]
  9751. adds BOFFSET = - 8 * SIZE, BOFFSET
  9752. ;;
  9753. LDFPD f120, f121 = [BOFFSET]
  9754. ;;
  9755. FMPY f32 = f72, f112
  9756. FMPY f33 = f73, f112
  9757. FMPY f34 = f72, f114
  9758. FMPY f35 = f73, f114
  9759. ;;
  9760. FMA_C f112 = f73, f113, f32
  9761. FMA_D f113 = f72, f113, f33
  9762. FMA_C f114 = f73, f115, f34
  9763. FMA_D f115 = f72, f115, f35
  9764. ;;
  9765. FNMA f96 = f74, f112, f96
  9766. FMA_A f97 = f75, f112, f97
  9767. FNMA f98 = f74, f114, f98
  9768. FMA_A f99 = f75, f114, f99
  9769. ;;
  9770. FMA_B f96 = f75, f113, f96
  9771. FNMA f97 = f74, f113, f97
  9772. FMA_B f98 = f75, f115, f98
  9773. FNMA f99 = f74, f115, f99
  9774. ;;
  9775. FNMA f80 = f76, f112, f80
  9776. FMA_A f81 = f77, f112, f81
  9777. FNMA f82 = f76, f114, f82
  9778. FMA_A f83 = f77, f114, f83
  9779. ;;
  9780. FMA_B f80 = f77, f113, f80
  9781. FNMA f81 = f76, f113, f81
  9782. FMA_B f82 = f77, f115, f82
  9783. FNMA f83 = f76, f115, f83
  9784. ;;
  9785. FNMA f64 = f78, f112, f64
  9786. FMA_A f65 = f79, f112, f65
  9787. FNMA f66 = f78, f114, f66
  9788. FMA_A f67 = f79, f114, f67
  9789. ;;
  9790. FMA_B f64 = f79, f113, f64
  9791. FNMA f65 = f78, f113, f65
  9792. FMA_B f66 = f79, f115, f66
  9793. FNMA f67 = f78, f115, f67
  9794. ;;
  9795. FMPY f32 = f88, f96
  9796. FMPY f33 = f89, f96
  9797. FMPY f34 = f88, f98
  9798. FMPY f35 = f89, f98
  9799. ;;
  9800. FMA_C f96 = f89, f97, f32
  9801. FMA_D f97 = f88, f97, f33
  9802. FMA_C f98 = f89, f99, f34
  9803. FMA_D f99 = f88, f99, f35
  9804. ;;
  9805. FNMA f80 = f90, f96, f80
  9806. FMA_A f81 = f91, f96, f81
  9807. FNMA f82 = f90, f98, f82
  9808. FMA_A f83 = f91, f98, f83
  9809. ;;
  9810. FMA_B f80 = f91, f97, f80
  9811. FNMA f81 = f90, f97, f81
  9812. FMA_B f82 = f91, f99, f82
  9813. FNMA f83 = f90, f99, f83
  9814. ;;
  9815. FNMA f64 = f92, f96, f64
  9816. FMA_A f65 = f93, f96, f65
  9817. FNMA f66 = f92, f98, f66
  9818. FMA_A f67 = f93, f98, f67
  9819. ;;
  9820. FMA_B f64 = f93, f97, f64
  9821. FNMA f65 = f92, f97, f65
  9822. FMA_B f66 = f93, f99, f66
  9823. FNMA f67 = f92, f99, f67
  9824. ;;
  9825. FMPY f32 = f104, f80
  9826. FMPY f33 = f105, f80
  9827. FMPY f34 = f104, f82
  9828. FMPY f35 = f105, f82
  9829. ;;
  9830. FMA_C f80 = f105, f81, f32
  9831. FMA_D f81 = f104, f81, f33
  9832. FMA_C f82 = f105, f83, f34
  9833. FMA_D f83 = f104, f83, f35
  9834. ;;
  9835. FNMA f64 = f106, f80, f64
  9836. FMA_A f65 = f107, f80, f65
  9837. FNMA f66 = f106, f82, f66
  9838. FMA_A f67 = f107, f82, f67
  9839. ;;
  9840. FMA_B f64 = f107, f81, f64
  9841. FNMA f65 = f106, f81, f65
  9842. FMA_B f66 = f107, f83, f66
  9843. FNMA f67 = f106, f83, f67
  9844. ;;
  9845. FMPY f32 = f120, f64
  9846. FMPY f33 = f121, f64
  9847. FMPY f34 = f120, f66
  9848. FMPY f35 = f121, f66
  9849. ;;
  9850. FMA_C f64 = f121, f65, f32
  9851. FMA_D f65 = f120, f65, f33
  9852. FMA_C f66 = f121, f67, f34
  9853. FMA_D f67 = f120, f67, f35
  9854. ;;
  9855. #endif
  9856. #if defined(LN) || defined(LT)
  9857. adds BOFFSET2 = 4 * SIZE, BOFFSET
  9858. ;;
  9859. STFD [BOFFSET] = f64, SIZE
  9860. STFD [BOFFSET2] = f96, SIZE
  9861. ;;
  9862. STFD [BOFFSET] = f65, SIZE
  9863. STFD [BOFFSET2] = f97, SIZE
  9864. ;;
  9865. STFD [BOFFSET] = f80, SIZE
  9866. STFD [BOFFSET2] = f112, SIZE
  9867. ;;
  9868. STFD [BOFFSET] = f81, 5 * SIZE
  9869. STFD [BOFFSET2] = f113, 5 * SIZE
  9870. ;;
  9871. STFD [BOFFSET] = f66, SIZE
  9872. STFD [BOFFSET2] = f98, SIZE
  9873. ;;
  9874. STFD [BOFFSET] = f67, SIZE
  9875. STFD [BOFFSET2] = f99, SIZE
  9876. ;;
  9877. STFD [BOFFSET] = f82, SIZE
  9878. STFD [BOFFSET2] = f114, SIZE
  9879. ;;
  9880. STFD [BOFFSET] = f83, 5 * SIZE
  9881. STFD [BOFFSET2] = f115, 5 * SIZE
  9882. ;;
  9883. adds BOFFSET = - 16 * SIZE, BOFFSET
  9884. ;;
  9885. #else
  9886. adds AOFFSET2 = 4 * SIZE, AOFFSET
  9887. ;;
  9888. STFD [AOFFSET] = f64, SIZE
  9889. STFD [AOFFSET2] = f80, SIZE
  9890. ;;
  9891. STFD [AOFFSET] = f65, SIZE
  9892. STFD [AOFFSET2] = f81, SIZE
  9893. ;;
  9894. STFD [AOFFSET] = f66, SIZE
  9895. STFD [AOFFSET2] = f82, SIZE
  9896. ;;
  9897. STFD [AOFFSET] = f67, 5 * SIZE
  9898. STFD [AOFFSET2] = f83, 5 * SIZE
  9899. ;;
  9900. STFD [AOFFSET] = f96, SIZE
  9901. STFD [AOFFSET2] = f112, SIZE
  9902. ;;
  9903. STFD [AOFFSET] = f97, SIZE
  9904. STFD [AOFFSET2] = f113, SIZE
  9905. ;;
  9906. STFD [AOFFSET] = f98, SIZE
  9907. STFD [AOFFSET2] = f114, SIZE
  9908. ;;
  9909. STFD [AOFFSET] = f99, 5 * SIZE
  9910. STFD [AOFFSET2] = f115, 5 * SIZE
  9911. ;;
  9912. adds AOFFSET = - 16 * SIZE, AOFFSET
  9913. ;;
  9914. #endif
  9915. #ifdef LN
  9916. adds C1 = -4 * SIZE, C1
  9917. adds C2 = -4 * SIZE, C2
  9918. adds C3 = -4 * SIZE, C3
  9919. adds C4 = -4 * SIZE, C4
  9920. #endif
  9921. ;;
  9922. STFD [C1 ] = f64, SIZE
  9923. ;;
  9924. STFD [C1 ] = f65, SIZE
  9925. ;;
  9926. STFD [C1 ] = f66, SIZE
  9927. ;;
  9928. STFD [C1 ] = f67, SIZE
  9929. ;;
  9930. STFD [C2 ] = f80, SIZE
  9931. ;;
  9932. STFD [C2 ] = f81, SIZE
  9933. ;;
  9934. STFD [C2 ] = f82, SIZE
  9935. ;;
  9936. STFD [C2 ] = f83, SIZE
  9937. ;;
  9938. STFD [C3 ] = f96, SIZE
  9939. ;;
  9940. STFD [C3 ] = f97, SIZE
  9941. ;;
  9942. STFD [C3 ] = f98, SIZE
  9943. ;;
  9944. STFD [C3 ] = f99, SIZE
  9945. ;;
  9946. STFD [C4 ] = f112, SIZE
  9947. ;;
  9948. STFD [C4 ] = f113, SIZE
  9949. ;;
  9950. STFD [C4 ] = f114, SIZE
  9951. ;;
  9952. STFD [C4 ] = f115, SIZE
  9953. ;;
  9954. mov f64 = f0
  9955. mov f65 = f0
  9956. mov f80 = f0
  9957. mov f81 = f0
  9958. mov f96 = f0
  9959. mov f97 = f0
  9960. mov f112 = f0
  9961. mov f113 = f0
  9962. ;;
  9963. #ifdef LN
  9964. adds C1 = -4 * SIZE, C1
  9965. adds C2 = -4 * SIZE, C2
  9966. adds C3 = -4 * SIZE, C3
  9967. adds C4 = -4 * SIZE, C4
  9968. #endif
  9969. ;;
  9970. cmp.ne p6, p0 = 1, I
  9971. ;;
  9972. adds I = -1, I
  9973. ;;
  9974. shladd r2 = K, ZBASE_SHIFT, r0
  9975. ;;
  9976. sub L = K, KK
  9977. ;;
  9978. #ifdef RT
  9979. shladd AORIG = r2, 1, AORIG
  9980. #endif
  9981. ;;
  9982. #if defined(LT) || defined(RN)
  9983. shladd L = L, ZBASE_SHIFT, r0
  9984. ;;
  9985. shladd AOFFSET = L, 1, AOFFSET
  9986. shladd BOFFSET = L, 2, BOFFSET
  9987. #endif
  9988. ;;
  9989. #ifdef LT
  9990. adds KK = 2, KK
  9991. #elif defined LN
  9992. adds KK = -2, KK
  9993. #else
  9994. nop __LINE__
  9995. #endif
  9996. ;;
  9997. #if defined(LT) || defined(RN)
  9998. mov L = KK
  9999. #else
  10000. sub L = K, KK
  10001. #endif
  10002. ;;
  // .L030: set-up for the block that runs only when bit 0 of M is set.
  // Computes the trip count L, positions AOFFSET/BOFFSET, preloads the
  // first operands (one pair from A, four pairs from B) and programs
  // ar.lc for the accumulation loop at .L032.
  10003. .align 16
  10004. .L030:
  10005. { .mib
  // L = KK for LT/RN, otherwise K - KK.
  10006. #if defined(LT) || defined(RN)
  10007. mov L = KK
  10008. #else
  10009. sub L = K, KK
  10010. #endif
  // Bit 0 of M clear: nothing to do here, go to the panel epilogue.
  10011. tbit.z p6, p7 = M, 0
  10012. (p6) br.cond.dptk .L049
  10013. }
  10014. ;;
  10015. { .mmi
  // p7 = (L != 0); it guards every preload below.
  10016. cmp.ne p7, p0 = r0, L
  10017. adds BOFFSET = 0 * SIZE, B
  // r2 = K << ZBASE_SHIFT (only consumed by the LN adjustment of AORIG).
  10018. shl r2 = K, ZBASE_SHIFT
  10019. }
  10020. { .mmi
  // r3 = KK << ZBASE_SHIFT.
  10021. shladd r3 = KK, ZBASE_SHIFT, r0
  10022. nop __LINE__
  10023. nop __LINE__
  10024. }
  10025. ;;
  10026. #if defined(LT) || defined(RN)
  // LT/RN: BOFFSET already equals B; AOFFSET is used as left by earlier code.
  10027. { .mfb
  10028. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  10029. mov f72 = f0
  10030. nop __LINE__
  10031. }
  10032. { .mmf
  10033. nop __LINE__
  10034. nop __LINE__
  10035. mov f73 = f0
  10036. }
  10037. ;;
  10038. #else
  // LN/RT: BOFFSET = B + (r3 << 2); AOFFSET = AORIG + r3.
  10039. { .mfi
  10040. shladd BOFFSET = r3, 2, B
  10041. mov f72 = f0
  10042. #ifdef LN
  // LN only: step AORIG back by r2 before deriving AOFFSET from it.
  10043. sub AORIG = AORIG, r2
  10044. #else
  10045. nop __LINE__
  10046. #endif
  10047. }
  10048. ;;
  10049. { .mfi
  10050. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  10051. mov f73 = f0
  10052. add AOFFSET = r3, AORIG
  10053. }
  10054. ;;
  10055. #endif
  10056. ;;
  // Convert the step count into a count of two-step loop iterations:
  // L = ((L + 1) >> 1) - 1, which is what ar.lc expects.
  10057. adds L = 1, L
  10058. ;;
  10059. { .mmi
  10060. nop __LINE__
  10061. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  // p12 = bit 0 of (L + 1) is zero, i.e. the original step count was odd.
  // .L032 uses p12 to drop the second half of its final iteration.
  10062. tbit.z p12, p0 = L, 0
  10063. }
  10064. ;;
  10065. { .mfi
  10066. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  10067. mov f88 = f0
  10068. shr L = L, 1
  10069. }
  10070. { .mfi
  10071. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  10072. mov f89 = f0
  10073. nop __LINE__
  10074. }
  10075. ;;
  10076. { .mfi
  10077. (p7) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  10078. mov f104 = f0
  10079. adds L = -1, L
  10080. }
  10081. { .mfb
  10082. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  10083. mov f105 = f0
  10084. nop __LINE__
  10085. }
  10086. ;;
  10087. { .mfi
  10088. (p7) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  10089. mov f120 = f0
  10090. mov ar.lc = L
  10091. }
  10092. { .mfi
  // p3 starts true: the second unrolled half of .L032 is enabled by default.
  10093. cmp.eq p3, p0 = r0, r0
  10094. mov f121 = f0
  10095. nop __LINE__
  10096. }
  10097. ;;
  // L == -1 means there were zero steps: bypass the loop entirely.
  10098. cmp.eq p6, p0 = -1, L
  10099. (p6) br.cond.dpnt .L038
  10100. ;;
  // .L032: accumulation loop, unrolled by two steps per iteration.
  //   first half : A pair f32/f33 times B pairs f48..f55
  //   second half: A pair f40/f41 times B pairs f56..f63 (predicated on p3)
  // Accumulator pairs: f64/f65, f80/f81, f96/f97, f112/f113.
  // FMA, FMA_A and FMA_B are macros defined outside this chunk; presumably
  // they select add or subtract per conjugation variant - TODO confirm.
  // Loop control is ar.lc via br.cloop; L is decremented in parallel and
  // only feeds the p3/p4 predicates.
  10101. .align 16
  10102. .L032:
  10103. { .mfb
  10104. lfetch.nt1 [PREA], 4 * SIZE
  10105. FMA f64 = f32, f48, f64 // A1 * B1
  10106. nop __LINE__
  10107. }
  10108. { .mfi
  10109. nop __LINE__
  10110. FMA_B f65 = f32, f49, f65 // A1 * B2
  // Odd step count (p12) and last iteration (L == 0): turn p3 off so the
  // second half below is skipped.
  10111. (p12) cmp.ne p3, p0 = 0, L
  10112. }
  10113. ;;
  10114. { .mfi
  10115. lfetch.nt1 [PREB], 16 * SIZE
  10116. FMA f80 = f32, f50, f80 // A1 * B3
  // p4 = another iteration follows; it guards the reloads of f32/f33 and
  // f48..f55 for that next iteration.
  10117. cmp.ne p4, p5 = 0, L
  10118. }
  10119. { .mfb
  10120. nop __LINE__
  10121. FMA_B f81 = f32, f51, f81 // A1 * B4
  10122. nop __LINE__
  10123. }
  10124. ;;
  10125. { .mfb
  // Loads for the second half of this iteration start here (p3).
  10126. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  10127. FMA f96 = f32, f52, f96 // A1 * B5
  10128. nop __LINE__
  10129. }
  10130. { .mfb
  10131. nop __LINE__
  10132. FMA_B f97 = f32, f53, f97 // A1 * B6
  10133. nop __LINE__
  10134. }
  10135. ;;
  10136. { .mfb
  10137. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  10138. FMA f112 = f32, f54, f112 // A1 * B7
  10139. nop __LINE__
  10140. }
  10141. { .mfb
  10142. nop __LINE__
  10143. FMA_B f113 = f32, f55, f113 // A1 * B8
  10144. nop __LINE__
  10145. }
  10146. ;;
  10147. { .mfb
  10148. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  10149. FMA f65 = f33, f48, f65 // A2 * B1
  10150. nop __LINE__
  10151. }
  10152. { .mfb
  10153. nop __LINE__
  10154. FMA_A f64 = f33, f49, f64 // A2 * B2
  10155. nop __LINE__
  10156. }
  10157. ;;
  10158. { .mfb
  10159. (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  10160. FMA f81 = f33, f50, f81 // A2 * B3
  10161. nop __LINE__
  10162. }
  10163. { .mfb
  10164. nop __LINE__
  10165. FMA_A f80 = f33, f51, f80 // A2 * B4
  10166. nop __LINE__
  10167. }
  10168. ;;
  10169. { .mfb
  10170. (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  10171. FMA f97 = f33, f52, f97 // A2 * B5
  10172. nop __LINE__
  10173. }
  10174. { .mfb
  10175. nop __LINE__
  10176. FMA_A f96 = f33, f53, f96 // A2 * B6
  10177. nop __LINE__
  10178. }
  10179. ;;
  10180. { .mfb
  // From here on the first-half operands are dead, so they are refilled
  // for the next iteration (p4) while the second half executes.
  10181. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  10182. FMA f113 = f33, f54, f113 // A2 * B7
  10183. nop __LINE__
  10184. }
  10185. { .mfb
  10186. nop __LINE__
  10187. FMA_A f112 = f33, f55, f112 // A2 * B8
  10188. nop __LINE__
  10189. }
  10190. ;;
  10191. { .mfb
  10192. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  10193. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  10194. nop __LINE__
  10195. }
  10196. { .mfb
  10197. nop __LINE__
  10198. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  10199. nop __LINE__
  10200. }
  10201. ;;
  10202. { .mfb
  10203. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  10204. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  10205. nop __LINE__
  10206. }
  10207. { .mfb
  10208. nop __LINE__
  10209. (p3) FMA_B f81 = f40, f59, f81 // A1 * B4
  10210. nop __LINE__
  10211. }
  10212. ;;
  10213. { .mfb
  10214. (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  10215. (p3) FMA f96 = f40, f60, f96 // A1 * B5
  10216. nop __LINE__
  10217. }
  10218. { .mfb
  10219. nop __LINE__
  10220. (p3) FMA_B f97 = f40, f61, f97 // A1 * B6
  10221. nop __LINE__
  10222. }
  10223. ;;
  10224. { .mfb
  10225. nop __LINE__
  10226. (p3) FMA f112 = f40, f62, f112 // A1 * B7
  10227. nop __LINE__
  10228. }
  10229. { .mfb
  10230. nop __LINE__
  10231. (p3) FMA_B f113 = f40, f63, f113 // A1 * B8
  10232. nop __LINE__
  10233. }
  10234. ;;
  10235. { .mfb
  10236. (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  10237. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  10238. nop __LINE__
  10239. }
  10240. { .mfb
  10241. nop __LINE__
  10242. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  10243. nop __LINE__
  10244. }
  10245. ;;
  10246. { .mfb
  10247. nop __LINE__
  10248. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  10249. nop __LINE__
  10250. }
  10251. { .mfb
  10252. nop __LINE__
  10253. (p3) FMA_A f80 = f41, f59, f80 // A2 * B4
  10254. nop __LINE__
  10255. }
  10256. ;;
  10257. { .mfb
  10258. nop __LINE__
  10259. (p3) FMA f97 = f41, f60, f97 // A2 * B5
  10260. nop __LINE__
  10261. }
  10262. { .mfb
  10263. nop __LINE__
  10264. (p3) FMA_A f96 = f41, f61, f96 // A2 * B6
  10265. nop __LINE__
  10266. }
  10267. ;;
  10268. { .mfi
  10269. nop __LINE__
  10270. (p3) FMA f113 = f41, f62, f113 // A2 * B7
  // Software copy of the remaining-iteration count (ar.lc is the real one).
  10271. adds L = -1, L
  10272. }
  10273. { .mfb
  10274. nop __LINE__
  10275. (p3) FMA_A f112 = f41, f63, f112 // A2 * B8
  10276. br.cloop.sptk.few .L032
  10277. }
  10278. ;;
  // .L038: solve step for the single-row block.
  //   1. (LN/RT) re-derive AOFFSET/BOFFSET from KK.
  //   2. Load the 8 right-hand-side values and combine them with the
  //      accumulators f64/f65, f80/f81, f96/f97, f112/f113.
  //   3. Apply the triangular factor (one variant per LN/LT/RN/RT).
  //   4. Write results back to the packed buffer and to C1..C4.
  //   5. Clear the accumulators and advance pointers / KK.
  // FSUB, FSUB_A, FMPY, FMA_A..FMA_D and FNMA are macros defined outside
  // this chunk; sign conventions below are hedged accordingly.
  10279. .L038:
  10280. #if defined(LN) || defined(RT)
  // r2 = KK - 1 (LN) or KK - 4 (RT), then scaled by ZBASE_SHIFT.
  10281. #ifdef LN
  10282. adds r2 = -1, KK
  10283. #else
  10284. adds r2 = -4, KK
  10285. #endif
  10286. ;;
  10287. shladd r2 = r2, ZBASE_SHIFT, r0
  10288. ;;
  // AOFFSET = AORIG + r2, BOFFSET = B + (r2 << 2).
  10289. add AOFFSET = r2, AORIG
  10290. shladd BOFFSET = r2, 2, B
  10291. ;;
  10292. #endif
  10293. #if defined(LN) || defined(LT)
  // LN/LT: the right-hand side is read from BOFFSET (8 values), and the
  // pointer is rewound to where it started (3 post-increments of 2, then -6).
  10294. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  10295. ;;
  10296. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  10297. ;;
  10298. LDFPD f104, f105 = [BOFFSET], 2 * SIZE
  10299. ;;
  10300. LDFPD f120, f121 = [BOFFSET]
  10301. adds BOFFSET = -6 * SIZE, BOFFSET
  10302. ;;
  // Presumably accumulator = loaded value - accumulator; FSUB_A is used for
  // the second element of each pair in this variant - TODO confirm macro.
  10303. FSUB f64 = f72, f64
  10304. FSUB_A f65 = f73, f65
  10305. FSUB f80 = f88, f80
  10306. FSUB_A f81 = f89, f81
  10307. FSUB f96 = f104, f96
  10308. FSUB_A f97 = f105, f97
  10309. FSUB f112 = f120, f112
  10310. FSUB_A f113 = f121, f113
  10311. ;;
  10312. #else
  // RN/RT: same pattern, but the right-hand side is read from AOFFSET and
  // plain FSUB is used for both elements of each pair.
  10313. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  10314. ;;
  10315. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  10316. ;;
  10317. LDFPD f104, f105 = [AOFFSET], 2 * SIZE
  10318. ;;
  10319. LDFPD f120, f121 = [AOFFSET]
  10320. adds AOFFSET = -6 * SIZE, AOFFSET
  10321. ;;
  10322. FSUB f64 = f72, f64
  10323. FSUB f65 = f73, f65
  10324. FSUB f80 = f88, f80
  10325. FSUB f81 = f89, f81
  10326. FSUB f96 = f104, f96
  10327. FSUB f97 = f105, f97
  10328. FSUB f112 = f120, f112
  10329. FSUB f113 = f121, f113
  10330. ;;
  10331. #endif
  10332. #ifdef LN
  // LN: scale all four result pairs by the single pair (f120, f121) read
  // from AOFFSET. Each pair is updated as a two-term product:
  //   first  = f120*first (+/-) f121*second
  //   second = f121*first (+/-) f120*second
  // with the signs fixed by FMA_C / FMA_D. Presumably (f120, f121) is the
  // pre-inverted diagonal entry - TODO confirm against the packing code.
  10333. LDFPD f120, f121 = [AOFFSET]
  10334. ;;
  10335. FMPY f32 = f120, f64
  10336. FMPY f33 = f121, f64
  10337. FMPY f34 = f120, f80
  10338. FMPY f35 = f121, f80
  10339. FMPY f36 = f120, f96
  10340. FMPY f37 = f121, f96
  10341. FMPY f38 = f120, f112
  10342. FMPY f39 = f121, f112
  10343. ;;
  10344. FMA_C f64 = f121, f65, f32
  10345. FMA_D f65 = f120, f65, f33
  10346. FMA_C f80 = f121, f81, f34
  10347. FMA_D f81 = f120, f81, f35
  10348. FMA_C f96 = f121, f97, f36
  10349. FMA_D f97 = f120, f97, f37
  10350. FMA_C f112 = f121, f113, f38
  10351. FMA_D f113 = f120, f113, f39
  10352. ;;
  10353. #endif
  10354. #ifdef LT
  // LT: identical scaling, using the pair (f90, f91) read from AOFFSET.
  10355. LDFPD f90, f91 = [AOFFSET]
  10356. ;;
  10357. FMPY f32 = f90, f64
  10358. FMPY f33 = f91, f64
  10359. FMPY f34 = f90, f80
  10360. FMPY f35 = f91, f80
  10361. FMPY f36 = f90, f96
  10362. FMPY f37 = f91, f96
  10363. FMPY f38 = f90, f112
  10364. FMPY f39 = f91, f112
  10365. ;;
  10366. FMA_C f64 = f91, f65, f32
  10367. FMA_D f65 = f90, f65, f33
  10368. FMA_C f80 = f91, f81, f34
  10369. FMA_D f81 = f90, f81, f35
  10370. FMA_C f96 = f91, f97, f36
  10371. FMA_D f97 = f90, f97, f37
  10372. FMA_C f112 = f91, f113, f38
  10373. FMA_D f113 = f90, f113, f39
  10374. ;;
  10375. #endif
  10376. #ifdef RN
  // RN: load ten coefficient pairs from BOFFSET at SIZE offsets
  //   0,2,4,6   -> f72..f79
  //   10,12,14  -> f90..f95
  //   20,22     -> f108..f111
  //   30        -> f126,f127
  // and rewind BOFFSET by 30*SIZE so it ends where it started.
  10377. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  10378. ;;
  10379. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  10380. ;;
  10381. LDFPD f76, f77 = [BOFFSET], 2 * SIZE
  10382. ;;
  10383. LDFPD f78, f79 = [BOFFSET]
  10384. adds BOFFSET = 4 * SIZE, BOFFSET
  10385. ;;
  10386. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  10387. ;;
  10388. LDFPD f92, f93 = [BOFFSET], 2 * SIZE
  10389. ;;
  10390. LDFPD f94, f95 = [BOFFSET]
  10391. adds BOFFSET = 6 * SIZE, BOFFSET
  10392. ;;
  10393. LDFPD f108, f109 = [BOFFSET], 2 * SIZE
  10394. ;;
  10395. LDFPD f110, f111 = [BOFFSET]
  10396. adds BOFFSET = 8 * SIZE, BOFFSET
  10397. ;;
  10398. LDFPD f126, f127 = [BOFFSET]
  10399. adds BOFFSET = - 30 * SIZE, BOFFSET
  10400. ;;
  // Forward sweep: scale f64/f65 by (f72,f73), then eliminate it from the
  // three later pairs using (f74..f79).
  10401. FMPY f32 = f72, f64
  10402. FMPY f33 = f73, f64
  10403. ;;
  10404. FMA_C f64 = f73, f65, f32
  10405. FMA_D f65 = f72, f65, f33
  10406. ;;
  10407. FNMA f80 = f74, f64, f80
  10408. FMA_A f81 = f75, f64, f81
  10409. ;;
  10410. FMA_B f80 = f75, f65, f80
  10411. FNMA f81 = f74, f65, f81
  10412. ;;
  10413. FNMA f96 = f76, f64, f96
  10414. FMA_A f97 = f77, f64, f97
  10415. ;;
  10416. FMA_B f96 = f77, f65, f96
  10417. FNMA f97 = f76, f65, f97
  10418. ;;
  10419. FNMA f112 = f78, f64, f112
  10420. FMA_A f113 = f79, f64, f113
  10421. ;;
  10422. FMA_B f112 = f79, f65, f112
  10423. FNMA f113 = f78, f65, f113
  10424. ;;
  // Scale f80/f81 by (f90,f91), eliminate from the two later pairs.
  10425. FMPY f32 = f90, f80
  10426. FMPY f33 = f91, f80
  10427. ;;
  10428. FMA_C f80 = f91, f81, f32
  10429. FMA_D f81 = f90, f81, f33
  10430. ;;
  10431. FNMA f96 = f92, f80, f96
  10432. FMA_A f97 = f93, f80, f97
  10433. ;;
  10434. FMA_B f96 = f93, f81, f96
  10435. FNMA f97 = f92, f81, f97
  10436. ;;
  10437. FNMA f112 = f94, f80, f112
  10438. FMA_A f113 = f95, f80, f113
  10439. ;;
  10440. FMA_B f112 = f95, f81, f112
  10441. FNMA f113 = f94, f81, f113
  10442. ;;
  // Scale f96/f97 by (f108,f109), eliminate from the last pair.
  10443. FMPY f32 = f108, f96
  10444. FMPY f33 = f109, f96
  10445. ;;
  10446. FMA_C f96 = f109, f97, f32
  10447. FMA_D f97 = f108, f97, f33
  10448. ;;
  10449. FNMA f112 = f110, f96, f112
  10450. FMA_A f113 = f111, f96, f113
  10451. ;;
  10452. FMA_B f112 = f111, f97, f112
  10453. FNMA f113 = f110, f97, f113
  10454. ;;
  // Scale the last pair f112/f113 by (f126,f127).
  10455. FMPY f32 = f126, f112
  10456. FMPY f33 = f127, f112
  10457. ;;
  10458. FMA_C f112 = f127, f113, f32
  10459. FMA_D f113 = f126, f113, f33
  10460. ;;
  10461. #endif
  10462. #ifdef RT
  // RT: mirror image of RN. Coefficients are read walking backwards from
  // SIZE offset 30:
  //   30,28,26,24 -> f72..f79
  //   20,18,16    -> f88..f93
  //   10,8        -> f104..f107
  //   0           -> f120,f121
  // BOFFSET ends at its starting value (offset 0).
  10463. adds BOFFSET = 30 * SIZE, BOFFSET
  10464. ;;
  10465. LDFPD f72, f73 = [BOFFSET]
  10466. adds BOFFSET = - 2 * SIZE, BOFFSET
  10467. ;;
  10468. LDFPD f74, f75 = [BOFFSET]
  10469. adds BOFFSET = - 2 * SIZE, BOFFSET
  10470. ;;
  10471. LDFPD f76, f77 = [BOFFSET]
  10472. adds BOFFSET = - 2 * SIZE, BOFFSET
  10473. ;;
  10474. LDFPD f78, f79 = [BOFFSET]
  10475. adds BOFFSET = - 4 * SIZE, BOFFSET
  10476. ;;
  10477. LDFPD f88, f89 = [BOFFSET]
  10478. adds BOFFSET = - 2 * SIZE, BOFFSET
  10479. ;;
  10480. LDFPD f90, f91 = [BOFFSET]
  10481. adds BOFFSET = - 2 * SIZE, BOFFSET
  10482. ;;
  10483. LDFPD f92, f93 = [BOFFSET]
  10484. adds BOFFSET = - 6 * SIZE, BOFFSET
  10485. ;;
  10486. LDFPD f104, f105 = [BOFFSET]
  10487. adds BOFFSET = - 2 * SIZE, BOFFSET
  10488. ;;
  10489. LDFPD f106, f107 = [BOFFSET]
  10490. adds BOFFSET = - 8 * SIZE, BOFFSET
  10491. ;;
  10492. LDFPD f120, f121 = [BOFFSET]
  10493. ;;
  // Backward sweep: start with the last pair f112/f113, scaled by (f72,f73),
  // and eliminate it from the three earlier pairs.
  10494. FMPY f32 = f72, f112
  10495. FMPY f33 = f73, f112
  10496. ;;
  10497. FMA_C f112 = f73, f113, f32
  10498. FMA_D f113 = f72, f113, f33
  10499. ;;
  10500. FNMA f96 = f74, f112, f96
  10501. FMA_A f97 = f75, f112, f97
  10502. ;;
  10503. FMA_B f96 = f75, f113, f96
  10504. FNMA f97 = f74, f113, f97
  10505. ;;
  10506. FNMA f80 = f76, f112, f80
  10507. FMA_A f81 = f77, f112, f81
  10508. ;;
  10509. FMA_B f80 = f77, f113, f80
  10510. FNMA f81 = f76, f113, f81
  10511. ;;
  10512. FNMA f64 = f78, f112, f64
  10513. FMA_A f65 = f79, f112, f65
  10514. ;;
  10515. FMA_B f64 = f79, f113, f64
  10516. FNMA f65 = f78, f113, f65
  10517. ;;
  // Scale f96/f97 by (f88,f89), eliminate from the two earlier pairs.
  10518. FMPY f32 = f88, f96
  10519. FMPY f33 = f89, f96
  10520. ;;
  10521. FMA_C f96 = f89, f97, f32
  10522. FMA_D f97 = f88, f97, f33
  10523. ;;
  10524. FNMA f80 = f90, f96, f80
  10525. FMA_A f81 = f91, f96, f81
  10526. ;;
  10527. FMA_B f80 = f91, f97, f80
  10528. FNMA f81 = f90, f97, f81
  10529. ;;
  10530. FNMA f64 = f92, f96, f64
  10531. FMA_A f65 = f93, f96, f65
  10532. ;;
  10533. FMA_B f64 = f93, f97, f64
  10534. FNMA f65 = f92, f97, f65
  10535. ;;
  // Scale f80/f81 by (f104,f105), eliminate from the first pair.
  10536. FMPY f32 = f104, f80
  10537. FMPY f33 = f105, f80
  10538. ;;
  10539. FMA_C f80 = f105, f81, f32
  10540. FMA_D f81 = f104, f81, f33
  10541. ;;
  10542. FNMA f64 = f106, f80, f64
  10543. FMA_A f65 = f107, f80, f65
  10544. ;;
  10545. FMA_B f64 = f107, f81, f64
  10546. FNMA f65 = f106, f81, f65
  10547. ;;
  // Scale the first pair f64/f65 by (f120,f121).
  10548. FMPY f32 = f120, f64
  10549. FMPY f33 = f121, f64
  10550. ;;
  10551. FMA_C f64 = f121, f65, f32
  10552. FMA_D f65 = f120, f65, f33
  10553. ;;
  10554. #endif
  // Write the eight results back over the packed right-hand side, using two
  // store pointers 4*SIZE apart. Each pointer advances 3*SIZE + 5*SIZE, and
  // the final adds of -8*SIZE restores the base pointer.
  10555. #if defined(LN) || defined(LT)
  10556. adds BOFFSET2 = 4 * SIZE, BOFFSET
  10557. ;;
  10558. STFD [BOFFSET] = f64, SIZE
  10559. STFD [BOFFSET2] = f96, SIZE
  10560. ;;
  10561. STFD [BOFFSET] = f65, SIZE
  10562. STFD [BOFFSET2] = f97, SIZE
  10563. ;;
  10564. STFD [BOFFSET] = f80, SIZE
  10565. STFD [BOFFSET2] = f112, SIZE
  10566. ;;
  10567. STFD [BOFFSET] = f81, 5 * SIZE
  10568. STFD [BOFFSET2] = f113, 5 * SIZE
  10569. ;;
  10570. adds BOFFSET = - 8 * SIZE, BOFFSET
  10571. ;;
  10572. #else
  10573. adds AOFFSET2 = 4 * SIZE, AOFFSET
  10574. ;;
  10575. STFD [AOFFSET] = f64, SIZE
  10576. STFD [AOFFSET2] = f96, SIZE
  10577. ;;
  10578. STFD [AOFFSET] = f65, SIZE
  10579. STFD [AOFFSET2] = f97, SIZE
  10580. ;;
  10581. STFD [AOFFSET] = f80, SIZE
  10582. STFD [AOFFSET2] = f112, SIZE
  10583. ;;
  10584. STFD [AOFFSET] = f81, 5 * SIZE
  10585. STFD [AOFFSET2] = f113, 5 * SIZE
  10586. ;;
  10587. adds AOFFSET = - 8 * SIZE, AOFFSET
  10588. ;;
  10589. #endif
  // LN: step the C pointers back by 2*SIZE before storing; the stores below
  // post-increment them forward by the same amount.
  10590. #ifdef LN
  10591. adds C1 = -2 * SIZE, C1
  10592. adds C2 = -2 * SIZE, C2
  10593. adds C3 = -2 * SIZE, C3
  10594. adds C4 = -2 * SIZE, C4
  10595. #endif
  10596. ;;
  // One result pair per output pointer C1..C4.
  10597. STFD [C1 ] = f64, SIZE
  10598. ;;
  10599. STFD [C1 ] = f65, SIZE
  10600. ;;
  10601. STFD [C2 ] = f80, SIZE
  10602. ;;
  10603. STFD [C2 ] = f81, SIZE
  10604. ;;
  10605. STFD [C3 ] = f96, SIZE
  10606. ;;
  10607. STFD [C3 ] = f97, SIZE
  10608. ;;
  10609. STFD [C4 ] = f112, SIZE
  10610. ;;
  10611. STFD [C4 ] = f113, SIZE
  10612. ;;
  // Reset the accumulators to zero (f0) for whatever block runs next.
  10613. mov f64 = f0
  10614. mov f65 = f0
  10615. mov f80 = f0
  10616. mov f81 = f0
  10617. mov f96 = f0
  10618. mov f97 = f0
  10619. mov f112 = f0
  10620. mov f113 = f0
  10621. ;;
  // LN: undo the post-increments so C1..C4 end 2*SIZE below their entry value.
  10622. #ifdef LN
  10623. adds C1 = -2 * SIZE, C1
  10624. adds C2 = -2 * SIZE, C2
  10625. adds C3 = -2 * SIZE, C3
  10626. adds C4 = -2 * SIZE, C4
  10627. #endif
  10628. ;;
  // NOTE(review): no instruction between here and .L049 consumes this p6
  // (.L049 rewrites p6 before branching), and no loop back-edge follows the
  // decrement of I in this block. Looks like a leftover from a looped
  // variant - confirm I is re-initialised elsewhere before removing.
  10629. cmp.ne p6, p0 = 1, I
  10630. ;;
  10631. adds I = -1, I
  10632. ;;
  // r2 = K << ZBASE_SHIFT; L = K - KK.
  10633. shladd r2 = K, ZBASE_SHIFT, r0
  10634. ;;
  10635. sub L = K, KK
  10636. ;;
  10637. #ifdef RT
  // RT: advance AORIG by r2.
  10638. add AORIG = r2, AORIG
  10639. #endif
  10640. ;;
  10641. #if defined(LT) || defined(RN)
  // LT/RN: skip the remaining (K - KK) steps in both packed buffers:
  // AOFFSET += L << ZBASE_SHIFT, BOFFSET += (L << ZBASE_SHIFT) << 2.
  10642. shladd L = L, ZBASE_SHIFT, r0
  10643. ;;
  10644. add AOFFSET = L, AOFFSET
  10645. shladd BOFFSET = L, 2, BOFFSET
  10646. #endif
  10647. ;;
  // KK moves by one for the left-side variants: +1 for LT, -1 for LN.
  10648. #ifdef LT
  10649. adds KK = 1, KK
  10650. #elif defined LN
  10651. adds KK = -1, KK
  10652. #else
  10653. nop __LINE__
  10654. #endif
  10655. ;;
  // Recompute L from the updated KK (KK for LT/RN, K - KK otherwise).
  10656. #if defined(LT) || defined(RN)
  10657. mov L = KK
  10658. #else
  10659. sub L = K, KK
  10660. #endif
  10661. ;;
  // .L049: end of one four-column panel. Updates B and KK for the next
  // panel, resets AOFFSET to A, and branches to .L010x while 0 < J.
  10662. .align 16
  10663. .L049:
  10664. #ifdef LN
  // LN: B += (K << ZBASE_SHIFT) << 2.
  10665. shladd KK8 = K, ZBASE_SHIFT, r0
  10666. ;;
  10667. shladd B = KK8, 2, B
  10668. #endif
  10669. #if defined(LT) || defined(RN)
  // LT/RN: B takes over the position BOFFSET has reached.
  10670. mov B = BOFFSET
  10671. #endif
  // Right-side variants move KK by 4: +4 for RN, -4 for RT.
  10672. #ifdef RN
  10673. adds KK = 4, KK
  10674. #endif
  10675. #ifdef RT
  10676. adds KK = -4, KK
  10677. #endif
  10678. ;;
  10679. { .mmb
  10680. mov AOFFSET = A
  // p6 = (0 < J): another panel remains, so loop back; otherwise fall
  // through to the exit at .L999. (.L010x is defined outside this chunk.)
  10681. cmp.lt p6, p0 = 0, J
  10682. (p6) br.cond.dptk .L010x
  10683. }
  10684. ;;
  // .L999: common exit. Restores the loop-count register and the predicate
  // registers from the copies held in ARLC and PR, then returns through b0.
  10685. .align 16
  10686. .L999:
  10687. { .mii
  10688. nop __LINE__
  10689. mov ar.lc = ARLC
  // Mask -1 restores every predicate register from PR.
  10690. mov pr = PR, -1
  10691. }
  10692. { .mib
  10693. nop __LINE__
  // ar.pfs is restored from ARPFS only in TRMMKERNEL builds.
  10694. #ifdef TRMMKERNEL
  10695. mov ar.pfs = ARPFS
  10696. #else
  10697. nop __LINE__
  10698. #endif
  10699. br.ret.sptk.many b0
  10700. }
  10701. EPILOGUE