You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

ztrsm_kernel_LN.S 171 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
062210623106241062510626106271062810629106301063110632106331063410635106361063710638106391064010641106421064310644106451064610647106481064910650106511065210653106541065510656106571065810659106601066110662106631066410665106661066710668106691067010671106721067310674106751067610677106781067910680106811068210683106841068510686106871068810689106901069110692106931069410695106961069710698106991070010701107021070310704107051070610707107081070910710107111071210713107141071510716107171071810719107201072110722107231072410725107261072710728107291073010731107321073310734107351073610737107381073910740107411074210743107441074510746107471074810749107501075110752107531075410755107561075710758107591076010761107621076310764107651076610767107681076910770107711077210773107741077510776107771077810779107801078110782107831078410785107861078710788107891079010791107921079310794107951079610797107981079910800108011080210803108041080510806108071080810809108101081110812108131081410815108161081710818108191082010821108221082310824108251082610827108281082910830108311083210833108341083510836108371083810839
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #define ASSEMBLER
  39. #include "common.h"
  /* PREFETCHSIZE: look-ahead distance for the A and B panel prefetches.   */
  /* The kernel multiplies it by SIZE when it seeds the PREA and PREB      */
  /* prefetch pointers from AOFFSET and BOFFSET, so the unit is elements.  */
  /* The DOUBLE build uses half the element count of the non-DOUBLE build. */
  40. #ifdef DOUBLE
  41. #define PREFETCHSIZE (16 * 8)
  42. #else
  43. #define PREFETCHSIZE (32 * 8)
  44. #endif
  /* CPREFETCHSIZE: presumably the offset used when prefetching C (going   */
  /* by the name); the LN variant selects a negative value.                */
  /* NOTE(review): not referenced in the part of the kernel visible here,  */
  /* so its units and the reason for the sign flip need confirming at the  */
  /* point of use.                                                         */
  45. #ifndef LN
  46. #define CPREFETCHSIZE 7
  47. #else
  48. #define CPREFETCHSIZE -8
  49. #endif
  /* Prefetch instruction (lfetch with the .excl and .nt1 completers).     */
  /* NOTE(review): presumably used for C, by name; no use is visible in    */
  /* this part of the kernel.                                              */
  50. #define CPREFETCH lfetch.excl.nt1
  /* Symbolic names for r32-r39. The entry code issues                     */
  /* "alloc ARPFS = ar.pfs, 8, 8, 0, 0" (8 inputs, 8 locals), so these are */
  /* the input registers of the frame. LDC is not taken from its register  */
  /* on entry: the prologue loads it from [SP + 16] and then scales it by  */
  /* ZBASE_SHIFT.                                                          */
  51. #define M r32
  52. #define N r33
  53. #define K r34
  54. #define A r37
  55. #define B r38
  56. #define C r39
  57. #define LDC r35
  /* Loop counters and working pointers.                                   */
  /*   J       - set to N >> 2 in the prologue and decremented at .L010.   */
  /*   I       - r15; the prologue also uses r15 directly as the address   */
  /*             temporary for loading OFFSET.                             */
  /*   AOFFSET - current read pointer into A.                              */
  /*   BOFFSET - current read pointer into B.                              */
  /*   L       - inner-loop trip count, copied into ar.lc before .L032.    */
  /*   TEMP    - NOTE(review): not used in the visible part of the kernel. */
  58. #define I r15
  59. #define J r16
  60. #define AOFFSET r17
  61. #define BOFFSET r18
  62. #define TEMP r19
  63. #define L r20
  /* Output column pointers. At .L010, C1..C4 are set to C, C + LDC,       */
  /* C + 2 * LDC and C + 3 * LDC.                                          */
  /* NOTE(review): C5..C8 are not used in the visible part of the kernel.  */
  64. #define C1 r21
  65. #define C2 r22
  66. #define C3 r23
  67. #define C4 r24
  68. #define C5 r25
  69. #define C6 r26
  70. #define C7 r27
  71. #define C8 r28
  /* Prefetch pointers. PREA and PREB are seeded from AOFFSET and BOFFSET  */
  /* plus PREFETCHSIZE * SIZE and advanced by the lfetch.nt1 instructions  */
  /* in the .L032 loop.                                                    */
  /* NOTE(review): PREC is not used in the visible part of the kernel.     */
  72. #define PREA r8
  73. #define PREB r9
  74. #define PREC r10
  /* SP is the base for the two memory arguments read by the prologue      */
  /* (LDC at SP + 16, OFFSET at SP + 24).                                  */
  /* ARLC, PR and ARPFS receive copies of ar.lc, the predicate registers   */
  /* and ar.pfs respectively; presumably they are restored on exit, which  */
  /* is outside the visible part of the kernel.                            */
  75. #define SP r12
  76. #define ARLC r29
  77. #define PR r30
  78. #define ARPFS r31
  /* NOTE(review): ALPHA_R / ALPHA_I are not referenced in the visible     */
  /* part of the kernel; presumably the real and imaginary parts of alpha. */
  79. #define ALPHA_R f8
  80. #define ALPHA_I f9
  /* Values kept in the local registers loc0..loc5 of the frame.           */
  /*   AORIG    - copy of A taken at .L010 for the LN and RT variants;     */
  /*              AOFFSET is recomputed from it.                           */
  /*   KK       - running offset initialised from OFFSET (with M or N,     */
  /*              depending on the variant) and stepped by +1 / -1 after   */
  /*              a tile in the LT / LN variants.                          */
  /*   KK8      - NOTE(review): not used in the visible part.              */
  /*   OFFSET   - loaded from [SP + 24] in the prologue.                   */
  /*   AOFFSET2 - second store pointer, AOFFSET + 4 * SIZE, used when the  */
  /*              solved tile is written back to A.                        */
  /*   BOFFSET2 - second store pointer, BOFFSET + 4 * SIZE, used when the  */
  /*              solved tile is written back to B.                        */
  81. #define AORIG loc0
  82. #define KK loc1
  83. #define KK8 loc2
  84. #define OFFSET loc3
  85. #define AOFFSET2 loc4
  86. #define BOFFSET2 loc5
  /* Operation selectors that depend on CONJ. Each name resolves to one of */
  /* FADD / FSUB / FMA / FNMA / FMS, which are not defined in this file    */
  /* (presumably provided by common.h). Defining CONJ swaps every add-like */
  /* operation with its subtract-like counterpart.                         */
  /*                                                                       */
  /* FMA_A / FMA_B are used in the .L032 accumulation loop for the cross   */
  /* terms: FMA_A accumulates products of the second element of the A pair */
  /* with the second element of each B pair, FMA_B products of the first   */
  /* element of the A pair with the second element of each B pair.         */
  /* NOTE(review): presumably the pairs are (real, imaginary), so these    */
  /* are the imag * imag and real * imag terms of a complex product.       */
  /* NOTE(review): FCALC_A / FCALC_B are not used in the visible part.     */
  87. #ifndef CONJ
  88. #define FCALC_A FSUB
  89. #define FCALC_B FADD
  90. #define FMA_A FNMA
  91. #define FMA_B FMA
  92. #else
  93. #define FCALC_A FADD
  94. #define FCALC_B FSUB
  95. #define FMA_A FMA
  96. #define FMA_B FNMA
  97. #endif
  /* NOTE(review): FCALC_C / FCALC_D are not used in the visible part of   */
  /* the kernel.                                                           */
  98. #ifndef CONJ
  99. #define FCALC_C FMA
  100. #define FCALC_D FNMA
  101. #else
  102. #define FCALC_C FNMA
  103. #define FCALC_D FMA
  104. #endif
  /* Selectors for the solve step after .L038.                             */
  /*   FMA_C / FMA_D - combine the two FMPY partial products with the      */
  /*                   remaining cross term when a result pair is          */
  /*                   multiplied by a coefficient pair loaded from A      */
  /*                   or B.                                               */
  /*   FSUB_A        - used for the second element of each pair when the   */
  /*                   accumulated sums are combined with the values       */
  /*                   loaded from B in the LN / LT variants; the first    */
  /*                   element always uses FSUB.                           */
  /* With CONJ, FMA_D becomes FMS rather than FNMA, unlike the other       */
  /* swapped pairs.                                                        */
  105. #ifndef CONJ
  106. #define FMA_C FNMA
  107. #define FMA_D FMA
  108. #define FSUB_A FSUB
  109. #else
  110. #define FMA_C FMA
  111. #define FMA_D FMS
  112. #define FSUB_A FADD
  113. #endif
  114. PROLOGUE
  115. .prologue
  116. PROFCODE
  117. { .mfi
  118. .save ar.pfs, ARPFS
  119. alloc ARPFS = ar.pfs, 8, 8, 0, 0
  120. mov f64 = f0
  121. adds r14 = 16, SP
  122. }
  123. { .mfi
  124. nop __LINE__
  125. mov f65 = f0
  126. adds r15 = 24, SP
  127. }
  128. ;;
  129. { .mfi
  130. ld8 LDC = [r14]
  131. mov f81 = f0
  132. mov PR = pr
  133. }
  134. { .mfi
  135. ld8 OFFSET = [r15]
  136. mov f96 = f0
  137. shr J = N, 2
  138. }
  139. ;;
  140. { .mfi
  141. shladd LDC = LDC, ZBASE_SHIFT, r0
  142. mov f97 = f0
  143. }
  144. { .mfi
  145. nop __LINE__
  146. mov f113 = f0
  147. }
  148. ;;
  149. #ifdef LN
  150. { .mmi
  151. setf.sig f32 = M
  152. setf.sig f33 = K
  153. shladd C = M, ZBASE_SHIFT, C
  154. }
  155. ;;
  156. {.mmf
  157. nop __LINE__
  158. nop __LINE__
  159. xmpy.l f32 = f32, f33
  160. }
  161. ;;
  162. { .mmi
  163. getf.sig r2 = f32
  164. ;;
  165. nop __LINE__
  166. shladd A = r2, ZBASE_SHIFT, A
  167. }
  168. ;;
  169. #endif
  170. #ifdef RN
  171. sub KK = r0, OFFSET
  172. #endif
  173. #ifdef RT
  174. { .mmi
  175. setf.sig f32 = N
  176. setf.sig f33 = K
  177. nop __LINE__
  178. }
  179. ;;
  180. { .mmi
  181. setf.sig f34 = LDC
  182. nop __LINE__
  183. nop __LINE__
  184. }
  185. ;;
  186. { .mmf
  187. nop __LINE__
  188. nop __LINE__
  189. xmpy.l f33 = f32, f33
  190. }
  191. { .mmf
  192. nop __LINE__
  193. sub KK = N, OFFSET
  194. xmpy.l f34 = f32, f34
  195. }
  196. ;;
  197. { .mmi
  198. getf.sig r2 = f33
  199. getf.sig r3 = f34
  200. }
  201. ;;
  202. shladd B = r2, ZBASE_SHIFT, B
  203. add C = r3, C
  204. #endif
  205. ;;
  206. .body
  207. { .mfi
  208. nop __LINE__
  209. mov f80 = f0
  210. mov ARLC = ar.lc
  211. }
  212. { .mfb
  213. cmp.ge p6, p0 = 0, J
  214. mov f112 = f0
  215. (p6) br.cond.dpnt .L050
  216. }
  217. ;;
  218. .align 16
  219. .L010:
  220. #ifdef RT
  221. { .mmi
  222. shladd r3 = LDC, 2, r0
  223. nop __LINE__
  224. shl r2 = K, 2 + ZBASE_SHIFT
  225. }
  226. ;;
  227. { .mmi
  228. sub B = B, r2
  229. sub C = C, r3
  230. nop __LINE__
  231. }
  232. ;;
  233. #endif
  234. { .mmi
  235. mov C1 = C // coffset1 = c + 0 * ldc
  236. add C2 = LDC, C // coffset2 = c + 1 * ldc
  237. }
  238. { .mmi
  239. adds J = -1, J
  240. #ifdef LN
  241. add KK = M, OFFSET
  242. #elif defined LT
  243. mov KK = OFFSET
  244. #else
  245. nop __LINE__
  246. #endif
  247. #if defined(LN) || defined(RT)
  248. mov AORIG = A
  249. #else
  250. mov AOFFSET = A
  251. #endif
  252. }
  253. ;;
  254. { .mmi
  255. shladd C3 = LDC, 1, C // coffset3 = c + 2 * ldc
  256. shladd C4 = LDC, 1, C2 // coffset4 = c + 3 * ldc
  257. #if defined(LT) || defined(RN)
  258. mov L = KK
  259. #else
  260. sub L = K, KK
  261. #endif
  262. }
  263. { .mib
  264. #ifndef RT
  265. shladd C = LDC, 2, C // coffset += 8 * ldc
  266. #else
  267. nop __LINE__
  268. #endif
  269. tbit.z p6, p7 = M, 0
  270. (p6) br.cond.dptk .L020
  271. }
  272. ;;
  273. { .mmi
  274. cmp.ne p7, p0 = r0, L
  275. adds BOFFSET = 0 * SIZE, B
  276. shl r2 = K, ZBASE_SHIFT
  277. }
  278. { .mmi
  279. shladd r3 = KK, ZBASE_SHIFT, r0
  280. nop __LINE__
  281. nop __LINE__
  282. }
  283. ;;
  284. #if defined(LT) || defined(RN)
  285. { .mfb
  286. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  287. mov f72 = f0
  288. nop __LINE__
  289. }
  290. { .mmf
  291. nop __LINE__
  292. nop __LINE__
  293. mov f73 = f0
  294. }
  295. ;;
  296. #else
  297. { .mfi
  298. shladd BOFFSET = r3, 2, B
  299. mov f72 = f0
  300. #ifdef LN
  301. sub AORIG = AORIG, r2
  302. #else
  303. nop __LINE__
  304. #endif
  305. }
  306. ;;
  307. { .mfi
  308. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  309. mov f73 = f0
  310. add AOFFSET = r3, AORIG
  311. }
  312. ;;
  313. #endif
  314. ;;
  315. adds L = 1, L
  316. ;;
  317. { .mmi
  318. nop __LINE__
  319. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  320. tbit.z p12, p0 = L, 0
  321. }
  322. ;;
  323. { .mfi
  324. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  325. mov f88 = f0
  326. shr L = L, 1
  327. }
  328. { .mfi
  329. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  330. mov f89 = f0
  331. nop __LINE__
  332. }
  333. ;;
  334. { .mfi
  335. (p7) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  336. mov f104 = f0
  337. adds L = -1, L
  338. }
  339. { .mfb
  340. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  341. mov f105 = f0
  342. nop __LINE__
  343. }
  344. ;;
  345. { .mfi
  346. (p7) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  347. mov f120 = f0
  348. mov ar.lc = L
  349. }
  350. { .mfi
  351. cmp.eq p3, p0 = r0, r0
  352. mov f121 = f0
  353. nop __LINE__
  354. }
  355. ;;
  356. cmp.eq p6, p0 = -1, L
  357. (p6) br.cond.dpnt .L038
  358. ;;
  359. .align 16
  360. .L032:
  361. { .mfb
  362. lfetch.nt1 [PREA], 4 * SIZE
  363. FMA f64 = f32, f48, f64 // A1 * B1
  364. nop __LINE__
  365. }
  366. { .mfi
  367. nop __LINE__
  368. FMA_B f65 = f32, f49, f65 // A1 * B2
  369. (p12) cmp.ne p3, p0 = 0, L
  370. }
  371. ;;
  372. { .mfi
  373. lfetch.nt1 [PREB], 16 * SIZE
  374. FMA f80 = f32, f50, f80 // A1 * B3
  375. cmp.ne p4, p5 = 0, L
  376. }
  377. { .mfb
  378. nop __LINE__
  379. FMA_B f81 = f32, f51, f81 // A1 * B4
  380. nop __LINE__
  381. }
  382. ;;
  383. { .mfb
  384. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  385. FMA f96 = f32, f52, f96 // A1 * B5
  386. nop __LINE__
  387. }
  388. { .mfb
  389. nop __LINE__
  390. FMA_B f97 = f32, f53, f97 // A1 * B6
  391. nop __LINE__
  392. }
  393. ;;
  394. { .mfb
  395. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  396. FMA f112 = f32, f54, f112 // A1 * B7
  397. nop __LINE__
  398. }
  399. { .mfb
  400. nop __LINE__
  401. FMA_B f113 = f32, f55, f113 // A1 * B8
  402. nop __LINE__
  403. }
  404. ;;
  405. { .mfb
  406. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  407. FMA f65 = f33, f48, f65 // A2 * B1
  408. nop __LINE__
  409. }
  410. { .mfb
  411. nop __LINE__
  412. FMA_A f64 = f33, f49, f64 // A2 * B2
  413. nop __LINE__
  414. }
  415. ;;
  416. { .mfb
  417. (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  418. FMA f81 = f33, f50, f81 // A2 * B3
  419. nop __LINE__
  420. }
  421. { .mfb
  422. nop __LINE__
  423. FMA_A f80 = f33, f51, f80 // A2 * B4
  424. nop __LINE__
  425. }
  426. ;;
  427. { .mfb
  428. (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  429. FMA f97 = f33, f52, f97 // A2 * B5
  430. nop __LINE__
  431. }
  432. { .mfb
  433. nop __LINE__
  434. FMA_A f96 = f33, f53, f96 // A2 * B6
  435. nop __LINE__
  436. }
  437. ;;
  438. { .mfb
  439. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  440. FMA f113 = f33, f54, f113 // A2 * B7
  441. nop __LINE__
  442. }
  443. { .mfb
  444. nop __LINE__
  445. FMA_A f112 = f33, f55, f112 // A2 * B8
  446. nop __LINE__
  447. }
  448. ;;
  449. { .mfb
  450. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  451. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  452. nop __LINE__
  453. }
  454. { .mfb
  455. nop __LINE__
  456. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  457. nop __LINE__
  458. }
  459. ;;
  460. { .mfb
  461. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  462. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  463. nop __LINE__
  464. }
  465. { .mfb
  466. nop __LINE__
  467. (p3) FMA_B f81 = f40, f59, f81 // A1 * B4
  468. nop __LINE__
  469. }
  470. ;;
  471. { .mfb
  472. (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  473. (p3) FMA f96 = f40, f60, f96 // A1 * B5
  474. nop __LINE__
  475. }
  476. { .mfb
  477. nop __LINE__
  478. (p3) FMA_B f97 = f40, f61, f97 // A1 * B6
  479. nop __LINE__
  480. }
  481. ;;
  482. { .mfb
  483. nop __LINE__
  484. (p3) FMA f112 = f40, f62, f112 // A1 * B7
  485. nop __LINE__
  486. }
  487. { .mfb
  488. nop __LINE__
  489. (p3) FMA_B f113 = f40, f63, f113 // A1 * B8
  490. nop __LINE__
  491. }
  492. ;;
  493. { .mfb
  494. (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  495. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  496. nop __LINE__
  497. }
  498. { .mfb
  499. nop __LINE__
  500. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  501. nop __LINE__
  502. }
  503. ;;
  504. { .mfb
  505. nop __LINE__
  506. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  507. nop __LINE__
  508. }
  509. { .mfb
  510. nop __LINE__
  511. (p3) FMA_A f80 = f41, f59, f80 // A2 * B4
  512. nop __LINE__
  513. }
  514. ;;
  515. { .mfb
  516. nop __LINE__
  517. (p3) FMA f97 = f41, f60, f97 // A2 * B5
  518. nop __LINE__
  519. }
  520. { .mfb
  521. nop __LINE__
  522. (p3) FMA_A f96 = f41, f61, f96 // A2 * B6
  523. nop __LINE__
  524. }
  525. ;;
  526. { .mfi
  527. nop __LINE__
  528. (p3) FMA f113 = f41, f62, f113 // A2 * B7
  529. adds L = -1, L
  530. }
  531. { .mfb
  532. nop __LINE__
  533. (p3) FMA_A f112 = f41, f63, f112 // A2 * B8
  534. br.cloop.sptk.few .L032
  535. }
  536. ;;
  537. .L038:
  538. #if defined(LN) || defined(RT)
  539. #ifdef LN
  540. adds r2 = -1, KK
  541. #else
  542. adds r2 = -4, KK
  543. #endif
  544. ;;
  545. shladd r2 = r2, ZBASE_SHIFT, r0
  546. ;;
  547. add AOFFSET = r2, AORIG
  548. shladd BOFFSET = r2, 2, B
  549. ;;
  550. #endif
  551. #if defined(LN) || defined(LT)
  552. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  553. ;;
  554. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  555. ;;
  556. LDFPD f104, f105 = [BOFFSET], 2 * SIZE
  557. ;;
  558. LDFPD f120, f121 = [BOFFSET]
  559. adds BOFFSET = -6 * SIZE, BOFFSET
  560. ;;
  561. FSUB f64 = f72, f64
  562. FSUB_A f65 = f73, f65
  563. FSUB f80 = f88, f80
  564. FSUB_A f81 = f89, f81
  565. FSUB f96 = f104, f96
  566. FSUB_A f97 = f105, f97
  567. FSUB f112 = f120, f112
  568. FSUB_A f113 = f121, f113
  569. ;;
  570. #else
  571. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  572. ;;
  573. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  574. ;;
  575. LDFPD f104, f105 = [AOFFSET], 2 * SIZE
  576. ;;
  577. LDFPD f120, f121 = [AOFFSET]
  578. adds AOFFSET = -6 * SIZE, AOFFSET
  579. ;;
  580. FSUB f64 = f72, f64
  581. FSUB f65 = f73, f65
  582. FSUB f80 = f88, f80
  583. FSUB f81 = f89, f81
  584. FSUB f96 = f104, f96
  585. FSUB f97 = f105, f97
  586. FSUB f112 = f120, f112
  587. FSUB f113 = f121, f113
  588. ;;
  589. #endif
  590. #ifdef LN
  591. LDFPD f120, f121 = [AOFFSET]
  592. ;;
  593. FMPY f32 = f120, f64
  594. FMPY f33 = f121, f64
  595. FMPY f34 = f120, f80
  596. FMPY f35 = f121, f80
  597. FMPY f36 = f120, f96
  598. FMPY f37 = f121, f96
  599. FMPY f38 = f120, f112
  600. FMPY f39 = f121, f112
  601. ;;
  602. FMA_C f64 = f121, f65, f32
  603. FMA_D f65 = f120, f65, f33
  604. FMA_C f80 = f121, f81, f34
  605. FMA_D f81 = f120, f81, f35
  606. FMA_C f96 = f121, f97, f36
  607. FMA_D f97 = f120, f97, f37
  608. FMA_C f112 = f121, f113, f38
  609. FMA_D f113 = f120, f113, f39
  610. ;;
  611. #endif
  612. #ifdef LT
  613. LDFPD f90, f91 = [AOFFSET]
  614. ;;
  615. FMPY f32 = f90, f64
  616. FMPY f33 = f91, f64
  617. FMPY f34 = f90, f80
  618. FMPY f35 = f91, f80
  619. FMPY f36 = f90, f96
  620. FMPY f37 = f91, f96
  621. FMPY f38 = f90, f112
  622. FMPY f39 = f91, f112
  623. ;;
  624. FMA_C f64 = f91, f65, f32
  625. FMA_D f65 = f90, f65, f33
  626. FMA_C f80 = f91, f81, f34
  627. FMA_D f81 = f90, f81, f35
  628. FMA_C f96 = f91, f97, f36
  629. FMA_D f97 = f90, f97, f37
  630. FMA_C f112 = f91, f113, f38
  631. FMA_D f113 = f90, f113, f39
  632. ;;
  633. #endif
  634. #ifdef RN
  635. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  636. ;;
  637. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  638. ;;
  639. LDFPD f76, f77 = [BOFFSET], 2 * SIZE
  640. ;;
  641. LDFPD f78, f79 = [BOFFSET]
  642. adds BOFFSET = 4 * SIZE, BOFFSET
  643. ;;
  644. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  645. ;;
  646. LDFPD f92, f93 = [BOFFSET], 2 * SIZE
  647. ;;
  648. LDFPD f94, f95 = [BOFFSET]
  649. adds BOFFSET = 6 * SIZE, BOFFSET
  650. ;;
  651. LDFPD f108, f109 = [BOFFSET], 2 * SIZE
  652. ;;
  653. LDFPD f110, f111 = [BOFFSET]
  654. adds BOFFSET = 8 * SIZE, BOFFSET
  655. ;;
  656. LDFPD f126, f127 = [BOFFSET]
  657. adds BOFFSET = - 30 * SIZE, BOFFSET
  658. ;;
  659. FMPY f32 = f72, f64
  660. FMPY f33 = f73, f64
  661. ;;
  662. FMA_C f64 = f73, f65, f32
  663. FMA_D f65 = f72, f65, f33
  664. ;;
  665. FNMA f80 = f74, f64, f80
  666. FMA_A f81 = f75, f64, f81
  667. ;;
  668. FMA_B f80 = f75, f65, f80
  669. FNMA f81 = f74, f65, f81
  670. ;;
  671. FNMA f96 = f76, f64, f96
  672. FMA_A f97 = f77, f64, f97
  673. ;;
  674. FMA_B f96 = f77, f65, f96
  675. FNMA f97 = f76, f65, f97
  676. ;;
  677. FNMA f112 = f78, f64, f112
  678. FMA_A f113 = f79, f64, f113
  679. ;;
  680. FMA_B f112 = f79, f65, f112
  681. FNMA f113 = f78, f65, f113
  682. ;;
  683. FMPY f32 = f90, f80
  684. FMPY f33 = f91, f80
  685. ;;
  686. FMA_C f80 = f91, f81, f32
  687. FMA_D f81 = f90, f81, f33
  688. ;;
  689. FNMA f96 = f92, f80, f96
  690. FMA_A f97 = f93, f80, f97
  691. ;;
  692. FMA_B f96 = f93, f81, f96
  693. FNMA f97 = f92, f81, f97
  694. ;;
  695. FNMA f112 = f94, f80, f112
  696. FMA_A f113 = f95, f80, f113
  697. ;;
  698. FMA_B f112 = f95, f81, f112
  699. FNMA f113 = f94, f81, f113
  700. ;;
  701. FMPY f32 = f108, f96
  702. FMPY f33 = f109, f96
  703. ;;
  704. FMA_C f96 = f109, f97, f32
  705. FMA_D f97 = f108, f97, f33
  706. ;;
  707. FNMA f112 = f110, f96, f112
  708. FMA_A f113 = f111, f96, f113
  709. ;;
  710. FMA_B f112 = f111, f97, f112
  711. FNMA f113 = f110, f97, f113
  712. ;;
  713. FMPY f32 = f126, f112
  714. FMPY f33 = f127, f112
  715. ;;
  716. FMA_C f112 = f127, f113, f32
  717. FMA_D f113 = f126, f113, f33
  718. ;;
  719. #endif
  720. #ifdef RT
  721. adds BOFFSET = 30 * SIZE, BOFFSET
  722. ;;
  723. LDFPD f72, f73 = [BOFFSET]
  724. adds BOFFSET = - 2 * SIZE, BOFFSET
  725. ;;
  726. LDFPD f74, f75 = [BOFFSET]
  727. adds BOFFSET = - 2 * SIZE, BOFFSET
  728. ;;
  729. LDFPD f76, f77 = [BOFFSET]
  730. adds BOFFSET = - 2 * SIZE, BOFFSET
  731. ;;
  732. LDFPD f78, f79 = [BOFFSET]
  733. adds BOFFSET = - 4 * SIZE, BOFFSET
  734. ;;
  735. LDFPD f88, f89 = [BOFFSET]
  736. adds BOFFSET = - 2 * SIZE, BOFFSET
  737. ;;
  738. LDFPD f90, f91 = [BOFFSET]
  739. adds BOFFSET = - 2 * SIZE, BOFFSET
  740. ;;
  741. LDFPD f92, f93 = [BOFFSET]
  742. adds BOFFSET = - 6 * SIZE, BOFFSET
  743. ;;
  744. LDFPD f104, f105 = [BOFFSET]
  745. adds BOFFSET = - 2 * SIZE, BOFFSET
  746. ;;
  747. LDFPD f106, f107 = [BOFFSET]
  748. adds BOFFSET = - 8 * SIZE, BOFFSET
  749. ;;
  750. LDFPD f120, f121 = [BOFFSET]
  751. ;;
  752. FMPY f32 = f72, f112
  753. FMPY f33 = f73, f112
  754. ;;
  755. FMA_C f112 = f73, f113, f32
  756. FMA_D f113 = f72, f113, f33
  757. ;;
  758. FNMA f96 = f74, f112, f96
  759. FMA_A f97 = f75, f112, f97
  760. ;;
  761. FMA_B f96 = f75, f113, f96
  762. FNMA f97 = f74, f113, f97
  763. ;;
  764. FNMA f80 = f76, f112, f80
  765. FMA_A f81 = f77, f112, f81
  766. ;;
  767. FMA_B f80 = f77, f113, f80
  768. FNMA f81 = f76, f113, f81
  769. ;;
  770. FNMA f64 = f78, f112, f64
  771. FMA_A f65 = f79, f112, f65
  772. ;;
  773. FMA_B f64 = f79, f113, f64
  774. FNMA f65 = f78, f113, f65
  775. ;;
  776. FMPY f32 = f88, f96
  777. FMPY f33 = f89, f96
  778. ;;
  779. FMA_C f96 = f89, f97, f32
  780. FMA_D f97 = f88, f97, f33
  781. ;;
  782. FNMA f80 = f90, f96, f80
  783. FMA_A f81 = f91, f96, f81
  784. ;;
  785. FMA_B f80 = f91, f97, f80
  786. FNMA f81 = f90, f97, f81
  787. ;;
  788. FNMA f64 = f92, f96, f64
  789. FMA_A f65 = f93, f96, f65
  790. ;;
  791. FMA_B f64 = f93, f97, f64
  792. FNMA f65 = f92, f97, f65
  793. ;;
  794. FMPY f32 = f104, f80
  795. FMPY f33 = f105, f80
  796. ;;
  797. FMA_C f80 = f105, f81, f32
  798. FMA_D f81 = f104, f81, f33
  799. ;;
  800. FNMA f64 = f106, f80, f64
  801. FMA_A f65 = f107, f80, f65
  802. ;;
  803. FMA_B f64 = f107, f81, f64
  804. FNMA f65 = f106, f81, f65
  805. ;;
  806. FMPY f32 = f120, f64
  807. FMPY f33 = f121, f64
  808. ;;
  809. FMA_C f64 = f121, f65, f32
  810. FMA_D f65 = f120, f65, f33
  811. ;;
  812. #endif
  813. #if defined(LN) || defined(LT)
  814. adds BOFFSET2 = 4 * SIZE, BOFFSET
  815. ;;
  816. STFD [BOFFSET] = f64, SIZE
  817. STFD [BOFFSET2] = f96, SIZE
  818. ;;
  819. STFD [BOFFSET] = f65, SIZE
  820. STFD [BOFFSET2] = f97, SIZE
  821. ;;
  822. STFD [BOFFSET] = f80, SIZE
  823. STFD [BOFFSET2] = f112, SIZE
  824. ;;
  825. STFD [BOFFSET] = f81, 5 * SIZE
  826. STFD [BOFFSET2] = f113, 5 * SIZE
  827. ;;
  828. adds BOFFSET = - 8 * SIZE, BOFFSET
  829. ;;
  830. #else
  831. adds AOFFSET2 = 4 * SIZE, AOFFSET
  832. ;;
  833. STFD [AOFFSET] = f64, SIZE
  834. STFD [AOFFSET2] = f96, SIZE
  835. ;;
  836. STFD [AOFFSET] = f65, SIZE
  837. STFD [AOFFSET2] = f97, SIZE
  838. ;;
  839. STFD [AOFFSET] = f80, SIZE
  840. STFD [AOFFSET2] = f112, SIZE
  841. ;;
  842. STFD [AOFFSET] = f81, 5 * SIZE
  843. STFD [AOFFSET2] = f113, 5 * SIZE
  844. ;;
  845. adds AOFFSET = - 8 * SIZE, AOFFSET
  846. ;;
  847. #endif
  848. #ifdef LN
  849. adds C1 = -2 * SIZE, C1
  850. adds C2 = -2 * SIZE, C2
  851. adds C3 = -2 * SIZE, C3
  852. adds C4 = -2 * SIZE, C4
  853. #endif
  854. ;;
  855. STFD [C1 ] = f64, SIZE
  856. ;;
  857. STFD [C1 ] = f65, SIZE
  858. ;;
  859. STFD [C2 ] = f80, SIZE
  860. ;;
  861. STFD [C2 ] = f81, SIZE
  862. ;;
  863. STFD [C3 ] = f96, SIZE
  864. ;;
  865. STFD [C3 ] = f97, SIZE
  866. ;;
  867. STFD [C4 ] = f112, SIZE
  868. ;;
  869. STFD [C4 ] = f113, SIZE
  870. ;;
  871. mov f64 = f0
  872. mov f65 = f0
  873. mov f80 = f0
  874. mov f81 = f0
  875. mov f96 = f0
  876. mov f97 = f0
  877. mov f112 = f0
  878. mov f113 = f0
  879. ;;
  880. #ifdef LN
  881. adds C1 = -2 * SIZE, C1
  882. adds C2 = -2 * SIZE, C2
  883. adds C3 = -2 * SIZE, C3
  884. adds C4 = -2 * SIZE, C4
  885. #endif
  886. ;;
  887. cmp.ne p6, p0 = 1, I
  888. ;;
  889. adds I = -1, I
  890. ;;
  891. shladd r2 = K, ZBASE_SHIFT, r0
  892. ;;
  893. sub L = K, KK
  894. ;;
  895. #ifdef RT
  896. add AORIG = r2, AORIG
  897. #endif
  898. ;;
  899. #if defined(LT) || defined(RN)
  900. shladd L = L, ZBASE_SHIFT, r0
  901. ;;
  902. add AOFFSET = L, AOFFSET
  903. shladd BOFFSET = L, 2, BOFFSET
  904. #endif
  905. ;;
  906. #ifdef LT
  907. adds KK = 1, KK
  908. #elif defined LN
  909. adds KK = -1, KK
  910. #else
  911. nop __LINE__
  912. #endif
  913. ;;
  914. #if defined(LT) || defined(RN)
  915. mov L = KK
  916. #else
  917. sub L = K, KK
  918. #endif
  919. ;;
  // .L020: prologue for one tile that is two complex A entries wide
  // (four doubles f32-f35 per k-step) against four complex B entries
  // (eight doubles f48-f55 per k-step). Taken only when bit 1 of M is set.
  // It computes the k-loop trip count, positions AOFFSET/BOFFSET, clears the
  // second-row accumulators and preloads the first k-step operands.
  // LDFPD is a macro defined outside this chunk; as used here it loads a
  // register pair and post-increments the address register.
  920. .align 16
  921. .L020:
  // L = number of k-steps for the update loop: KK for LT/RN, K - KK otherwise.
  // p6 is set when bit 1 of M is zero, in which case this tile is skipped.
  922. { .mib
  923. #if defined(LT) || defined(RN)
  924. mov L = KK
  925. #else
  926. sub L = K, KK
  927. #endif
  928. tbit.z p6, p7 = M, 1
  929. (p6) br.cond.dptk .L010x
  930. }
  931. ;;
  // p7 = (L != 0) guards every preload below, so nothing is read when the
  // loop will not run. r2 = K << (1 + ZBASE_SHIFT), r3 = KK << ZBASE_SHIFT.
  932. { .mmi
  933. cmp.ne p7, p0 = r0, L
  934. adds BOFFSET = 0 * SIZE, B
  935. shl r2 = K, 1 + ZBASE_SHIFT
  936. }
  937. { .mmi
  938. shladd r3 = KK, ZBASE_SHIFT, r0
  939. nop __LINE__
  940. nop __LINE__
  941. }
  942. ;;
  943. #if defined(LT) || defined(RN)
  // LT/RN: B is read from its start; AOFFSET is used as it stands.
  944. { .mfb
  945. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  946. mov f66 = f0
  947. nop __LINE__
  948. }
  949. { .mmf
  950. nop __LINE__
  951. nop __LINE__
  952. mov f67 = f0
  953. }
  954. ;;
  955. #else
  // LN/RT: skip the first KK k-steps. BOFFSET = B + 4 * r3 and
  // AOFFSET = AORIG + 2 * r3. For LN, AORIG is first moved back by r2.
  956. { .mfi
  957. shladd BOFFSET = r3, 2, B
  958. mov f66 = f0
  959. #ifdef LN
  960. sub AORIG = AORIG, r2
  961. #else
  962. nop __LINE__
  963. #endif
  964. }
  965. ;;
  966. { .mfi
  967. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  968. mov f67 = f0
  969. shladd AOFFSET = r3, 1, AORIG
  970. }
  971. ;;
  972. #endif
  973. ;;
  // Trip count for a loop unrolled by two: ar.lc = ((L + 1) >> 1) - 1.
  // p12 is set when bit 0 of (L + 1) is zero, i.e. the original L is odd;
  // the loop uses it to drop the second half of its final pass.
  974. adds L = 1, L
  975. ;;
  // tbit.z reads L in the same group in which shr rewrites it; it sees the
  // value from before the group (L + 1).
  976. { .mfi
  977. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  978. mov f82 = f0
  979. tbit.z p12, p0 = L, 0
  980. }
  981. { .mfi
  982. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  983. mov f83 = f0
  984. shr L = L, 1
  985. }
  986. ;;
  // p3 starts out true (r0 == r0); it enables the second unrolled half.
  987. { .mfi
  988. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  989. mov f98 = f0
  990. adds L = -1, L
  991. }
  992. { .mfi
  993. (p7) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  994. mov f99 = f0
  995. cmp.eq p3, p0 = r0, r0
  996. }
  997. ;;
  998. { .mfi
  999. (p7) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  1000. mov f114 = f0
  1001. mov ar.lc = L
  1002. }
  1003. { .mfi
  1004. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  1005. mov f115 = f0
  1006. nop __LINE__
  1007. }
  1008. ;;
  // L == -1 here means the original count was zero: skip the update loop.
  1009. cmp.eq p6, p0 = -1, L
  1010. (p6) br.cond.dpnt .L028
  1011. ;;
  // .L022: update loop for the 2x4 tile, two k-steps per pass.
  //   first half : A in f32-f35 (A1..A4), B in f48-f55 (B1..B8)
  //   second half: A in f40-f43,          B in f56-f63, all under p3
  // Accumulators: f64-f67, f80-f83, f96-f99, f112-f115.
  // FMA, FMA_A and FMA_B are macros defined outside this chunk; presumably
  // they select add or subtract forms for the conjugation variant being
  // built -- confirm against the macro definitions.
  // Predicates inside the loop:
  //   p3 - second half enabled; cleared on the last pass when p12 is set
  //   p4 - (L != 0): reload the first-half operands for the next pass
  // L counts down by one per pass in step with ar.lc (br.cloop).
  1012. .align 16
  1013. .L022:
  // PREA advances by post-increment; PREB is recomputed from BOFFSET on
  // every pass.
  1014. { .mfi
  1015. lfetch.nt1 [PREA], 8 * SIZE
  1016. FMA f64 = f32, f48, f64 // A1 * B1
  1017. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  1018. }
  1019. { .mfi
  1020. nop __LINE__
  1021. FMA_B f65 = f32, f49, f65 // A1 * B2
  1022. (p12) cmp.ne p3, p0 = 0, L
  1023. }
  1024. ;;
  1025. { .mfi
  1026. lfetch.nt1 [PREB], 16 * SIZE
  1027. FMA f80 = f32, f50, f80 // A1 * B3
  1028. cmp.ne p4, p5 = 0, L
  1029. }
  1030. { .mfb
  1031. nop __LINE__
  1032. FMA_B f81 = f32, f51, f81 // A1 * B4
  1033. nop __LINE__
  1034. }
  1035. ;;
  // Loads for the second half (p3) are interleaved with first-half math.
  1036. { .mfb
  1037. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  1038. FMA f96 = f32, f52, f96 // A1 * B5
  1039. nop __LINE__
  1040. }
  1041. { .mfb
  1042. nop __LINE__
  1043. FMA_B f97 = f32, f53, f97 // A1 * B6
  1044. nop __LINE__
  1045. }
  1046. ;;
  1047. { .mfb
  1048. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  1049. FMA f112 = f32, f54, f112 // A1 * B7
  1050. nop __LINE__
  1051. }
  1052. { .mfb
  1053. nop __LINE__
  1054. FMA_B f113 = f32, f55, f113 // A1 * B8
  1055. nop __LINE__
  1056. }
  1057. ;;
  1058. { .mfb
  1059. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  1060. FMA f65 = f33, f48, f65 // A2 * B1
  1061. nop __LINE__
  1062. }
  1063. { .mfb
  1064. nop __LINE__
  1065. FMA_A f64 = f33, f49, f64 // A2 * B2
  1066. nop __LINE__
  1067. }
  1068. ;;
  1069. { .mfb
  1070. (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  1071. FMA f81 = f33, f50, f81 // A2 * B3
  1072. nop __LINE__
  1073. }
  1074. { .mfb
  1075. nop __LINE__
  1076. FMA_A f80 = f33, f51, f80 // A2 * B4
  1077. nop __LINE__
  1078. }
  1079. ;;
  1080. { .mfb
  1081. (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  1082. FMA f97 = f33, f52, f97 // A2 * B5
  1083. nop __LINE__
  1084. }
  1085. { .mfb
  1086. nop __LINE__
  1087. FMA_A f96 = f33, f53, f96 // A2 * B6
  1088. nop __LINE__
  1089. }
  1090. ;;
  1091. { .mfb
  1092. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  1093. FMA f113 = f33, f54, f113 // A2 * B7
  1094. nop __LINE__
  1095. }
  1096. { .mfb
  1097. nop __LINE__
  1098. FMA_A f112 = f33, f55, f112 // A2 * B8
  1099. nop __LINE__
  1100. }
  1101. ;;
  // Second A pair (A3, A4) feeds the second accumulator row
  // f66/f67, f82/f83, f98/f99, f114/f115.
  1102. { .mfb
  1103. nop __LINE__
  1104. FMA f66 = f34, f48, f66 // A3 * B1
  1105. nop __LINE__
  1106. }
  1107. { .mfb
  1108. nop __LINE__
  1109. FMA_B f67 = f34, f49, f67 // A3 * B2
  1110. nop __LINE__
  1111. }
  1112. ;;
  1113. { .mfb
  1114. nop __LINE__
  1115. FMA f82 = f34, f50, f82 // A3 * B3
  1116. nop __LINE__
  1117. }
  1118. { .mfb
  1119. nop __LINE__
  1120. FMA_B f83 = f34, f51, f83 // A3 * B4
  1121. nop __LINE__
  1122. }
  1123. ;;
  1124. { .mfb
  1125. nop __LINE__
  1126. FMA f98 = f34, f52, f98 // A3 * B5
  1127. nop __LINE__
  1128. }
  1129. { .mfb
  1130. nop __LINE__
  1131. FMA_B f99 = f34, f53, f99 // A3 * B6
  1132. nop __LINE__
  1133. }
  1134. ;;
  1135. { .mfb
  1136. nop __LINE__
  1137. FMA f114 = f34, f54, f114 // A3 * B7
  1138. nop __LINE__
  1139. }
  1140. { .mfb
  1141. nop __LINE__
  1142. FMA_B f115 = f34, f55, f115 // A3 * B8
  1143. nop __LINE__
  1144. }
  1145. ;;
  1146. { .mfb
  1147. nop __LINE__
  1148. FMA f67 = f35, f48, f67 // A4 * B1
  1149. nop __LINE__
  1150. }
  1151. { .mfb
  1152. nop __LINE__
  1153. FMA_A f66 = f35, f49, f66 // A4 * B2
  1154. nop __LINE__
  1155. }
  1156. ;;
  1157. { .mfb
  1158. nop __LINE__
  1159. FMA f83 = f35, f50, f83 // A4 * B3
  1160. nop __LINE__
  1161. }
  1162. { .mfb
  1163. nop __LINE__
  1164. FMA_A f82 = f35, f51, f82 // A4 * B4
  1165. nop __LINE__
  1166. }
  1167. ;;
  // From here the first-half operands are reloaded for the next pass (p4)
  // once their last readers in this pass have issued or share the group.
  1168. { .mfb
  1169. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1170. FMA f99 = f35, f52, f99 // A4 * B5
  1171. nop __LINE__
  1172. }
  1173. { .mfb
  1174. nop __LINE__
  1175. FMA_A f98 = f35, f53, f98 // A4 * B6
  1176. nop __LINE__
  1177. }
  1178. ;;
  1179. { .mfb
  1180. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  1181. FMA f115 = f35, f54, f115 // A4 * B7
  1182. nop __LINE__
  1183. }
  1184. { .mfb
  1185. nop __LINE__
  1186. FMA_A f114 = f35, f55, f114 // A4 * B8
  1187. nop __LINE__
  1188. }
  1189. ;;
  // Second unrolled k-step: same pattern with f40-f43 and f56-f63, every
  // arithmetic instruction predicated on p3.
  1190. { .mfb
  1191. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  1192. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  1193. nop __LINE__
  1194. }
  1195. { .mfb
  1196. nop __LINE__
  1197. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  1198. nop __LINE__
  1199. }
  1200. ;;
  1201. { .mfb
  1202. (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  1203. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  1204. nop __LINE__
  1205. }
  1206. { .mfb
  1207. nop __LINE__
  1208. (p3) FMA_B f81 = f40, f59, f81 // A1 * B4
  1209. nop __LINE__
  1210. }
  1211. ;;
  1212. { .mfb
  1213. nop __LINE__
  1214. (p3) FMA f96 = f40, f60, f96 // A1 * B5
  1215. nop __LINE__
  1216. }
  1217. { .mfb
  1218. nop __LINE__
  1219. (p3) FMA_B f97 = f40, f61, f97 // A1 * B6
  1220. nop __LINE__
  1221. }
  1222. ;;
  1223. { .mfb
  1224. nop __LINE__
  1225. (p3) FMA f112 = f40, f62, f112 // A1 * B7
  1226. nop __LINE__
  1227. }
  1228. { .mfb
  1229. nop __LINE__
  1230. (p3) FMA_B f113 = f40, f63, f113 // A1 * B8
  1231. nop __LINE__
  1232. }
  1233. ;;
  // NOTE(review): the two FMA_A bundles in the next group list only two
  // instructions under a three-slot .mfb template, and there is no stop
  // between the A2*B1/B2 pair and the A2*B3/B4 pair, unlike every other
  // pair in this loop. The four instructions touch distinct registers, so
  // no dependency is violated, but it looks like a dropped nop and stop.
  // Confirm the assembler pads the empty M slot as intended.
  1234. { .mfb
  1235. (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  1236. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  1237. nop __LINE__
  1238. }
  1239. { .mfb
  1240. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  1241. nop __LINE__
  1242. }
  1243. { .mfb
  1244. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  1245. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  1246. nop __LINE__
  1247. }
  1248. { .mfb
  1249. (p3) FMA_A f80 = f41, f59, f80 // A2 * B4
  1250. nop __LINE__
  1251. }
  1252. ;;
  1253. { .mfb
  1254. nop __LINE__
  1255. (p3) FMA f97 = f41, f60, f97 // A2 * B5
  1256. nop __LINE__
  1257. }
  1258. { .mfb
  1259. nop __LINE__
  1260. (p3) FMA_A f96 = f41, f61, f96 // A2 * B6
  1261. nop __LINE__
  1262. }
  1263. ;;
  1264. { .mfb
  1265. nop __LINE__
  1266. (p3) FMA f113 = f41, f62, f113 // A2 * B7
  1267. nop __LINE__
  1268. }
  1269. { .mfb
  1270. nop __LINE__
  1271. (p3) FMA_A f112 = f41, f63, f112 // A2 * B8
  1272. nop __LINE__
  1273. }
  1274. ;;
  1275. { .mfb
  1276. nop __LINE__
  1277. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  1278. nop __LINE__
  1279. }
  1280. { .mfb
  1281. nop __LINE__
  1282. (p3) FMA_B f67 = f42, f57, f67 // A3 * B2
  1283. nop __LINE__
  1284. }
  1285. ;;
  1286. { .mfb
  1287. nop __LINE__
  1288. (p3) FMA f82 = f42, f58, f82 // A3 * B3
  1289. nop __LINE__
  1290. }
  1291. { .mfb
  1292. nop __LINE__
  1293. (p3) FMA_B f83 = f42, f59, f83 // A3 * B4
  1294. nop __LINE__
  1295. }
  1296. ;;
  1297. { .mfb
  1298. nop __LINE__
  1299. (p3) FMA f98 = f42, f60, f98 // A3 * B5
  1300. nop __LINE__
  1301. }
  1302. { .mfb
  1303. nop __LINE__
  1304. (p3) FMA_B f99 = f42, f61, f99 // A3 * B6
  1305. nop __LINE__
  1306. }
  1307. ;;
  1308. { .mfb
  1309. nop __LINE__
  1310. (p3) FMA f114 = f42, f62, f114 // A3 * B7
  1311. nop __LINE__
  1312. }
  1313. { .mfb
  1314. nop __LINE__
  1315. (p3) FMA_B f115 = f42, f63, f115 // A3 * B8
  1316. nop __LINE__
  1317. }
  1318. ;;
  1319. { .mfb
  1320. nop __LINE__
  1321. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  1322. nop __LINE__
  1323. }
  1324. { .mfb
  1325. nop __LINE__
  1326. (p3) FMA_A f66 = f43, f57, f66 // A4 * B2
  1327. nop __LINE__
  1328. }
  1329. ;;
  1330. { .mfb
  1331. nop __LINE__
  1332. (p3) FMA f83 = f43, f58, f83 // A4 * B3
  1333. nop __LINE__
  1334. }
  1335. { .mfb
  1336. nop __LINE__
  1337. (p3) FMA_A f82 = f43, f59, f82 // A4 * B4
  1338. nop __LINE__
  1339. }
  1340. ;;
  1341. { .mfb
  1342. nop __LINE__
  1343. (p3) FMA f99 = f43, f60, f99 // A4 * B5
  1344. nop __LINE__
  1345. }
  1346. { .mfb
  1347. nop __LINE__
  1348. (p3) FMA_A f98 = f43, f61, f98 // A4 * B6
  1349. nop __LINE__
  1350. }
  1351. ;;
  // Loop tail: decrement the software copy of the counter (L) alongside the
  // hardware loop counter that br.cloop decrements.
  1352. { .mfi
  1353. nop __LINE__
  1354. (p3) FMA f115 = f43, f62, f115 // A4 * B7
  1355. adds L = -1, L
  1356. }
  1357. { .mfb
  1358. nop __LINE__
  1359. (p3) FMA_A f114 = f43, f63, f114 // A4 * B8
  1360. br.cloop.sptk.few .L022
  1361. }
  1362. ;;
  // .L028: entry to the solve phase for the 2x4 tile held in
  // f64-f67, f80-f83, f96-f99 and f112-f115.
  // This section positions AOFFSET/BOFFSET on the packed data for the tile,
  // loads the 16 stored values and replaces each accumulator with
  // (stored value - accumulator).
  1363. .L028:
  1364. #if defined(LN) || defined(RT)
  // LN/RT: r2 = (KK - 2) for LN or (KK - 4) for RT, scaled by ZBASE_SHIFT;
  // then AOFFSET = AORIG + 2 * r2 and BOFFSET = B + 4 * r2.
  1365. #ifdef LN
  1366. adds r2 = -2, KK
  1367. #else
  1368. adds r2 = -4, KK
  1369. #endif
  1370. ;;
  1371. shladd r2 = r2, ZBASE_SHIFT, r0
  1372. ;;
  1373. shladd AOFFSET = r2, 1, AORIG
  1374. shladd BOFFSET = r2, 2, B
  1375. ;;
  1376. #endif
  1377. #if defined(LN) || defined(LT)
  // LN/LT: the stored values come from BOFFSET. Register order of the 16
  // values: f64,f65,f80,f81,f96,f97,f112,f113, then f66,f67,f82,f83,f98,
  // f99,f114,f115. Even slots use FSUB, odd slots use FSUB_A (a macro
  // defined outside this chunk; presumably it flips sign for conjugated
  // builds -- confirm).
  1378. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  1379. ;;
  1380. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  1381. ;;
  1382. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  1383. ;;
  1384. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  1385. ;;
  1386. LDFPD f104, f105 = [BOFFSET], 2 * SIZE
  1387. ;;
  1388. LDFPD f106, f107 = [BOFFSET], 2 * SIZE
  1389. ;;
  1390. { .mfi
  1391. LDFPD f120, f121 = [BOFFSET], 2 * SIZE
  1392. FSUB f64 = f72, f64
  1393. nop __LINE__
  1394. }
  1395. { .mfi
  1396. nop __LINE__
  1397. FSUB_A f65 = f73, f65
  1398. nop __LINE__
  1399. }
  1400. ;;
  // Last load has no post-increment; the adds in the same group rewinds
  // BOFFSET by the seven earlier increments (14 * SIZE). The load still
  // uses the address from before the group.
  1401. { .mfi
  1402. LDFPD f122, f123 = [BOFFSET]
  1403. FSUB f80 = f74, f80
  1404. adds BOFFSET = -14 * SIZE, BOFFSET
  1405. }
  1406. { .mfi
  1407. nop __LINE__
  1408. FSUB_A f81 = f75, f81
  1409. nop __LINE__
  1410. }
  1411. ;;
  1412. { .mfi
  1413. nop __LINE__
  1414. FSUB f96 = f88, f96
  1415. nop __LINE__
  1416. }
  1417. { .mfi
  1418. nop __LINE__
  1419. FSUB_A f97 = f89, f97
  1420. nop __LINE__
  1421. }
  1422. ;;
  1423. { .mfi
  1424. nop __LINE__
  1425. FSUB f112 = f90, f112
  1426. nop __LINE__
  1427. }
  1428. { .mfi
  1429. nop __LINE__
  1430. FSUB_A f113 = f91, f113
  1431. nop __LINE__
  1432. }
  1433. ;;
  1434. { .mfi
  1435. nop __LINE__
  1436. FSUB f66 = f104, f66
  1437. nop __LINE__
  1438. }
  1439. { .mfi
  1440. nop __LINE__
  1441. FSUB_A f67 = f105, f67
  1442. nop __LINE__
  1443. }
  1444. ;;
  1445. { .mfi
  1446. nop __LINE__
  1447. FSUB f82 = f106, f82
  1448. nop __LINE__
  1449. }
  1450. { .mfi
  1451. nop __LINE__
  1452. FSUB_A f83 = f107, f83
  1453. nop __LINE__
  1454. }
  1455. ;;
  1456. { .mfi
  1457. nop __LINE__
  1458. FSUB f98 = f120, f98
  1459. nop __LINE__
  1460. }
  1461. { .mfi
  1462. nop __LINE__
  1463. FSUB_A f99 = f121, f99
  1464. nop __LINE__
  1465. }
  1466. ;;
  1467. { .mfi
  1468. nop __LINE__
  1469. FSUB f114 = f122, f114
  1470. nop __LINE__
  1471. }
  1472. { .mfi
  1473. nop __LINE__
  1474. FSUB_A f115 = f123, f115
  1475. nop __LINE__
  1476. }
  1477. ;;
  1478. #else
  // RN/RT: the stored values come from AOFFSET. Register order of the 16
  // values: f64-f67, f80-f83, f96-f99, f112-f115. Plain FSUB is used for
  // every slot in this branch.
  1479. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  1480. ;;
  1481. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  1482. ;;
  1483. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  1484. ;;
  1485. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  1486. ;;
  1487. LDFPD f104, f105 = [AOFFSET], 2 * SIZE
  1488. ;;
  1489. LDFPD f106, f107 = [AOFFSET], 2 * SIZE
  1490. ;;
  1491. { .mfi
  1492. LDFPD f120, f121 = [AOFFSET], 2 * SIZE
  1493. FSUB f64 = f72, f64
  1494. nop __LINE__
  1495. }
  1496. { .mfi
  1497. nop __LINE__
  1498. FSUB f65 = f73, f65
  1499. nop __LINE__
  1500. }
  1501. ;;
  // Same rewind idiom as the LN/LT branch, applied to AOFFSET.
  1502. { .mfi
  1503. LDFPD f122, f123 = [AOFFSET]
  1504. FSUB f66 = f74, f66
  1505. adds AOFFSET = -14 * SIZE, AOFFSET
  1506. }
  1507. { .mfi
  1508. nop __LINE__
  1509. FSUB f67 = f75, f67
  1510. nop __LINE__
  1511. }
  1512. ;;
  1513. { .mfi
  1514. nop __LINE__
  1515. FSUB f80 = f88, f80
  1516. nop __LINE__
  1517. }
  1518. { .mfi
  1519. nop __LINE__
  1520. FSUB f81 = f89, f81
  1521. nop __LINE__
  1522. }
  1523. ;;
  1524. { .mfi
  1525. nop __LINE__
  1526. FSUB f82 = f90, f82
  1527. nop __LINE__
  1528. }
  1529. { .mfi
  1530. nop __LINE__
  1531. FSUB f83 = f91, f83
  1532. nop __LINE__
  1533. }
  1534. ;;
  1535. { .mfi
  1536. nop __LINE__
  1537. FSUB f96 = f104, f96
  1538. nop __LINE__
  1539. }
  1540. { .mfi
  1541. nop __LINE__
  1542. FSUB f97 = f105, f97
  1543. nop __LINE__
  1544. }
  1545. ;;
  1546. { .mfi
  1547. nop __LINE__
  1548. FSUB f98 = f106, f98
  1549. nop __LINE__
  1550. }
  1551. { .mfi
  1552. nop __LINE__
  1553. FSUB f99 = f107, f99
  1554. nop __LINE__
  1555. }
  1556. ;;
  1557. { .mfi
  1558. nop __LINE__
  1559. FSUB f112 = f120, f112
  1560. nop __LINE__
  1561. }
  1562. { .mfi
  1563. nop __LINE__
  1564. FSUB f113 = f121, f113
  1565. nop __LINE__
  1566. }
  1567. ;;
  1568. { .mfi
  1569. nop __LINE__
  1570. FSUB f114 = f122, f114
  1571. nop __LINE__
  1572. }
  1573. { .mfi
  1574. nop __LINE__
  1575. FSUB f115 = f123, f115
  1576. nop __LINE__
  1577. }
  1578. ;;
  1579. #endif
  // LN solve for the 2x4 tile: back-substitution through a 2x2 block of A.
  // Three complex coefficients are read from AOFFSET at offsets 6, 4 and 0
  // (in SIZE units): (f104,f105), (f106,f107), (f120,f121). AOFFSET ends
  // back at its entry value.
  // Each FMPY/FMPY + FMA_C/FMA_D quartet has the shape of a complex
  // multiply; FMA_A..FMA_D are macros defined outside this chunk that
  // presumably choose signs for conjugated builds -- confirm.
  // NOTE(review): no divide appears, so the diagonal entries are presumably
  // stored already inverted by the packing routine -- confirm.
  1580. #ifdef LN
  1581. adds AOFFSET = 6 * SIZE, AOFFSET
  1582. ;;
  1583. LDFPD f104, f105 = [AOFFSET]
  1584. adds AOFFSET = - 2 * SIZE, AOFFSET
  1585. ;;
  1586. LDFPD f106, f107 = [AOFFSET]
  1587. adds AOFFSET = - 4 * SIZE, AOFFSET
  1588. ;;
  1589. LDFPD f120, f121 = [AOFFSET]
  1590. ;;
  // Step 1: scale the second row (f66/f67, f82/f83, f98/f99, f114/f115)
  // by (f104,f105).
  1591. FMPY f32 = f104, f66
  1592. FMPY f33 = f105, f66
  1593. FMPY f34 = f104, f82
  1594. FMPY f35 = f105, f82
  1595. FMPY f36 = f104, f98
  1596. FMPY f37 = f105, f98
  1597. FMPY f38 = f104, f114
  1598. FMPY f39 = f105, f114
  1599. ;;
  1600. FMA_C f66 = f105, f67, f32
  1601. FMA_D f67 = f104, f67, f33
  1602. FMA_C f82 = f105, f83, f34
  1603. FMA_D f83 = f104, f83, f35
  1604. FMA_C f98 = f105, f99, f36
  1605. FMA_D f99 = f104, f99, f37
  1606. FMA_C f114 = f105, f115, f38
  1607. FMA_D f115 = f104, f115, f39
  1608. ;;
  // Step 2: eliminate the solved second row from the first row using
  // (f106,f107), in two dependent passes.
  1609. FNMA f64 = f106, f66, f64
  1610. FMA_A f65 = f107, f66, f65
  1611. FNMA f80 = f106, f82, f80
  1612. FMA_A f81 = f107, f82, f81
  1613. FNMA f96 = f106, f98, f96
  1614. FMA_A f97 = f107, f98, f97
  1615. FNMA f112 = f106, f114, f112
  1616. FMA_A f113 = f107, f114, f113
  1617. ;;
  1618. FMA_B f64 = f107, f67, f64
  1619. FNMA f65 = f106, f67, f65
  1620. FMA_B f80 = f107, f83, f80
  1621. FNMA f81 = f106, f83, f81
  1622. FMA_B f96 = f107, f99, f96
  1623. FNMA f97 = f106, f99, f97
  1624. FMA_B f112 = f107, f115, f112
  1625. FNMA f113 = f106, f115, f113
  1626. ;;
  // Step 3: scale the first row (f64/f65, f80/f81, f96/f97, f112/f113)
  // by (f120,f121).
  1627. FMPY f32 = f120, f64
  1628. FMPY f33 = f121, f64
  1629. FMPY f34 = f120, f80
  1630. FMPY f35 = f121, f80
  1631. FMPY f36 = f120, f96
  1632. FMPY f37 = f121, f96
  1633. FMPY f38 = f120, f112
  1634. FMPY f39 = f121, f112
  1635. ;;
  1636. FMA_C f64 = f121, f65, f32
  1637. FMA_D f65 = f120, f65, f33
  1638. FMA_C f80 = f121, f81, f34
  1639. FMA_D f81 = f120, f81, f35
  1640. FMA_C f96 = f121, f97, f36
  1641. FMA_D f97 = f120, f97, f37
  1642. FMA_C f112 = f121, f113, f38
  1643. FMA_D f113 = f120, f113, f39
  1644. ;;
  1645. #endif
  // LT solve for the 2x4 tile: forward substitution through a 2x2 block of
  // A. Coefficients are read from AOFFSET at offsets 0, 2 and 6 (in SIZE
  // units): (f72,f73), (f74,f75), (f90,f91). AOFFSET ends back at its
  // entry value (+2, +4, -6).
  // FMA_A..FMA_D are macros defined outside this chunk; presumably they
  // choose signs for conjugated builds -- confirm.
  // NOTE(review): diagonal entries are presumably stored pre-inverted,
  // since only multiplies are used -- confirm against the packing routine.
  1646. #ifdef LT
  1647. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  1648. ;;
  1649. LDFPD f74, f75 = [AOFFSET]
  1650. adds AOFFSET = 4 * SIZE, AOFFSET
  1651. ;;
  1652. LDFPD f90, f91 = [AOFFSET]
  1653. adds AOFFSET = - 6 * SIZE, AOFFSET
  1654. ;;
  // Step 1: scale the first row (f64/f65, f80/f81, f96/f97, f112/f113)
  // by (f72,f73).
  1655. FMPY f32 = f72, f64
  1656. FMPY f33 = f73, f64
  1657. FMPY f34 = f72, f80
  1658. FMPY f35 = f73, f80
  1659. FMPY f36 = f72, f96
  1660. FMPY f37 = f73, f96
  1661. FMPY f38 = f72, f112
  1662. FMPY f39 = f73, f112
  1663. ;;
  1664. FMA_C f64 = f73, f65, f32
  1665. FMA_D f65 = f72, f65, f33
  1666. FMA_C f80 = f73, f81, f34
  1667. FMA_D f81 = f72, f81, f35
  1668. FMA_C f96 = f73, f97, f36
  1669. FMA_D f97 = f72, f97, f37
  1670. FMA_C f112 = f73, f113, f38
  1671. FMA_D f113 = f72, f113, f39
  1672. ;;
  // Step 2: eliminate the solved first row from the second row using
  // (f74,f75), in two dependent passes.
  1673. FNMA f66 = f74, f64, f66
  1674. FMA_A f67 = f75, f64, f67
  1675. FNMA f82 = f74, f80, f82
  1676. FMA_A f83 = f75, f80, f83
  1677. FNMA f98 = f74, f96, f98
  1678. FMA_A f99 = f75, f96, f99
  1679. FNMA f114 = f74, f112, f114
  1680. FMA_A f115 = f75, f112, f115
  1681. ;;
  1682. FMA_B f66 = f75, f65, f66
  1683. FNMA f67 = f74, f65, f67
  1684. FMA_B f82 = f75, f81, f82
  1685. FNMA f83 = f74, f81, f83
  1686. FMA_B f98 = f75, f97, f98
  1687. FNMA f99 = f74, f97, f99
  1688. FMA_B f114 = f75, f113, f114
  1689. FNMA f115 = f74, f113, f115
  1690. ;;
  // Step 3: scale the second row (f66/f67, f82/f83, f98/f99, f114/f115)
  // by (f90,f91).
  1691. FMPY f32 = f90, f66
  1692. FMPY f33 = f91, f66
  1693. FMPY f34 = f90, f82
  1694. FMPY f35 = f91, f82
  1695. FMPY f36 = f90, f98
  1696. FMPY f37 = f91, f98
  1697. FMPY f38 = f90, f114
  1698. FMPY f39 = f91, f114
  1699. ;;
  1700. FMA_C f66 = f91, f67, f32
  1701. FMA_D f67 = f90, f67, f33
  1702. FMA_C f82 = f91, f83, f34
  1703. FMA_D f83 = f90, f83, f35
  1704. FMA_C f98 = f91, f99, f36
  1705. FMA_D f99 = f90, f99, f37
  1706. FMA_C f114 = f91, f115, f38
  1707. FMA_D f115 = f90, f115, f39
  1708. ;;
  1709. #endif
  // RN solve for the 2x4 tile: forward substitution through a 4x4 block of
  // B, one column group at a time (f64-f67, then f80-f83, f96-f99,
  // f112-f115). Ten complex coefficients are read from BOFFSET at offsets
  // 0,2,4,6 / 10,12,14 / 20,22 / 30 (in SIZE units); the final adds of
  // -30 * SIZE restores BOFFSET to its entry value.
  // FMA_A..FMA_D are macros defined outside this chunk; presumably they
  // choose signs for conjugated builds -- confirm.
  // NOTE(review): diagonal entries (f72/f73, f90/f91, f108/f109, f126/f127)
  // are presumably stored pre-inverted, since only multiplies are used.
  1710. #ifdef RN
  1711. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  1712. ;;
  1713. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  1714. ;;
  1715. LDFPD f76, f77 = [BOFFSET], 2 * SIZE
  1716. ;;
  1717. LDFPD f78, f79 = [BOFFSET]
  1718. adds BOFFSET = 4 * SIZE, BOFFSET
  1719. ;;
  1720. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  1721. ;;
  1722. LDFPD f92, f93 = [BOFFSET], 2 * SIZE
  1723. ;;
  1724. LDFPD f94, f95 = [BOFFSET]
  1725. adds BOFFSET = 6 * SIZE, BOFFSET
  1726. ;;
  1727. LDFPD f108, f109 = [BOFFSET], 2 * SIZE
  1728. ;;
  1729. LDFPD f110, f111 = [BOFFSET]
  1730. adds BOFFSET = 8 * SIZE, BOFFSET
  1731. ;;
  1732. LDFPD f126, f127 = [BOFFSET]
  1733. adds BOFFSET = - 30 * SIZE, BOFFSET
  1734. ;;
  // Column group 1 (f64-f67): scale by (f72,f73).
  1735. FMPY f32 = f72, f64
  1736. FMPY f33 = f73, f64
  1737. FMPY f34 = f72, f66
  1738. FMPY f35 = f73, f66
  1739. ;;
  1740. FMA_C f64 = f73, f65, f32
  1741. FMA_D f65 = f72, f65, f33
  1742. FMA_C f66 = f73, f67, f34
  1743. FMA_D f67 = f72, f67, f35
  1744. ;;
  // Eliminate group 1 from groups 2, 3 and 4 using (f74,f75), (f76,f77)
  // and (f78,f79).
  1745. FNMA f80 = f74, f64, f80
  1746. FMA_A f81 = f75, f64, f81
  1747. FNMA f82 = f74, f66, f82
  1748. FMA_A f83 = f75, f66, f83
  1749. ;;
  1750. FMA_B f80 = f75, f65, f80
  1751. FNMA f81 = f74, f65, f81
  1752. FMA_B f82 = f75, f67, f82
  1753. FNMA f83 = f74, f67, f83
  1754. ;;
  1755. FNMA f96 = f76, f64, f96
  1756. FMA_A f97 = f77, f64, f97
  1757. FNMA f98 = f76, f66, f98
  1758. FMA_A f99 = f77, f66, f99
  1759. ;;
  1760. FMA_B f96 = f77, f65, f96
  1761. FNMA f97 = f76, f65, f97
  1762. FMA_B f98 = f77, f67, f98
  1763. FNMA f99 = f76, f67, f99
  1764. ;;
  1765. FNMA f112 = f78, f64, f112
  1766. FMA_A f113 = f79, f64, f113
  1767. FNMA f114 = f78, f66, f114
  1768. FMA_A f115 = f79, f66, f115
  1769. ;;
  1770. FMA_B f112 = f79, f65, f112
  1771. FNMA f113 = f78, f65, f113
  1772. FMA_B f114 = f79, f67, f114
  1773. FNMA f115 = f78, f67, f115
  1774. ;;
  // Column group 2 (f80-f83): scale by (f90,f91), then eliminate from
  // groups 3 and 4 using (f92,f93) and (f94,f95).
  1775. FMPY f32 = f90, f80
  1776. FMPY f33 = f91, f80
  1777. FMPY f34 = f90, f82
  1778. FMPY f35 = f91, f82
  1779. ;;
  1780. FMA_C f80 = f91, f81, f32
  1781. FMA_D f81 = f90, f81, f33
  1782. FMA_C f82 = f91, f83, f34
  1783. FMA_D f83 = f90, f83, f35
  1784. ;;
  1785. FNMA f96 = f92, f80, f96
  1786. FMA_A f97 = f93, f80, f97
  1787. FNMA f98 = f92, f82, f98
  1788. FMA_A f99 = f93, f82, f99
  1789. ;;
  1790. FMA_B f96 = f93, f81, f96
  1791. FNMA f97 = f92, f81, f97
  1792. FMA_B f98 = f93, f83, f98
  1793. FNMA f99 = f92, f83, f99
  1794. ;;
  1795. FNMA f112 = f94, f80, f112
  1796. FMA_A f113 = f95, f80, f113
  1797. FNMA f114 = f94, f82, f114
  1798. FMA_A f115 = f95, f82, f115
  1799. ;;
  1800. FMA_B f112 = f95, f81, f112
  1801. FNMA f113 = f94, f81, f113
  1802. FMA_B f114 = f95, f83, f114
  1803. FNMA f115 = f94, f83, f115
  1804. ;;
  // Column group 3 (f96-f99): scale by (f108,f109), then eliminate from
  // group 4 using (f110,f111).
  1805. FMPY f32 = f108, f96
  1806. FMPY f33 = f109, f96
  1807. FMPY f34 = f108, f98
  1808. FMPY f35 = f109, f98
  1809. ;;
  1810. FMA_C f96 = f109, f97, f32
  1811. FMA_D f97 = f108, f97, f33
  1812. FMA_C f98 = f109, f99, f34
  1813. FMA_D f99 = f108, f99, f35
  1814. ;;
  1815. FNMA f112 = f110, f96, f112
  1816. FMA_A f113 = f111, f96, f113
  1817. FNMA f114 = f110, f98, f114
  1818. FMA_A f115 = f111, f98, f115
  1819. ;;
  1820. FMA_B f112 = f111, f97, f112
  1821. FNMA f113 = f110, f97, f113
  1822. FMA_B f114 = f111, f99, f114
  1823. FNMA f115 = f110, f99, f115
  1824. ;;
  // Column group 4 (f112-f115): scale by (f126,f127).
  1825. FMPY f32 = f126, f112
  1826. FMPY f33 = f127, f112
  1827. FMPY f34 = f126, f114
  1828. FMPY f35 = f127, f114
  1829. ;;
  1830. FMA_C f112 = f127, f113, f32
  1831. FMA_D f113 = f126, f113, f33
  1832. FMA_C f114 = f127, f115, f34
  1833. FMA_D f115 = f126, f115, f35
  1834. ;;
  1835. #endif
  // RT solve for the 2x4 tile: back-substitution through a 4x4 block of B,
  // starting from the last column group (f112-f115) and working down to the
  // first (f64-f67). Ten complex coefficients are read from BOFFSET walking
  // backwards from offset 30 to offset 0 (in SIZE units): 30,28,26,24 /
  // 20,18,16 / 10,8 / 0. BOFFSET ends at its entry value.
  // FMA_A..FMA_D are macros defined outside this chunk; presumably they
  // choose signs for conjugated builds -- confirm.
  // NOTE(review): diagonal entries (f72/f73, f88/f89, f104/f105, f120/f121)
  // are presumably stored pre-inverted, since only multiplies are used.
  1836. #ifdef RT
  1837. adds BOFFSET = 30 * SIZE, BOFFSET
  1838. ;;
  1839. LDFPD f72, f73 = [BOFFSET]
  1840. adds BOFFSET = - 2 * SIZE, BOFFSET
  1841. ;;
  1842. LDFPD f74, f75 = [BOFFSET]
  1843. adds BOFFSET = - 2 * SIZE, BOFFSET
  1844. ;;
  1845. LDFPD f76, f77 = [BOFFSET]
  1846. adds BOFFSET = - 2 * SIZE, BOFFSET
  1847. ;;
  1848. LDFPD f78, f79 = [BOFFSET]
  1849. adds BOFFSET = - 4 * SIZE, BOFFSET
  1850. ;;
  1851. LDFPD f88, f89 = [BOFFSET]
  1852. adds BOFFSET = - 2 * SIZE, BOFFSET
  1853. ;;
  1854. LDFPD f90, f91 = [BOFFSET]
  1855. adds BOFFSET = - 2 * SIZE, BOFFSET
  1856. ;;
  1857. LDFPD f92, f93 = [BOFFSET]
  1858. adds BOFFSET = - 6 * SIZE, BOFFSET
  1859. ;;
  1860. LDFPD f104, f105 = [BOFFSET]
  1861. adds BOFFSET = - 2 * SIZE, BOFFSET
  1862. ;;
  1863. LDFPD f106, f107 = [BOFFSET]
  1864. adds BOFFSET = - 8 * SIZE, BOFFSET
  1865. ;;
  1866. LDFPD f120, f121 = [BOFFSET]
  1867. ;;
  // Column group 4 (f112-f115): scale by (f72,f73).
  1868. FMPY f32 = f72, f112
  1869. FMPY f33 = f73, f112
  1870. FMPY f34 = f72, f114
  1871. FMPY f35 = f73, f114
  1872. ;;
  1873. FMA_C f112 = f73, f113, f32
  1874. FMA_D f113 = f72, f113, f33
  1875. FMA_C f114 = f73, f115, f34
  1876. FMA_D f115 = f72, f115, f35
  1877. ;;
  // Eliminate group 4 from groups 3, 2 and 1 using (f74,f75), (f76,f77)
  // and (f78,f79).
  1878. FNMA f96 = f74, f112, f96
  1879. FMA_A f97 = f75, f112, f97
  1880. FNMA f98 = f74, f114, f98
  1881. FMA_A f99 = f75, f114, f99
  1882. ;;
  1883. FMA_B f96 = f75, f113, f96
  1884. FNMA f97 = f74, f113, f97
  1885. FMA_B f98 = f75, f115, f98
  1886. FNMA f99 = f74, f115, f99
  1887. ;;
  1888. FNMA f80 = f76, f112, f80
  1889. FMA_A f81 = f77, f112, f81
  1890. FNMA f82 = f76, f114, f82
  1891. FMA_A f83 = f77, f114, f83
  1892. ;;
  1893. FMA_B f80 = f77, f113, f80
  1894. FNMA f81 = f76, f113, f81
  1895. FMA_B f82 = f77, f115, f82
  1896. FNMA f83 = f76, f115, f83
  1897. ;;
  1898. FNMA f64 = f78, f112, f64
  1899. FMA_A f65 = f79, f112, f65
  1900. FNMA f66 = f78, f114, f66
  1901. FMA_A f67 = f79, f114, f67
  1902. ;;
  1903. FMA_B f64 = f79, f113, f64
  1904. FNMA f65 = f78, f113, f65
  1905. FMA_B f66 = f79, f115, f66
  1906. FNMA f67 = f78, f115, f67
  1907. ;;
  // Column group 3 (f96-f99): scale by (f88,f89), then eliminate from
  // groups 2 and 1 using (f90,f91) and (f92,f93).
  1908. FMPY f32 = f88, f96
  1909. FMPY f33 = f89, f96
  1910. FMPY f34 = f88, f98
  1911. FMPY f35 = f89, f98
  1912. ;;
  1913. FMA_C f96 = f89, f97, f32
  1914. FMA_D f97 = f88, f97, f33
  1915. FMA_C f98 = f89, f99, f34
  1916. FMA_D f99 = f88, f99, f35
  1917. ;;
  1918. FNMA f80 = f90, f96, f80
  1919. FMA_A f81 = f91, f96, f81
  1920. FNMA f82 = f90, f98, f82
  1921. FMA_A f83 = f91, f98, f83
  1922. ;;
  1923. FMA_B f80 = f91, f97, f80
  1924. FNMA f81 = f90, f97, f81
  1925. FMA_B f82 = f91, f99, f82
  1926. FNMA f83 = f90, f99, f83
  1927. ;;
  1928. FNMA f64 = f92, f96, f64
  1929. FMA_A f65 = f93, f96, f65
  1930. FNMA f66 = f92, f98, f66
  1931. FMA_A f67 = f93, f98, f67
  1932. ;;
  1933. FMA_B f64 = f93, f97, f64
  1934. FNMA f65 = f92, f97, f65
  1935. FMA_B f66 = f93, f99, f66
  1936. FNMA f67 = f92, f99, f67
  1937. ;;
  // Column group 2 (f80-f83): scale by (f104,f105), then eliminate from
  // group 1 using (f106,f107).
  1938. FMPY f32 = f104, f80
  1939. FMPY f33 = f105, f80
  1940. FMPY f34 = f104, f82
  1941. FMPY f35 = f105, f82
  1942. ;;
  1943. FMA_C f80 = f105, f81, f32
  1944. FMA_D f81 = f104, f81, f33
  1945. FMA_C f82 = f105, f83, f34
  1946. FMA_D f83 = f104, f83, f35
  1947. ;;
  1948. FNMA f64 = f106, f80, f64
  1949. FMA_A f65 = f107, f80, f65
  1950. FNMA f66 = f106, f82, f66
  1951. FMA_A f67 = f107, f82, f67
  1952. ;;
  1953. FMA_B f64 = f107, f81, f64
  1954. FNMA f65 = f106, f81, f65
  1955. FMA_B f66 = f107, f83, f66
  1956. FNMA f67 = f106, f83, f67
  1957. ;;
  // Column group 1 (f64-f67): scale by (f120,f121).
  1958. FMPY f32 = f120, f64
  1959. FMPY f33 = f121, f64
  1960. FMPY f34 = f120, f66
  1961. FMPY f35 = f121, f66
  1962. ;;
  1963. FMA_C f64 = f121, f65, f32
  1964. FMA_D f65 = f120, f65, f33
  1965. FMA_C f66 = f121, f67, f34
  1966. FMA_D f67 = f120, f67, f35
  1967. ;;
  1968. #endif
  // Write the 16 solved values of the 2x4 tile back to the packed buffer:
  // BOFFSET for LN/LT, AOFFSET for RN/RT. Two pointers 4 * SIZE apart are
  // used so that two stores can issue per group. After three single-step
  // stores the fourth store advances each pointer by 5 * SIZE, which lands
  // it on the next 8-value half; the final adds of -16 * SIZE restores the
  // base pointer to its entry value.
  1969. #if defined(LN) || defined(LT)
  // LN/LT order in memory: f64,f65,f80,f81,f96,f97,f112,f113, then
  // f66,f67,f82,f83,f98,f99,f114,f115.
  1970. adds BOFFSET2 = 4 * SIZE, BOFFSET
  1971. ;;
  1972. STFD [BOFFSET] = f64, SIZE
  1973. STFD [BOFFSET2] = f96, SIZE
  1974. ;;
  1975. STFD [BOFFSET] = f65, SIZE
  1976. STFD [BOFFSET2] = f97, SIZE
  1977. ;;
  1978. STFD [BOFFSET] = f80, SIZE
  1979. STFD [BOFFSET2] = f112, SIZE
  1980. ;;
  1981. STFD [BOFFSET] = f81, 5 * SIZE
  1982. STFD [BOFFSET2] = f113, 5 * SIZE
  1983. ;;
  1984. STFD [BOFFSET] = f66, SIZE
  1985. STFD [BOFFSET2] = f98, SIZE
  1986. ;;
  1987. STFD [BOFFSET] = f67, SIZE
  1988. STFD [BOFFSET2] = f99, SIZE
  1989. ;;
  1990. STFD [BOFFSET] = f82, SIZE
  1991. STFD [BOFFSET2] = f114, SIZE
  1992. ;;
  1993. STFD [BOFFSET] = f83, 5 * SIZE
  1994. STFD [BOFFSET2] = f115, 5 * SIZE
  1995. ;;
  1996. adds BOFFSET = - 16 * SIZE, BOFFSET
  1997. ;;
  1998. #else
  // RN/RT order in memory: f64-f67, f80-f83, f96-f99, f112-f115.
  1999. adds AOFFSET2 = 4 * SIZE, AOFFSET
  2000. ;;
  2001. STFD [AOFFSET] = f64, SIZE
  2002. STFD [AOFFSET2] = f80, SIZE
  2003. ;;
  2004. STFD [AOFFSET] = f65, SIZE
  2005. STFD [AOFFSET2] = f81, SIZE
  2006. ;;
  2007. STFD [AOFFSET] = f66, SIZE
  2008. STFD [AOFFSET2] = f82, SIZE
  2009. ;;
  2010. STFD [AOFFSET] = f67, 5 * SIZE
  2011. STFD [AOFFSET2] = f83, 5 * SIZE
  2012. ;;
  2013. STFD [AOFFSET] = f96, SIZE
  2014. STFD [AOFFSET2] = f112, SIZE
  2015. ;;
  2016. STFD [AOFFSET] = f97, SIZE
  2017. STFD [AOFFSET2] = f113, SIZE
  2018. ;;
  2019. STFD [AOFFSET] = f98, SIZE
  2020. STFD [AOFFSET2] = f114, SIZE
  2021. ;;
  2022. STFD [AOFFSET] = f99, 5 * SIZE
  2023. STFD [AOFFSET2] = f115, 5 * SIZE
  2024. ;;
  2025. adds AOFFSET = - 16 * SIZE, AOFFSET
  2026. ;;
  2027. #endif
  // Store the solved 2x4 tile to C and advance the running state.
  // Each of C1..C4 receives four consecutive values and moves forward by
  // 4 * SIZE. For LN the pointers are first moved back by 4 * SIZE and
  // moved back again afterwards, so they end 4 * SIZE below where they
  // started; for the other variants they end 4 * SIZE above.
  2028. #ifdef LN
  2029. adds C1 = -4 * SIZE, C1
  2030. adds C2 = -4 * SIZE, C2
  2031. adds C3 = -4 * SIZE, C3
  2032. adds C4 = -4 * SIZE, C4
  2033. #endif
  2034. ;;
  2035. STFD [C1 ] = f64, SIZE
  2036. ;;
  2037. STFD [C1 ] = f65, SIZE
  2038. ;;
  2039. STFD [C1 ] = f66, SIZE
  2040. ;;
  2041. STFD [C1 ] = f67, SIZE
  2042. ;;
  2043. STFD [C2 ] = f80, SIZE
  2044. ;;
  2045. STFD [C2 ] = f81, SIZE
  2046. ;;
  2047. STFD [C2 ] = f82, SIZE
  2048. ;;
  2049. STFD [C2 ] = f83, SIZE
  2050. ;;
  2051. STFD [C3 ] = f96, SIZE
  2052. ;;
  2053. STFD [C3 ] = f97, SIZE
  2054. ;;
  2055. STFD [C3 ] = f98, SIZE
  2056. ;;
  2057. STFD [C3 ] = f99, SIZE
  2058. ;;
  2059. STFD [C4 ] = f112, SIZE
  2060. ;;
  2061. STFD [C4 ] = f113, SIZE
  2062. ;;
  2063. STFD [C4 ] = f114, SIZE
  2064. ;;
  2065. STFD [C4 ] = f115, SIZE
  2066. ;;
  // Clear only the first accumulator row here; the second row (f66, f67,
  // f82, ...) is cleared by the prologue of the tile that follows.
  2067. mov f64 = f0
  2068. mov f65 = f0
  2069. mov f80 = f0
  2070. mov f81 = f0
  2071. mov f96 = f0
  2072. mov f97 = f0
  2073. mov f112 = f0
  2074. mov f113 = f0
  2075. ;;
  2076. #ifdef LN
  2077. adds C1 = -4 * SIZE, C1
  2078. adds C2 = -4 * SIZE, C2
  2079. adds C3 = -4 * SIZE, C3
  2080. adds C4 = -4 * SIZE, C4
  2081. #endif
  2082. ;;
  // NOTE(review): p6 and I computed below are not consumed before the code
  // that follows this section recomputes I from M and overwrites p6; they
  // look like leftovers from a looped tile template -- confirm.
  2083. cmp.ne p6, p0 = 1, I
  2084. ;;
  2085. adds I = -1, I
  2086. ;;
  // r2 = K << ZBASE_SHIFT; L = K - KK = k-steps not consumed by this tile.
  2087. shladd r2 = K, ZBASE_SHIFT, r0
  2088. ;;
  2089. sub L = K, KK
  2090. ;;
  // RT: AORIG advances by 2 * r2.
  2091. #ifdef RT
  2092. shladd AORIG = r2, 1, AORIG
  2093. #endif
  2094. ;;
  // LT/RN: skip the remaining (K - KK) k-steps of packed data:
  // AOFFSET += 2 * L and BOFFSET += 4 * L, with L scaled by ZBASE_SHIFT.
  2095. #if defined(LT) || defined(RN)
  2096. shladd L = L, ZBASE_SHIFT, r0
  2097. ;;
  2098. shladd AOFFSET = L, 1, AOFFSET
  2099. shladd BOFFSET = L, 2, BOFFSET
  2100. #endif
  2101. ;;
  // KK moves by the tile width: +2 for LT, -2 for LN, unchanged otherwise.
  2102. #ifdef LT
  2103. adds KK = 2, KK
  2104. #elif defined LN
  2105. adds KK = -2, KK
  2106. #else
  2107. nop __LINE__
  2108. #endif
  2109. ;;
  // Trip count for the next tile, from the updated KK.
  2110. #if defined(LT) || defined(RN)
  2111. mov L = KK
  2112. #else
  2113. sub L = K, KK
  2114. #endif
  2115. ;;
  // .L010x: entry to the 4-wide tile stage. Recomputes the k-loop trip count
  // (KK for LT/RN, K - KK otherwise) and sets I = M >> 2, the number of
  // full 4-wide tiles. Branches to .L049 when there are none; otherwise
  // falls through into the tile prologue.
  2116. .align 16
  2117. .L010x:
  2118. #if defined(LT) || defined(RN)
  2119. mov L = KK
  2120. #else
  2121. sub L = K, KK
  2122. #endif
  2123. shr I = M, 2
  2124. ;;
  // p6 = (I == 0): nothing to do at this width.
  2125. cmp.eq p6, p7 = 0, I
  2126. (p6) br.cond.dpnt .L049
  2127. ;;
  // .L011: prologue for one 4-wide tile (eight A doubles f32-f39 and eight
  // B doubles f48-f55 per k-step). It positions AOFFSET/BOFFSET, clears the
  // accumulators that are not already zero, preloads the first k-step,
  // prefetches the C tile and sets up the loop counter.
  // LDFPD and CPREFETCH are macros defined outside this chunk.
  2128. .align 16
  2129. .L011:
  // p7 = (L != 0) guards every preload below.
  // r2 = K << (2 + ZBASE_SHIFT), r3 = KK << ZBASE_SHIFT.
  2130. { .mmi
  2131. cmp.ne p7, p0 = r0, L
  2132. adds BOFFSET = 0 * SIZE, B
  2133. shl r2 = K, 2 + ZBASE_SHIFT
  2134. }
  2135. { .mfi
  2136. shladd r3 = KK, ZBASE_SHIFT, r0
  2137. mov f118 = f0
  2138. nop __LINE__
  2139. }
  2140. ;;
  2141. #if defined(LT) || defined(RN)
  // LT/RN: B is read from its start; AOFFSET is used as it stands.
  2142. { .mfb
  2143. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  2144. mov f66 = f0
  2145. nop __LINE__
  2146. }
  2147. { .mmf
  2148. nop __LINE__
  2149. nop __LINE__
  2150. mov f67 = f0
  2151. }
  2152. ;;
  2153. #else
  // LN/RT: skip the first KK k-steps. BOFFSET = B + 4 * r3 and
  // AOFFSET = AORIG + 4 * r3. For LN, AORIG is first moved back by r2.
  2154. { .mfi
  2155. shladd BOFFSET = r3, 2, B
  2156. mov f66 = f0
  2157. #ifdef LN
  2158. sub AORIG = AORIG, r2
  2159. #else
  2160. nop __LINE__
  2161. #endif
  2162. }
  2163. ;;
  2164. { .mfi
  2165. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  2166. mov f67 = f0
  2167. shladd AOFFSET = r3, 2, AORIG
  2168. }
  2169. ;;
  2170. #endif
  2171. ;;
  // Preloads are interleaved with accumulator clears and pointer setup.
  // PREC = C1 + CPREFETCHSIZE * SIZE is the C prefetch cursor.
  2172. { .mfi
  2173. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  2174. mov f82 = f0
  2175. nop __LINE__
  2176. }
  2177. { .mfi
  2178. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  2179. mov f83 = f0
  2180. adds PREC = CPREFETCHSIZE * SIZE, C1
  2181. }
  2182. ;;
  // Trip count for a loop unrolled by two: ar.lc = ((L + 1) >> 1) - 1,
  // computed over the next few groups. C5..C8 = C1..C4 + 4 * SIZE.
  2183. { .mfi
  2184. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  2185. mov f98 = f0
  2186. adds L = 1, L
  2187. }
  2188. { .mfi
  2189. (p7) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  2190. mov f99 = f0
  2191. adds C5 = 4 * SIZE, C1
  2192. }
  2193. ;;
  // p12 is set when bit 0 of (L + 1) is zero, i.e. the original L is odd.
  2194. { .mfi
  2195. (p7) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  2196. mov f114 = f0
  2197. tbit.z p12, p0 = L, 0
  2198. }
  2199. { .mfi
  2200. (p7) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  2201. mov f115 = f0
  2202. adds C6 = 4 * SIZE, C2
  2203. }
  2204. ;;
  // setf.d from r0 also produces zero; it sits in the M slot of the bundle,
  // so two registers can be cleared per bundle alongside the F-slot mov.
  2205. { .mfi
  2206. (p7) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  2207. mov f68 = f0
  2208. shr L = L, 1
  2209. }
  2210. { .mfi
  2211. setf.d f86 = r0
  2212. mov f69 = f0
  2213. adds C7 = 4 * SIZE, C3
  2214. }
  2215. ;;
  // Four CPREFETCH issues follow; the first three step PREC by LDC.
  2216. { .mfi
  2217. CPREFETCH [PREC], LDC
  2218. mov f84 = f0
  2219. adds L = -1, L
  2220. }
  2221. { .mfi
  2222. setf.d f87 = r0
  2223. mov f85 = f0
  2224. adds C8 = 4 * SIZE, C4
  2225. }
  2226. ;;
  // p3 starts out true (r0 == r0).
  2227. { .mfi
  2228. CPREFETCH [PREC], LDC
  2229. mov f100 = f0
  2230. mov ar.lc = L
  2231. }
  2232. { .mfi
  2233. setf.d f102 = r0
  2234. mov f101 = f0
  2235. cmp.eq p3, p0 = r0, r0
  2236. }
  2237. ;;
  2238. { .mfi
  2239. CPREFETCH [PREC], LDC
  2240. mov f116 = f0
  2241. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  2242. }
  2243. { .mfi
  2244. setf.d f103 = r0
  2245. mov f117 = f0
  2246. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  2247. }
  2248. ;;
  // L == -1 here means the original count was zero: skip the update loop
  // and go straight to .L018.
  2249. { .mfi
  2250. CPREFETCH [PREC]
  2251. mov f70 = f0
  2252. cmp.eq p6, p0 = -1, L
  2253. }
  2254. { .mfb
  2255. setf.d f119 = r0
  2256. mov f71 = f0
  2257. (p6) br.cond.dpnt .L018
  2258. }
  2259. ;;
  2260. .align 16
  /*
   * .L012: unrolled update loop. The numbered markers 1..64 label its
   * instruction groups.
   *  - Groups 1-32 combine the A values in f32-f39 with the B values in
   *    f48-f55 into the accumulators.
   *  - Groups 33-64 repeat this with f40-f47 and f56-f63 and are predicated
   *    on p3.
   *  - Loads under p3 fetch the operands of the second half, loads under p4
   *    fetch the operands of the next pass.
   *  - L is decremented in group 64 and br.cloop closes the loop.
   * FMA, FMA_A and FMA_B are macros defined outside this section.
   */
  2261. .L012:
  2262. /* 1 */
  2263. { .mfi
  2264. lfetch.nt1 [PREA], 16 * SIZE
  2265. FMA f64 = f32, f48, f64 // A1 * B1
  2266. nop __LINE__
  2267. }
  2268. { .mfb
  2269. (p12) cmp.ne p3, p0 = 0, L // with p12 set: p3 = (L != 0), so the second half is skipped on the final pass
  2270. FMA_B f65 = f32, f49, f65 // A1 * B2
  2271. nop __LINE__
  2272. }
  2273. ;;
  2274. /* 2 */
  2275. { .mfi
  2276. lfetch.nt1 [PREB], 16 * SIZE
  2277. FMA f80 = f32, f50, f80 // A1 * B3
  2278. nop __LINE__
  2279. }
  2280. { .mfb
  2281. cmp.ne p4, p5 = 0, L // p4 = (L != 0): another pass follows, so its operands are reloaded
  2282. FMA_B f81 = f32, f51, f81 // A1 * B4
  2283. nop __LINE__
  2284. }
  2285. ;;
  2286. /* 3 */
  2287. { .mfb
  2288. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  2289. FMA f96 = f32, f52, f96 // A1 * B5
  2290. nop __LINE__
  2291. }
  2292. { .mfb
  2293. FMA_B f97 = f32, f53, f97 // A1 * B6
  2294. nop __LINE__
  2295. }
  2296. ;;
  2297. /* 4 */
  2298. { .mfb
  2299. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  2300. FMA f112 = f32, f54, f112 // A1 * B7
  2301. nop __LINE__
  2302. }
  2303. { .mfb
  2304. FMA_B f113 = f32, f55, f113 // A1 * B8
  2305. nop __LINE__
  2306. }
  2307. ;;
  2308. /* 5 */
  2309. { .mfb
  2310. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  2311. FMA f65 = f33, f48, f65 // A2 * B1
  2312. nop __LINE__
  2313. }
  2314. { .mfb
  2315. FMA_A f64 = f33, f49, f64 // A2 * B2
  2316. nop __LINE__
  2317. }
  2318. ;;
  2319. /* 6 */
  2320. { .mfb
  2321. (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  2322. FMA f81 = f33, f50, f81 // A2 * B3
  2323. nop __LINE__
  2324. }
  2325. { .mfb
  2326. FMA_A f80 = f33, f51, f80 // A2 * B4
  2327. nop __LINE__
  2328. }
  2329. ;;
  2330. /* 7 */
  2331. { .mfb
  2332. (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  2333. FMA f97 = f33, f52, f97 // A2 * B5
  2334. nop __LINE__
  2335. }
  2336. { .mfb
  2337. FMA_A f96 = f33, f53, f96 // A2 * B6
  2338. nop __LINE__
  2339. }
  2340. ;;
  2341. /* 8 */
  2342. { .mfb
  2343. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  2344. FMA f113 = f33, f54, f113 // A2 * B7
  2345. nop __LINE__
  2346. }
  2347. { .mfb
  2348. FMA_A f112 = f33, f55, f112 // A2 * B8
  2349. nop __LINE__
  2350. }
  2351. ;;
  2352. /* 9 */
  2353. { .mfb
  2354. (p3) LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  2355. FMA f66 = f34, f48, f66 // A3 * B1
  2356. nop __LINE__
  2357. }
  2358. { .mfb
  2359. FMA_B f67 = f34, f49, f67 // A3 * B2
  2360. nop __LINE__
  2361. }
  2362. ;;
  2363. /* 10 */
  2364. { .mfb
  2365. (p3) LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  2366. FMA f82 = f34, f50, f82 // A3 * B3
  2367. nop __LINE__
  2368. }
  2369. { .mfb
  2370. FMA_B f83 = f34, f51, f83 // A3 * B4
  2371. nop __LINE__
  2372. }
  2373. ;;
  2374. /* 11 */
  2375. { .mfb
  2376. FMA f98 = f34, f52, f98 // A3 * B5
  2377. nop __LINE__
  2378. }
  2379. { .mfb
  2380. nop __LINE__
  2381. FMA_B f99 = f34, f53, f99 // A3 * B6
  2382. nop __LINE__
  2383. }
  2384. ;;
  2385. /* 12 */
  2386. { .mfb
  2387. FMA f114 = f34, f54, f114 // A3 * B7
  2388. nop __LINE__
  2389. }
  2390. { .mfb
  2391. nop __LINE__
  2392. FMA_B f115 = f34, f55, f115 // A3 * B8
  2393. nop __LINE__
  2394. }
  2395. ;;
  2396. /* 13 */
  2397. { .mfb
  2398. nop __LINE__
  2399. FMA f67 = f35, f48, f67 // A4 * B1
  2400. }
  2401. { .mfb
  2402. nop __LINE__
  2403. FMA_A f66 = f35, f49, f66 // A4 * B2
  2404. nop __LINE__
  2405. }
  2406. ;;
  2407. /* 14 */
  2408. { .mfb
  2409. FMA f83 = f35, f50, f83 // A4 * B3
  2410. nop __LINE__
  2411. }
  2412. { .mfb
  2413. nop __LINE__
  2414. FMA_A f82 = f35, f51, f82 // A4 * B4
  2415. nop __LINE__
  2416. }
  2417. ;;
  2418. /* 15 */
  2419. { .mfb
  2420. FMA f99 = f35, f52, f99 // A4 * B5
  2421. nop __LINE__
  2422. }
  2423. { .mfb
  2424. nop __LINE__
  2425. FMA_A f98 = f35, f53, f98 // A4 * B6
  2426. nop __LINE__
  2427. }
  2428. ;;
  2429. /* 16 */
  2430. { .mfb
  2431. FMA f115 = f35, f54, f115 // A4 * B7
  2432. nop __LINE__
  2433. }
  2434. { .mfb
  2435. nop __LINE__
  2436. FMA_A f114 = f35, f55, f114 // A4 * B8
  2437. nop __LINE__
  2438. }
  2439. ;;
  2440. /* 17 */
  2441. { .mfb
  2442. nop __LINE__
  2443. FMA f68 = f36, f48, f68 // A5 * B1
  2444. nop __LINE__
  2445. }
  2446. { .mfb
  2447. nop __LINE__
  2448. FMA_B f69 = f36, f49, f69 // A5 * B2
  2449. nop __LINE__
  2450. }
  2451. ;;
  2452. /* 18 */
  2453. { .mfb
  2454. nop __LINE__
  2455. FMA f84 = f36, f50, f84 // A5 * B3
  2456. nop __LINE__
  2457. }
  2458. { .mfb
  2459. nop __LINE__
  2460. FMA_B f85 = f36, f51, f85 // A5 * B4
  2461. nop __LINE__
  2462. }
  2463. ;;
  2464. /* 19 */
  2465. { .mfb
  2466. nop __LINE__
  2467. FMA f100 = f36, f52, f100 // A5 * B5
  2468. nop __LINE__
  2469. }
  2470. { .mfb
  2471. nop __LINE__
  2472. FMA_B f101 = f36, f53, f101 // A5 * B6
  2473. nop __LINE__
  2474. }
  2475. ;;
  2476. /* 20 */
  2477. { .mfb
  2478. nop __LINE__
  2479. FMA f116 = f36, f54, f116 // A5 * B7
  2480. nop __LINE__
  2481. }
  2482. { .mfb
  2483. nop __LINE__
  2484. FMA_B f117 = f36, f55, f117 // A5 * B8
  2485. nop __LINE__
  2486. }
  2487. ;;
  2488. /* 21 */
  2489. { .mfb
  2490. nop __LINE__
  2491. FMA f69 = f37, f48, f69 // A6 * B1
  2492. nop __LINE__
  2493. }
  2494. { .mfb
  2495. nop __LINE__
  2496. FMA_A f68 = f37, f49, f68 // A6 * B2
  2497. nop __LINE__
  2498. }
  2499. ;;
  2500. /* 22 */
  2501. { .mfb
  2502. nop __LINE__
  2503. FMA f85 = f37, f50, f85 // A6 * B3
  2504. nop __LINE__
  2505. }
  2506. { .mfb
  2507. nop __LINE__
  2508. FMA_A f84 = f37, f51, f84 // A6 * B4
  2509. nop __LINE__
  2510. }
  2511. ;;
  2512. /* 23 */
  2513. { .mfb
  2514. nop __LINE__
  2515. FMA f101 = f37, f52, f101 // A6 * B5
  2516. nop __LINE__
  2517. }
  2518. { .mfb
  2519. nop __LINE__
  2520. FMA_A f100 = f37, f53, f100 // A6 * B6
  2521. nop __LINE__
  2522. }
  2523. ;;
  2524. /* 24 */
  2525. { .mfb
  2526. nop __LINE__
  2527. FMA f117 = f37, f54, f117 // A6 * B7
  2528. nop __LINE__
  2529. }
  2530. { .mfb
  2531. nop __LINE__
  2532. FMA_A f116 = f37, f55, f116 // A6 * B8
  2533. nop __LINE__
  2534. }
  2535. ;;
  2536. /* 25 */
  2537. { .mfb
  2538. nop __LINE__
  2539. FMA f70 = f38, f48, f70 // A7 * B1
  2540. nop __LINE__
  2541. }
  2542. { .mfb
  2543. nop __LINE__
  2544. FMA_B f71 = f38, f49, f71 // A7 * B2
  2545. nop __LINE__
  2546. }
  2547. ;;
  2548. /* 26 */
  2549. { .mfb
  2550. nop __LINE__
  2551. FMA f86 = f38, f50, f86 // A7 * B3
  2552. nop __LINE__
  2553. }
  2554. { .mfb
  2555. nop __LINE__
  2556. FMA_B f87 = f38, f51, f87 // A7 * B4
  2557. nop __LINE__
  2558. }
  2559. ;;
  2560. /* 27 */
  2561. { .mfb
  2562. nop __LINE__
  2563. FMA f102 = f38, f52, f102 // A7 * B5
  2564. nop __LINE__
  2565. }
  2566. { .mfb
  2567. nop __LINE__
  2568. FMA_B f103 = f38, f53, f103 // A7 * B6
  2569. nop __LINE__
  2570. }
  2571. ;;
  2572. /* 28 */
  2573. { .mfb
  2574. nop __LINE__
  2575. FMA f118 = f38, f54, f118 // A7 * B7
  2576. nop __LINE__
  2577. }
  2578. { .mfb
  2579. nop __LINE__
  2580. FMA_B f119 = f38, f55, f119 // A7 * B8
  2581. nop __LINE__
  2582. }
  2583. ;;
  2584. /* 29 */
  2585. { .mfb
  2586. nop __LINE__
  2587. FMA f71 = f39, f48, f71 // A8 * B1
  2588. nop __LINE__
  2589. }
  2590. { .mfb
  2591. nop __LINE__
  2592. FMA_A f70 = f39, f49, f70 // A8 * B2
  2593. nop __LINE__
  2594. }
  2595. ;;
  2596. /* 30 */
  2597. { .mfb
  2598. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  2599. FMA f87 = f39, f50, f87 // A8 * B3
  2600. nop __LINE__
  2601. }
  2602. { .mfb
  2603. nop __LINE__
  2604. FMA_A f86 = f39, f51, f86 // A8 * B4
  2605. nop __LINE__
  2606. }
  2607. ;;
  2608. /* 31 */
  2609. { .mfb
  2610. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  2611. FMA f103 = f39, f52, f103 // A8 * B5
  2612. nop __LINE__
  2613. }
  2614. { .mfb
  2615. nop __LINE__
  2616. FMA_A f102 = f39, f53, f102 // A8 * B6
  2617. nop __LINE__
  2618. }
  2619. ;;
  2620. /* 32 */
  2621. { .mfb
  2622. nop __LINE__
  2623. FMA f119 = f39, f54, f119 // A8 * B7
  2624. nop __LINE__
  2625. }
  2626. { .mfb
  2627. nop __LINE__
  2628. FMA_A f118 = f39, f55, f118 // A8 * B8
  2629. nop __LINE__
  2630. }
  2631. ;;
  2632. /* 33 */
  2633. { .mfb
  2634. nop __LINE__
  2635. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  2636. nop __LINE__
  2637. }
  2638. { .mfb
  2639. nop __LINE__
  2640. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  2641. nop __LINE__
  2642. }
  2643. ;;
  2644. /* 34 */
  2645. { .mfb
  2646. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  2647. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  2648. nop __LINE__
  2649. }
  2650. { .mfb
  2651. nop __LINE__
  2652. (p3) FMA_B f81 = f40, f59, f81 // A1 * B4
  2653. nop __LINE__
  2654. }
  2655. ;;
  2656. /* 35 */
  2657. { .mfb
  2658. (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  2659. (p3) FMA f96 = f40, f60, f96 // A1 * B5
  2660. nop __LINE__
  2661. }
  2662. { .mfb
  2663. nop __LINE__
  2664. (p3) FMA_B f97 = f40, f61, f97 // A1 * B6
  2665. nop __LINE__
  2666. }
  2667. ;;
  2668. /* 36 */
  2669. { .mfb
  2670. (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  2671. (p3) FMA f112 = f40, f62, f112 // A1 * B7
  2672. nop __LINE__
  2673. }
  2674. { .mfb
  2675. nop __LINE__
  2676. (p3) FMA_B f113 = f40, f63, f113 // A1 * B8
  2677. nop __LINE__
  2678. }
  2679. ;;
  2680. /* 37 */
  2681. { .mfb
  2682. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  2683. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  2684. nop __LINE__
  2685. }
  2686. { .mfb
  2687. nop __LINE__
  2688. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  2689. nop __LINE__
  2690. }
  2691. ;;
  2692. /* 38 */
  2693. { .mfb
  2694. (p4) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  2695. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  2696. nop __LINE__
  2697. }
  2698. { .mfb
  2699. nop __LINE__
  2700. (p3) FMA_A f80 = f41, f59, f80 // A2 * B4
  2701. nop __LINE__
  2702. }
  2703. ;;
  2704. /* 39 */
  2705. { .mfb
  2706. (p4) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  2707. (p3) FMA f97 = f41, f60, f97 // A2 * B5
  2708. nop __LINE__
  2709. }
  2710. { .mfb
  2711. nop __LINE__
  2712. (p3) FMA_A f96 = f41, f61, f96 // A2 * B6
  2713. nop __LINE__
  2714. }
  2715. ;;
  2716. /* 40 */
  2717. { .mfb
  2718. nop __LINE__
  2719. (p3) FMA f113 = f41, f62, f113 // A2 * B7
  2720. nop __LINE__
  2721. }
  2722. { .mfb
  2723. nop __LINE__
  2724. (p3) FMA_A f112 = f41, f63, f112 // A2 * B8
  2725. nop __LINE__
  2726. }
  2727. ;;
  2728. /* 41 */
  2729. { .mfb
  2730. nop __LINE__
  2731. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  2732. nop __LINE__
  2733. }
  2734. { .mfb
  2735. nop __LINE__
  2736. (p3) FMA_B f67 = f42, f57, f67 // A3 * B2
  2737. nop __LINE__
  2738. }
  2739. ;;
  2740. /* 42 */
  2741. { .mfb
  2742. nop __LINE__
  2743. (p3) FMA f82 = f42, f58, f82 // A3 * B3
  2744. nop __LINE__
  2745. }
  2746. { .mfb
  2747. nop __LINE__
  2748. (p3) FMA_B f83 = f42, f59, f83 // A3 * B4
  2749. nop __LINE__
  2750. }
  2751. ;;
  2752. /* 43 */
  2753. { .mfb
  2754. nop __LINE__
  2755. (p3) FMA f98 = f42, f60, f98 // A3 * B5
  2756. nop __LINE__
  2757. }
  2758. { .mfb
  2759. nop __LINE__
  2760. (p3) FMA_B f99 = f42, f61, f99 // A3 * B6
  2761. nop __LINE__
  2762. }
  2763. ;;
  2764. /* 44 */
  2765. { .mfb
  2766. nop __LINE__
  2767. (p3) FMA f114 = f42, f62, f114 // A3 * B7
  2768. nop __LINE__
  2769. }
  2770. { .mfb
  2771. nop __LINE__
  2772. (p3) FMA_B f115 = f42, f63, f115 // A3 * B8
  2773. nop __LINE__
  2774. }
  2775. ;;
  2776. /* 45 */
  2777. { .mfb
  2778. nop __LINE__
  2779. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  2780. nop __LINE__
  2781. }
  2782. { .mfb
  2783. nop __LINE__
  2784. (p3) FMA_A f66 = f43, f57, f66 // A4 * B2
  2785. nop __LINE__
  2786. }
  2787. ;;
  2788. /* 46 */
  2789. { .mfb
  2790. nop __LINE__
  2791. (p3) FMA f83 = f43, f58, f83 // A4 * B3
  2792. nop __LINE__
  2793. }
  2794. { .mfb
  2795. nop __LINE__
  2796. (p3) FMA_A f82 = f43, f59, f82 // A4 * B4
  2797. nop __LINE__
  2798. }
  2799. ;;
  2800. /* 47 */
  2801. { .mfb
  2802. nop __LINE__
  2803. (p3) FMA f99 = f43, f60, f99 // A4 * B5
  2804. nop __LINE__
  2805. }
  2806. { .mfb
  2807. nop __LINE__
  2808. (p3) FMA_A f98 = f43, f61, f98 // A4 * B6
  2809. nop __LINE__
  2810. }
  2811. ;;
  2812. /* 48 */
  2813. { .mfb
  2814. nop __LINE__
  2815. (p3) FMA f115 = f43, f62, f115 // A4 * B7
  2816. nop __LINE__
  2817. }
  2818. { .mfb
  2819. nop __LINE__
  2820. (p3) FMA_A f114 = f43, f63, f114 // A4 * B8
  2821. nop __LINE__
  2822. }
  2823. ;;
  2824. /* 49 */
  2825. { .mfb
  2826. nop __LINE__
  2827. (p3) FMA f68 = f44, f56, f68 // A5 * B1
  2828. nop __LINE__
  2829. }
  2830. { .mfb
  2831. nop __LINE__
  2832. (p3) FMA_B f69 = f44, f57, f69 // A5 * B2
  2833. nop __LINE__
  2834. }
  2835. ;;
  2836. /* 50 */
  2837. { .mfb
  2838. nop __LINE__
  2839. (p3) FMA f84 = f44, f58, f84 // A5 * B3
  2840. nop __LINE__
  2841. }
  2842. { .mfb
  2843. nop __LINE__
  2844. (p3) FMA_B f85 = f44, f59, f85 // A5 * B4
  2845. nop __LINE__
  2846. }
  2847. ;;
  2848. /* 51 */
  2849. { .mfb
  2850. nop __LINE__
  2851. (p3) FMA f100 = f44, f60, f100 // A5 * B5
  2852. nop __LINE__
  2853. }
  2854. { .mfb
  2855. nop __LINE__
  2856. (p3) FMA_B f101 = f44, f61, f101 // A5 * B6
  2857. nop __LINE__
  2858. }
  2859. ;;
  2860. /* 52 */
  2861. { .mfb
  2862. nop __LINE__
  2863. (p3) FMA f116 = f44, f62, f116 // A5 * B7
  2864. nop __LINE__
  2865. }
  2866. { .mfb
  2867. nop __LINE__
  2868. (p3) FMA_B f117 = f44, f63, f117 // A5 * B8
  2869. nop __LINE__
  2870. }
  2871. ;;
  2872. /* 53 */
  2873. { .mfb
  2874. nop __LINE__
  2875. (p3) FMA f69 = f45, f56, f69 // A6 * B1
  2876. nop __LINE__
  2877. }
  2878. { .mfb
  2879. nop __LINE__
  2880. (p3) FMA_A f68 = f45, f57, f68 // A6 * B2
  2881. nop __LINE__
  2882. }
  2883. ;;
  2884. /* 54 */
  2885. { .mfb
  2886. nop __LINE__
  2887. (p3) FMA f85 = f45, f58, f85 // A6 * B3
  2888. nop __LINE__
  2889. }
  2890. { .mfb
  2891. nop __LINE__
  2892. (p3) FMA_A f84 = f45, f59, f84 // A6 * B4
  2893. nop __LINE__
  2894. }
  2895. ;;
  2896. /* 55 */
  2897. { .mfb
  2898. nop __LINE__
  2899. (p3) FMA f101 = f45, f60, f101 // A6 * B5
  2900. nop __LINE__
  2901. }
  2902. { .mfb
  2903. nop __LINE__
  2904. (p3) FMA_A f100 = f45, f61, f100 // A6 * B6
  2905. nop __LINE__
  2906. }
  2907. ;;
  2908. /* 56 */
  2909. { .mfb
  2910. nop __LINE__
  2911. (p3) FMA f117 = f45, f62, f117 // A6 * B7
  2912. nop __LINE__
  2913. }
  2914. { .mfb
  2915. nop __LINE__
  2916. (p3) FMA_A f116 = f45, f63, f116 // A6 * B8
  2917. nop __LINE__
  2918. }
  2919. ;;
  2920. /* 57 */
  2921. { .mfb
  2922. nop __LINE__
  2923. (p3) FMA f70 = f46, f56, f70 // A7 * B1
  2924. nop __LINE__
  2925. }
  2926. { .mfb
  2927. nop __LINE__
  2928. (p3) FMA_B f71 = f46, f57, f71 // A7 * B2
  2929. nop __LINE__
  2930. }
  2931. ;;
  2932. /* 58 */
  2933. { .mfb
  2934. nop __LINE__
  2935. (p3) FMA f86 = f46, f58, f86 // A7 * B3
  2936. nop __LINE__
  2937. }
  2938. { .mfb
  2939. nop __LINE__
  2940. (p3) FMA_B f87 = f46, f59, f87 // A7 * B4
  2941. nop __LINE__
  2942. }
  2943. ;;
  2944. /* 59 */
  2945. { .mfb
  2946. nop __LINE__
  2947. (p3) FMA f102 = f46, f60, f102 // A7 * B5
  2948. nop __LINE__
  2949. }
  2950. { .mfb
  2951. nop __LINE__
  2952. (p3) FMA_B f103 = f46, f61, f103 // A7 * B6
  2953. nop __LINE__
  2954. }
  2955. ;;
  2956. /* 60 */
  2957. { .mfb
  2958. nop __LINE__
  2959. (p3) FMA f118 = f46, f62, f118 // A7 * B7
  2960. nop __LINE__
  2961. }
  2962. { .mfb
  2963. nop __LINE__
  2964. (p3) FMA_B f119 = f46, f63, f119 // A7 * B8
  2965. nop __LINE__
  2966. }
  2967. ;;
  2968. /* 61 */
  2969. { .mfb
  2970. nop __LINE__
  2971. (p3) FMA f71 = f47, f56, f71 // A8 * B1
  2972. nop __LINE__
  2973. }
  2974. { .mfb
  2975. nop __LINE__
  2976. (p3) FMA_A f70 = f47, f57, f70 // A8 * B2
  2977. nop __LINE__
  2978. }
  2979. ;;
  2980. /* 62 */
  2981. { .mfb
  2982. nop __LINE__
  2983. (p3) FMA f87 = f47, f58, f87 // A8 * B3
  2984. nop __LINE__
  2985. }
  2986. { .mfb
  2987. nop __LINE__
  2988. (p3) FMA_A f86 = f47, f59, f86 // A8 * B4
  2989. nop __LINE__
  2990. }
  2991. ;;
  2992. /* 63 */
  2993. { .mfb
  2994. nop __LINE__
  2995. (p3) FMA f103 = f47, f60, f103 // A8 * B5
  2996. nop __LINE__
  2997. }
  2998. { .mfb
  2999. nop __LINE__
  3000. (p3) FMA_A f102 = f47, f61, f102 // A8 * B6
  3001. nop __LINE__
  3002. }
  3003. ;;
  3004. /* 64 */
  3005. { .mfi
  3006. nop __LINE__
  3007. (p3) FMA f119 = f47, f62, f119 // A8 * B7
  3008. adds L = -1, L
  3009. }
  3010. { .mfb
  3011. nop __LINE__
  3012. (p3) FMA_A f118 = f47, f63, f118 // A8 * B8
  3013. br.cloop.sptk.few .L012
  3014. }
  3015. ;;
  /*
   * .L018: reached after the update loop, or directly from .L011 when the
   * loop is skipped.
   * For the LN and RT variants the panel pointers are recomputed first:
   *   r2      = (KK - 4) << ZBASE_SHIFT
   *   AOFFSET = AORIG + (r2 << 2)
   *   BOFFSET = B     + (r2 << 2)
   * Both arms of the inner LN test emit the same instruction.
   */
  3016. .L018:
  3017. #if defined(LN) || defined(RT)
  3018. #ifdef LN
  3019. adds r2 = -4, KK
  3020. #else
  3021. adds r2 = -4, KK
  3022. #endif
  3023. ;;
  3024. shladd r2 = r2, ZBASE_SHIFT, r0 // r2 = r2 << ZBASE_SHIFT
  3025. ;;
  3026. shladd AOFFSET = r2, 2, AORIG
  3027. shladd BOFFSET = r2, 2, B
  3028. ;;
  3029. #endif
  3030. #if defined(LN) || defined(LT)
  3031. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  3032. ;;
  3033. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  3034. ;;
  3035. LDFPD f76, f77 = [BOFFSET], 2 * SIZE
  3036. ;;
  3037. LDFPD f78, f79 = [BOFFSET], 2 * SIZE
  3038. ;;
  3039. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  3040. ;;
  3041. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  3042. ;;
  3043. LDFPD f92, f93 = [BOFFSET], 2 * SIZE
  3044. ;;
  3045. { .mfi
  3046. LDFPD f94, f95 = [BOFFSET], 2 * SIZE
  3047. FSUB f64 = f72, f64
  3048. nop __LINE__
  3049. }
  3050. { .mfi
  3051. nop __LINE__
  3052. FSUB_A f65 = f73, f65
  3053. nop __LINE__
  3054. }
  3055. ;;
  3056. { .mfi
  3057. LDFPD f104, f105 = [BOFFSET], 2 * SIZE
  3058. FSUB f80 = f74, f80
  3059. nop __LINE__
  3060. }
  3061. { .mfi
  3062. nop __LINE__
  3063. FSUB_A f81 = f75, f81
  3064. nop __LINE__
  3065. }
  3066. ;;
  3067. { .mfi
  3068. LDFPD f106, f107 = [BOFFSET], 2 * SIZE
  3069. FSUB f96 = f76, f96
  3070. nop __LINE__
  3071. }
  3072. { .mfi
  3073. nop __LINE__
  3074. FSUB_A f97 = f77, f97
  3075. nop __LINE__
  3076. }
  3077. ;;
  3078. { .mfi
  3079. LDFPD f108, f109 = [BOFFSET], 2 * SIZE
  3080. FSUB f112 = f78, f112
  3081. nop __LINE__
  3082. }
  3083. { .mfi
  3084. nop __LINE__
  3085. FSUB_A f113 = f79, f113
  3086. nop __LINE__
  3087. }
  3088. ;;
  3089. { .mfi
  3090. LDFPD f110, f111 = [BOFFSET], 2 * SIZE
  3091. FSUB f66 = f88, f66
  3092. nop __LINE__
  3093. }
  3094. { .mfi
  3095. nop __LINE__
  3096. FSUB_A f67 = f89, f67
  3097. nop __LINE__
  3098. }
  3099. ;;
  3100. { .mfi
  3101. LDFPD f120, f121 = [BOFFSET], 2 * SIZE
  3102. FSUB f82 = f90, f82
  3103. nop __LINE__
  3104. }
  3105. { .mfi
  3106. nop __LINE__
  3107. FSUB_A f83 = f91, f83
  3108. nop __LINE__
  3109. }
  3110. ;;
  3111. { .mfi
  3112. LDFPD f122, f123 = [BOFFSET], 2 * SIZE
  3113. FSUB f98 = f92, f98
  3114. nop __LINE__
  3115. }
  3116. { .mfi
  3117. nop __LINE__
  3118. FSUB_A f99 = f93, f99
  3119. nop __LINE__
  3120. }
  3121. ;;
  3122. { .mfi
  3123. LDFPD f124, f125 = [BOFFSET], 2 * SIZE
  3124. FSUB f114 = f94, f114
  3125. nop __LINE__
  3126. }
  3127. { .mfi
  3128. nop __LINE__
  3129. FSUB_A f115 = f95, f115
  3130. nop __LINE__
  3131. }
  3132. ;;
  3133. { .mfi
  3134. LDFPD f126, f127 = [BOFFSET]
  3135. FSUB f68 = f104, f68
  3136. adds BOFFSET = -30 * SIZE, BOFFSET
  3137. }
  3138. { .mfi
  3139. nop __LINE__
  3140. FSUB_A f69 = f105, f69
  3141. #ifdef LN
  3142. adds AOFFSET = 30 * SIZE, AOFFSET
  3143. #else
  3144. nop __LINE__
  3145. #endif
  3146. }
  3147. ;;
  3148. { .mfi
  3149. LDFPD f72, f73 = [AOFFSET]
  3150. FSUB f84 = f106, f84
  3151. #ifdef LN
  3152. adds AOFFSET = - 2 * SIZE, AOFFSET
  3153. #else
  3154. adds AOFFSET = 2 * SIZE, AOFFSET
  3155. #endif
  3156. }
  3157. { .mfi
  3158. nop __LINE__
  3159. FSUB_A f85 = f107, f85
  3160. nop __LINE__
  3161. }
  3162. ;;
  3163. { .mfi
  3164. LDFPD f74, f75 = [AOFFSET]
  3165. FSUB f100 = f108, f100
  3166. #ifdef LN
  3167. adds AOFFSET = - 2 * SIZE, AOFFSET
  3168. #else
  3169. adds AOFFSET = 2 * SIZE, AOFFSET
  3170. #endif
  3171. }
  3172. { .mfi
  3173. nop __LINE__
  3174. FSUB_A f101 = f109, f101
  3175. nop __LINE__
  3176. }
  3177. ;;
  3178. { .mfi
  3179. nop __LINE__
  3180. FSUB f116 = f110, f116
  3181. nop __LINE__
  3182. }
  3183. { .mfi
  3184. nop __LINE__
  3185. FSUB_A f117 = f111, f117
  3186. nop __LINE__
  3187. }
  3188. ;;
  3189. { .mfi
  3190. nop __LINE__
  3191. FSUB f70 = f120, f70
  3192. nop __LINE__
  3193. }
  3194. { .mfi
  3195. nop __LINE__
  3196. FSUB_A f71 = f121, f71
  3197. nop __LINE__
  3198. }
  3199. ;;
  3200. { .mfi
  3201. nop __LINE__
  3202. FSUB f86 = f122, f86
  3203. nop __LINE__
  3204. }
  3205. { .mfi
  3206. nop __LINE__
  3207. FSUB_A f87 = f123, f87
  3208. nop __LINE__
  3209. }
  3210. ;;
  3211. { .mfi
  3212. nop __LINE__
  3213. FSUB f102 = f124, f102
  3214. nop __LINE__
  3215. }
  3216. { .mfi
  3217. nop __LINE__
  3218. FSUB_A f103 = f125, f103
  3219. nop __LINE__
  3220. }
  3221. ;;
  3222. { .mfi
  3223. nop __LINE__
  3224. FSUB f118 = f126, f118
  3225. nop __LINE__
  3226. }
  3227. { .mfi
  3228. nop __LINE__
  3229. FSUB_A f119 = f127, f119
  3230. nop __LINE__
  3231. }
  3232. ;;
  3233. #else
  3234. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  3235. ;;
  3236. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  3237. ;;
  3238. LDFPD f76, f77 = [AOFFSET], 2 * SIZE
  3239. ;;
  3240. LDFPD f78, f79 = [AOFFSET], 2 * SIZE
  3241. ;;
  3242. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  3243. ;;
  3244. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  3245. ;;
  3246. { .mfi
  3247. LDFPD f92, f93 = [AOFFSET], 2 * SIZE
  3248. FSUB f64 = f72, f64
  3249. nop __LINE__
  3250. }
  3251. { .mfi
  3252. nop __LINE__
  3253. FSUB f65 = f73, f65
  3254. nop __LINE__
  3255. }
  3256. ;;
  3257. { .mfi
  3258. LDFPD f94, f95 = [AOFFSET], 2 * SIZE
  3259. FSUB f66 = f74, f66
  3260. nop __LINE__
  3261. }
  3262. { .mfi
  3263. nop __LINE__
  3264. FSUB f67 = f75, f67
  3265. nop __LINE__
  3266. }
  3267. ;;
  3268. { .mfi
  3269. LDFPD f104, f105 = [AOFFSET], 2 * SIZE
  3270. FSUB f68 = f76, f68
  3271. nop __LINE__
  3272. }
  3273. { .mfi
  3274. nop __LINE__
  3275. FSUB f69 = f77, f69
  3276. nop __LINE__
  3277. }
  3278. ;;
  3279. { .mfi
  3280. LDFPD f106, f107 = [AOFFSET], 2 * SIZE
  3281. FSUB f70 = f78, f70
  3282. nop __LINE__
  3283. }
  3284. { .mfi
  3285. nop __LINE__
  3286. FSUB f71 = f79, f71
  3287. nop __LINE__
  3288. }
  3289. ;;
  3290. { .mfi
  3291. LDFPD f108, f109 = [AOFFSET], 2 * SIZE
  3292. FSUB f80 = f88, f80
  3293. nop __LINE__
  3294. }
  3295. { .mfi
  3296. nop __LINE__
  3297. FSUB f81 = f89, f81
  3298. nop __LINE__
  3299. }
  3300. ;;
  3301. { .mfi
  3302. LDFPD f110, f111 = [AOFFSET], 2 * SIZE
  3303. FSUB f82 = f90, f82
  3304. nop __LINE__
  3305. }
  3306. { .mfi
  3307. nop __LINE__
  3308. FSUB f83 = f91, f83
  3309. nop __LINE__
  3310. }
  3311. ;;
  3312. { .mfi
  3313. LDFPD f120, f121 = [AOFFSET], 2 * SIZE
  3314. FSUB f84 = f92, f84
  3315. nop __LINE__
  3316. }
  3317. { .mfi
  3318. nop __LINE__
  3319. FSUB f85 = f93, f85
  3320. nop __LINE__
  3321. }
  3322. ;;
  3323. { .mfi
  3324. LDFPD f122, f123 = [AOFFSET], 2 * SIZE
  3325. FSUB f86 = f94, f86
  3326. nop __LINE__
  3327. }
  3328. { .mfi
  3329. nop __LINE__
  3330. FSUB f87 = f95, f87
  3331. nop __LINE__
  3332. }
  3333. ;;
  3334. { .mfi
  3335. LDFPD f124, f125 = [AOFFSET], 2 * SIZE
  3336. FSUB f96 = f104, f96
  3337. nop __LINE__
  3338. }
  3339. { .mfi
  3340. nop __LINE__
  3341. FSUB f97 = f105, f97
  3342. nop __LINE__
  3343. }
  3344. ;;
  3345. { .mfi
  3346. LDFPD f126, f127 = [AOFFSET]
  3347. FSUB f98 = f106, f98
  3348. adds AOFFSET = -30 * SIZE, AOFFSET
  3349. }
  3350. { .mfi
  3351. nop __LINE__
  3352. FSUB f99 = f107, f99
  3353. #ifdef RT
  3354. adds BOFFSET = 30 * SIZE, BOFFSET
  3355. #else
  3356. nop __LINE__
  3357. #endif
  3358. }
  3359. ;;
  3360. { .mfi
  3361. LDFPD f72, f73 = [BOFFSET]
  3362. FSUB f100 = f108, f100
  3363. #ifdef RN
  3364. adds BOFFSET = 2 * SIZE, BOFFSET
  3365. #else
  3366. adds BOFFSET = - 2 * SIZE, BOFFSET
  3367. #endif
  3368. }
  3369. { .mfi
  3370. nop __LINE__
  3371. FSUB f101 = f109, f101
  3372. nop __LINE__
  3373. }
  3374. ;;
  3375. { .mfi
  3376. LDFPD f74, f75 = [BOFFSET]
  3377. FSUB f102 = f110, f102
  3378. #ifdef RN
  3379. adds BOFFSET = 2 * SIZE, BOFFSET
  3380. #else
  3381. adds BOFFSET = - 2 * SIZE, BOFFSET
  3382. #endif
  3383. }
  3384. { .mfi
  3385. nop __LINE__
  3386. FSUB f103 = f111, f103
  3387. nop __LINE__
  3388. }
  3389. ;;
  3390. { .mfi
  3391. nop __LINE__
  3392. FSUB f112 = f120, f112
  3393. nop __LINE__
  3394. }
  3395. { .mfi
  3396. nop __LINE__
  3397. FSUB f113 = f121, f113
  3398. nop __LINE__
  3399. }
  3400. ;;
  3401. { .mfi
  3402. nop __LINE__
  3403. FSUB f114 = f122, f114
  3404. nop __LINE__
  3405. }
  3406. { .mfi
  3407. nop __LINE__
  3408. FSUB f115 = f123, f115
  3409. nop __LINE__
  3410. }
  3411. ;;
  3412. { .mfi
  3413. nop __LINE__
  3414. FSUB f116 = f124, f116
  3415. nop __LINE__
  3416. }
  3417. { .mfi
  3418. nop __LINE__
  3419. FSUB f117 = f125, f117
  3420. nop __LINE__
  3421. }
  3422. ;;
  3423. { .mfi
  3424. nop __LINE__
  3425. FSUB f118 = f126, f118
  3426. nop __LINE__
  3427. }
  3428. { .mfi
  3429. nop __LINE__
  3430. FSUB f119 = f127, f119
  3431. nop __LINE__
  3432. }
  3433. ;;
  3434. #endif
  3435. #ifdef LN
  3436. { .mfi
  3437. LDFPD f76, f77 = [AOFFSET]
  3438. FMPY f32 = f72, f70
  3439. adds AOFFSET = - 2 * SIZE, AOFFSET
  3440. }
  3441. { .mfi
  3442. nop __LINE__
  3443. FMPY f36 = f72, f102
  3444. nop __LINE__
  3445. }
  3446. ;;
  3447. { .mfi
  3448. LDFPD f78, f79 = [AOFFSET]
  3449. FMPY f33 = f73, f70
  3450. adds AOFFSET = - 4 * SIZE, AOFFSET
  3451. }
  3452. { .mfi
  3453. nop __LINE__
  3454. FMPY f37 = f73, f102
  3455. nop __LINE__
  3456. }
  3457. ;;
  3458. { .mfi
  3459. LDFPD f88, f89 = [AOFFSET]
  3460. FMPY f34 = f72, f86
  3461. adds AOFFSET = - 2 * SIZE, AOFFSET
  3462. }
  3463. { .mfi
  3464. nop __LINE__
  3465. FMPY f38 = f72, f118
  3466. nop __LINE__
  3467. }
  3468. ;;
  3469. { .mfi
  3470. LDFPD f90, f91 = [AOFFSET]
  3471. FMPY f35 = f73, f86
  3472. adds AOFFSET = - 2 * SIZE, AOFFSET
  3473. }
  3474. { .mfi
  3475. nop __LINE__
  3476. FMPY f39 = f73, f118
  3477. nop __LINE__
  3478. }
  3479. ;;
  3480. { .mfi
  3481. LDFPD f92, f93 = [AOFFSET]
  3482. FMA_C f70 = f73, f71, f32
  3483. adds AOFFSET = - 6 * SIZE, AOFFSET
  3484. }
  3485. { .mfi
  3486. nop __LINE__
  3487. FMA_C f102 = f73, f103, f36
  3488. adds C1 = -2 * SIZE, C1
  3489. }
  3490. ;;
  3491. { .mfi
  3492. LDFPD f104, f105 = [AOFFSET]
  3493. FMA_D f71 = f72, f71, f33
  3494. adds AOFFSET = - 2 * SIZE, AOFFSET
  3495. }
  3496. { .mfi
  3497. nop __LINE__
  3498. FMA_D f103 = f72, f103, f37
  3499. adds C2 = -2 * SIZE, C2
  3500. }
  3501. ;;
  3502. { .mfi
  3503. LDFPD f106, f107 = [AOFFSET]
  3504. FMA_C f86 = f73, f87, f34
  3505. adds AOFFSET = - 8 * SIZE, AOFFSET
  3506. }
  3507. { .mfi
  3508. nop __LINE__
  3509. FMA_C f118 = f73, f119, f38
  3510. adds C3 = -2 * SIZE, C3
  3511. }
  3512. ;;
  3513. { .mfi
  3514. LDFPD f120, f121 = [AOFFSET]
  3515. FMA_D f87 = f72, f87, f35
  3516. adds BOFFSET2 = 28 * SIZE, BOFFSET
  3517. }
  3518. { .mfi
  3519. nop __LINE__
  3520. FMA_D f119 = f72, f119, f39
  3521. adds BOFFSET = 24 * SIZE, BOFFSET
  3522. }
  3523. ;;
  3524. { .mfi
  3525. STFD [BOFFSET] = f70, SIZE
  3526. FNMA f68 = f74, f70, f68
  3527. adds C4 = -2 * SIZE, C4
  3528. }
  3529. { .mfi
  3530. STFD [BOFFSET2] = f102, SIZE
  3531. FNMA f100 = f74, f102, f100
  3532. nop __LINE__
  3533. }
  3534. ;;
  3535. { .mfi
  3536. STFD [BOFFSET] = f71, SIZE
  3537. FMA_A f69 = f75, f70, f69
  3538. nop __LINE__
  3539. }
  3540. { .mfi
  3541. STFD [BOFFSET2] = f103, SIZE
  3542. FMA_A f101 = f75, f102, f101
  3543. nop __LINE__
  3544. }
  3545. ;;
  3546. { .mfi
  3547. STFD [BOFFSET] = f86, SIZE
  3548. FNMA f84 = f74, f86, f84
  3549. nop __LINE__
  3550. }
  3551. { .mfi
  3552. STFD [BOFFSET2] = f118, SIZE
  3553. FNMA f116 = f74, f118, f116
  3554. nop __LINE__
  3555. }
  3556. ;;
  3557. { .mfi
  3558. STFD [BOFFSET] = f87, -11 * SIZE
  3559. FMA_A f85 = f75, f86, f85
  3560. nop __LINE__
  3561. }
  3562. { .mfi
  3563. STFD [BOFFSET2] = f119, -11 * SIZE
  3564. FMA_A f117 = f75, f118, f117
  3565. nop __LINE__
  3566. }
  3567. ;;
  3568. { .mfi
  3569. STFD [C1 ] = f70, SIZE
  3570. FMA_B f68 = f75, f71, f68
  3571. nop __LINE__
  3572. }
  3573. { .mfi
  3574. STFD [C3 ] = f102, SIZE
  3575. FMA_B f100 = f75, f103, f100
  3576. nop __LINE__
  3577. }
  3578. ;;
  3579. { .mfi
  3580. STFD [C1 ] = f71, -3 * SIZE
  3581. FNMA f69 = f74, f71, f69
  3582. nop __LINE__
  3583. }
  3584. { .mfi
  3585. STFD [C3 ] = f103, -3 * SIZE
  3586. FNMA f101 = f74, f103, f101
  3587. nop __LINE__
  3588. }
  3589. ;;
  3590. { .mfi
  3591. STFD [C2 ] = f86, SIZE
  3592. FMA_B f84 = f75, f87, f84
  3593. nop __LINE__
  3594. }
  3595. { .mfi
  3596. STFD [C4 ] = f118, SIZE
  3597. FMA_B f116 = f75, f119, f116
  3598. nop __LINE__
  3599. }
  3600. ;;
  3601. { .mfi
  3602. STFD [C2 ] = f87, -3 * SIZE
  3603. FNMA f85 = f74, f87, f85
  3604. nop __LINE__
  3605. }
  3606. { .mfi
  3607. STFD [C4 ] = f119, -3 * SIZE
  3608. FNMA f117 = f74, f119, f117
  3609. nop __LINE__
  3610. }
  3611. ;;
  3612. { .mfi
  3613. nop __LINE__
  3614. FNMA f66 = f76, f70, f66
  3615. nop __LINE__
  3616. }
  3617. { .mfi
  3618. nop __LINE__
  3619. FNMA f98 = f76, f102, f98
  3620. nop __LINE__
  3621. }
  3622. ;;
  3623. { .mfi
  3624. nop __LINE__
  3625. FMA_A f67 = f77, f70, f67
  3626. nop __LINE__
  3627. }
  3628. { .mfi
  3629. nop __LINE__
  3630. FMA_A f99 = f77, f102, f99
  3631. nop __LINE__
  3632. }
  3633. ;;
  3634. { .mfi
  3635. nop __LINE__
  3636. FNMA f82 = f76, f86, f82
  3637. nop __LINE__
  3638. }
  3639. { .mfi
  3640. nop __LINE__
  3641. FNMA f114 = f76, f118, f114
  3642. nop __LINE__
  3643. }
  3644. ;;
  3645. { .mfi
  3646. nop __LINE__
  3647. FMA_A f83 = f77, f86, f83
  3648. nop __LINE__
  3649. }
  3650. { .mfi
  3651. nop __LINE__
  3652. FMA_A f115 = f77, f118, f115
  3653. nop __LINE__
  3654. }
  3655. ;;
  3656. { .mfi
  3657. nop __LINE__
  3658. FMA_B f66 = f77, f71, f66
  3659. nop __LINE__
  3660. }
  3661. { .mfi
  3662. nop __LINE__
  3663. FMA_B f98 = f77, f103, f98
  3664. nop __LINE__
  3665. }
  3666. ;;
  3667. { .mfi
  3668. nop __LINE__
  3669. FNMA f67 = f76, f71, f67
  3670. nop __LINE__
  3671. }
  3672. { .mfi
  3673. nop __LINE__
  3674. FNMA f99 = f76, f103, f99
  3675. nop __LINE__
  3676. }
  3677. ;;
  3678. { .mfi
  3679. nop __LINE__
  3680. FMA_B f82 = f77, f87, f82
  3681. nop __LINE__
  3682. }
  3683. { .mfi
  3684. nop __LINE__
  3685. FMA_B f114 = f77, f119, f114
  3686. nop __LINE__
  3687. }
  3688. ;;
  3689. { .mfi
  3690. nop __LINE__
  3691. FNMA f83 = f76, f87, f83
  3692. nop __LINE__
  3693. }
  3694. { .mfi
  3695. nop __LINE__
  3696. FNMA f115 = f76, f119, f115
  3697. nop __LINE__
  3698. }
  3699. ;;
  3700. { .mfi
  3701. nop __LINE__
  3702. FNMA f64 = f78, f70, f64
  3703. nop __LINE__
  3704. }
  3705. { .mfi
  3706. nop __LINE__
  3707. FNMA f96 = f78, f102, f96
  3708. nop __LINE__
  3709. }
  3710. ;;
  3711. { .mfi
  3712. nop __LINE__
  3713. FMA_A f65 = f79, f70, f65
  3714. nop __LINE__
  3715. }
  3716. { .mfi
  3717. nop __LINE__
  3718. FMA_A f97 = f79, f102, f97
  3719. nop __LINE__
  3720. }
  3721. ;;
  3722. { .mfi
  3723. nop __LINE__
  3724. FNMA f80 = f78, f86, f80
  3725. nop __LINE__
  3726. }
  3727. { .mfi
  3728. nop __LINE__
  3729. FNMA f112 = f78, f118, f112
  3730. nop __LINE__
  3731. }
  3732. ;;
  3733. { .mfi
  3734. nop __LINE__
  3735. FMA_A f81 = f79, f86, f81
  3736. nop __LINE__
  3737. }
  3738. { .mfi
  3739. nop __LINE__
  3740. FMA_A f113 = f79, f118, f113
  3741. nop __LINE__
  3742. }
  3743. ;;
  3744. { .mfi
  3745. nop __LINE__
  3746. FMA_B f64 = f79, f71, f64
  3747. nop __LINE__
  3748. }
  3749. { .mfi
  3750. nop __LINE__
  3751. FMA_B f96 = f79, f103, f96
  3752. nop __LINE__
  3753. }
  3754. ;;
  3755. { .mfi
  3756. nop __LINE__
  3757. FNMA f65 = f78, f71, f65
  3758. nop __LINE__
  3759. }
  3760. { .mfi
  3761. nop __LINE__
  3762. FNMA f97 = f78, f103, f97
  3763. nop __LINE__
  3764. }
  3765. ;;
  3766. { .mfi
  3767. nop __LINE__
  3768. FMA_B f80 = f79, f87, f80
  3769. nop __LINE__
  3770. }
  3771. { .mfi
  3772. nop __LINE__
  3773. FMA_B f112 = f79, f119, f112
  3774. nop __LINE__
  3775. }
  3776. ;;
  3777. { .mfi
  3778. nop __LINE__
  3779. FNMA f81 = f78, f87, f81
  3780. nop __LINE__
  3781. }
  3782. { .mfi
  3783. nop __LINE__
  3784. FNMA f113 = f78, f119, f113
  3785. nop __LINE__
  3786. }
  3787. ;;
  3788. { .mfi
  3789. nop __LINE__
  3790. FMPY f32 = f88, f68
  3791. nop __LINE__
  3792. }
  3793. { .mfi
  3794. nop __LINE__
  3795. FMPY f36 = f88, f100
  3796. nop __LINE__
  3797. }
  3798. ;;
  3799. { .mfi
  3800. nop __LINE__
  3801. FMPY f33 = f89, f68
  3802. nop __LINE__
  3803. }
  3804. { .mfi
  3805. nop __LINE__
  3806. FMPY f37 = f89, f100
  3807. nop __LINE__
  3808. }
  3809. ;;
  3810. { .mfi
  3811. nop __LINE__
  3812. FMPY f34 = f88, f84
  3813. nop __LINE__
  3814. }
  3815. { .mfi
  3816. nop __LINE__
  3817. FMPY f38 = f88, f116
  3818. nop __LINE__
  3819. }
  3820. ;;
  3821. { .mfi
  3822. nop __LINE__
  3823. FMPY f35 = f89, f84
  3824. nop __LINE__
  3825. }
  3826. { .mfi
  3827. nop __LINE__
  3828. FMPY f39 = f89, f116
  3829. nop __LINE__
  3830. }
  3831. ;;
  3832. { .mfi
  3833. nop __LINE__
  3834. FMA_C f68 = f89, f69, f32
  3835. nop __LINE__
  3836. }
  3837. { .mfi
  3838. nop __LINE__
  3839. FMA_C f100 = f89, f101, f36
  3840. nop __LINE__
  3841. }
  3842. ;;
  3843. { .mfi
  3844. nop __LINE__
  3845. FMA_D f69 = f88, f69, f33
  3846. nop __LINE__
  3847. }
  3848. { .mfi
  3849. nop __LINE__
  3850. FMA_D f101 = f88, f101, f37
  3851. nop __LINE__
  3852. }
  3853. ;;
  3854. { .mfi
  3855. nop __LINE__
  3856. FMA_C f84 = f89, f85, f34
  3857. nop __LINE__
  3858. }
  3859. { .mfi
  3860. nop __LINE__
  3861. FMA_C f116 = f89, f117, f38
  3862. nop __LINE__
  3863. }
  3864. ;;
  3865. { .mfi
  3866. nop __LINE__
  3867. FMA_D f85 = f88, f85, f35
  3868. nop __LINE__
  3869. }
  3870. { .mfi
  3871. nop __LINE__
  3872. FMA_D f117 = f88, f117, f39
  3873. nop __LINE__
  3874. }
  3875. ;;
  3876. { .mfi
  3877. STFD [BOFFSET] = f68, SIZE
  3878. FNMA f66 = f90, f68, f66
  3879. nop __LINE__
  3880. }
  3881. { .mfi
  3882. STFD [BOFFSET2] = f100, SIZE
  3883. FNMA f98 = f90, f100, f98
  3884. nop __LINE__
  3885. }
  3886. ;;
  3887. { .mfi
  3888. STFD [BOFFSET] = f69, SIZE
  3889. FMA_A f67 = f91, f68, f67
  3890. nop __LINE__
  3891. }
  3892. { .mfi
  3893. STFD [BOFFSET2] = f101, SIZE
  3894. FMA_A f99 = f91, f100, f99
  3895. nop __LINE__
  3896. }
  3897. ;;
  3898. { .mfi
  3899. STFD [BOFFSET] = f84, SIZE
  3900. FNMA f82 = f90, f84, f82
  3901. nop __LINE__
  3902. }
  3903. { .mfi
  3904. STFD [BOFFSET2] = f116, SIZE
  3905. FNMA f114 = f90, f116, f114
  3906. nop __LINE__
  3907. }
  3908. ;;
  3909. { .mfi
  3910. STFD [BOFFSET] = f85, -11 * SIZE
  3911. FMA_A f83 = f91, f84, f83
  3912. nop __LINE__
  3913. }
  3914. { .mfi
  3915. STFD [BOFFSET2] = f117, -11 * SIZE
  3916. FMA_A f115 = f91, f116, f115
  3917. nop __LINE__
  3918. }
  3919. ;;
  3920. { .mfi
  3921. STFD [C1 ] = f68, SIZE
  3922. FMA_B f66 = f91, f69, f66
  3923. nop __LINE__
  3924. }
  3925. { .mfi
  3926. STFD [C3 ] = f100, SIZE
  3927. FMA_B f98 = f91, f101, f98
  3928. nop __LINE__
  3929. }
  3930. ;;
  3931. { .mfi
  3932. STFD [C1 ] = f69, -3 * SIZE
  3933. FNMA f67 = f90, f69, f67
  3934. nop __LINE__
  3935. }
  3936. { .mfi
  3937. STFD [C3 ] = f101, -3 * SIZE
  3938. FNMA f99 = f90, f101, f99
  3939. nop __LINE__
  3940. }
  3941. ;;
  3942. { .mfi
  3943. STFD [C2 ] = f84, SIZE
  3944. FMA_B f82 = f91, f85, f82
  3945. nop __LINE__
  3946. }
  3947. { .mfi
  3948. STFD [C4 ] = f116, SIZE
  3949. FMA_B f114 = f91, f117, f114
  3950. nop __LINE__
  3951. }
  3952. ;;
  3953. { .mfi
  3954. STFD [C2 ] = f85, -3 * SIZE
  3955. FNMA f83 = f90, f85, f83
  3956. nop __LINE__
  3957. }
  3958. { .mfi
  3959. STFD [C4 ] = f117, -3 * SIZE
  3960. FNMA f115 = f90, f117, f115
  3961. nop __LINE__
  3962. }
  3963. ;;
  3964. { .mfi
  3965. nop __LINE__
  3966. FNMA f64 = f92, f68, f64
  3967. nop __LINE__
  3968. }
  3969. { .mfi
  3970. nop __LINE__
  3971. FNMA f96 = f92, f100, f96
  3972. nop __LINE__
  3973. }
  3974. ;;
  3975. { .mfi
  3976. nop __LINE__
  3977. FMA_A f65 = f93, f68, f65
  3978. nop __LINE__
  3979. }
  3980. { .mfi
  3981. nop __LINE__
  3982. FMA_A f97 = f93, f100, f97
  3983. nop __LINE__
  3984. }
  3985. ;;
  3986. { .mfi
  3987. nop __LINE__
  3988. FNMA f80 = f92, f84, f80
  3989. nop __LINE__
  3990. }
  3991. { .mfi
  3992. nop __LINE__
  3993. FNMA f112 = f92, f116, f112
  3994. nop __LINE__
  3995. }
  3996. ;;
  3997. { .mfi
  3998. nop __LINE__
  3999. FMA_A f81 = f93, f84, f81
  4000. nop __LINE__
  4001. }
  4002. { .mfi
  4003. nop __LINE__
  4004. FMA_A f113 = f93, f116, f113
  4005. nop __LINE__
  4006. }
  4007. ;;
  4008. { .mfi
  4009. nop __LINE__
  4010. FMA_B f64 = f93, f69, f64
  4011. nop __LINE__
  4012. }
  4013. { .mfi
  4014. nop __LINE__
  4015. FMA_B f96 = f93, f101, f96
  4016. nop __LINE__
  4017. }
  4018. ;;
  4019. { .mfi
  4020. nop __LINE__
  4021. FNMA f65 = f92, f69, f65
  4022. nop __LINE__
  4023. }
  4024. { .mfi
  4025. nop __LINE__
  4026. FNMA f97 = f92, f101, f97
  4027. nop __LINE__
  4028. }
  4029. ;;
  4030. { .mfi
  4031. nop __LINE__
  4032. FMA_B f80 = f93, f85, f80
  4033. nop __LINE__
  4034. }
  4035. { .mfi
  4036. nop __LINE__
  4037. FMA_B f112 = f93, f117, f112
  4038. nop __LINE__
  4039. }
  4040. ;;
  4041. { .mfi
  4042. nop __LINE__
  4043. FNMA f81 = f92, f85, f81
  4044. nop __LINE__
  4045. }
  4046. { .mfi
  4047. nop __LINE__
  4048. FNMA f113 = f92, f117, f113
  4049. nop __LINE__
  4050. }
  4051. ;;
  4052. { .mfi
  4053. nop __LINE__
  4054. FMPY f32 = f104, f66
  4055. nop __LINE__
  4056. }
  4057. { .mfi
  4058. nop __LINE__
  4059. FMPY f36 = f104, f98
  4060. nop __LINE__
  4061. }
  4062. ;;
  4063. { .mfi
  4064. nop __LINE__
  4065. FMPY f33 = f105, f66
  4066. nop __LINE__
  4067. }
  4068. { .mfi
  4069. nop __LINE__
  4070. FMPY f37 = f105, f98
  4071. nop __LINE__
  4072. }
  4073. ;;
  4074. { .mfi
  4075. nop __LINE__
  4076. FMPY f34 = f104, f82
  4077. nop __LINE__
  4078. }
  4079. { .mfi
  4080. nop __LINE__
  4081. FMPY f38 = f104, f114
  4082. nop __LINE__
  4083. }
  4084. ;;
  4085. { .mfi
  4086. nop __LINE__
  4087. FMPY f35 = f105, f82
  4088. nop __LINE__
  4089. }
  4090. { .mfi
  4091. nop __LINE__
  4092. FMPY f39 = f105, f114
  4093. nop __LINE__
  4094. }
  4095. ;;
  4096. { .mfi
  4097. nop __LINE__
  4098. FMA_C f66 = f105, f67, f32
  4099. nop __LINE__
  4100. }
  4101. { .mfi
  4102. nop __LINE__
  4103. FMA_C f98 = f105, f99, f36
  4104. nop __LINE__
  4105. }
  4106. ;;
  4107. { .mfi
  4108. nop __LINE__
  4109. FMA_D f67 = f104, f67, f33
  4110. nop __LINE__
  4111. }
  4112. { .mfi
  4113. nop __LINE__
  4114. FMA_D f99 = f104, f99, f37
  4115. nop __LINE__
  4116. }
  4117. ;;
  4118. { .mfi
  4119. nop __LINE__
  4120. FMA_C f82 = f105, f83, f34
  4121. nop __LINE__
  4122. }
  4123. { .mfi
  4124. nop __LINE__
  4125. FMA_C f114 = f105, f115, f38
  4126. nop __LINE__
  4127. }
  4128. ;;
  4129. { .mfi
  4130. nop __LINE__
  4131. FMA_D f83 = f104, f83, f35
  4132. nop __LINE__
  4133. }
  4134. { .mfi
  4135. nop __LINE__
  4136. FMA_D f115 = f104, f115, f39
  4137. nop __LINE__
  4138. }
  4139. ;;
  4140. { .mfi
  4141. STFD [BOFFSET] = f66, SIZE
  4142. FNMA f64 = f106, f66, f64
  4143. nop __LINE__
  4144. }
  4145. { .mfi
  4146. STFD [BOFFSET2] = f98, SIZE
  4147. FNMA f96 = f106, f98, f96
  4148. nop __LINE__
  4149. }
  4150. ;;
  4151. { .mfi
  4152. STFD [BOFFSET] = f67, SIZE
  4153. FMA_A f65 = f107, f66, f65
  4154. nop __LINE__
  4155. }
  4156. { .mfi
  4157. STFD [BOFFSET2] = f99, SIZE
  4158. FMA_A f97 = f107, f98, f97
  4159. nop __LINE__
  4160. }
  4161. ;;
  4162. { .mfi
  4163. STFD [BOFFSET] = f82, SIZE
  4164. FNMA f80 = f106, f82, f80
  4165. nop __LINE__
  4166. }
  4167. { .mfi
  4168. STFD [BOFFSET2] = f114, SIZE
  4169. FNMA f112 = f106, f114, f112
  4170. nop __LINE__
  4171. }
  4172. ;;
  4173. { .mfi
  4174. STFD [BOFFSET] = f83, -11 * SIZE
  4175. FMA_A f81 = f107, f82, f81
  4176. nop __LINE__
  4177. }
  4178. { .mfi
  4179. STFD [BOFFSET2] = f115, -11 * SIZE
  4180. FMA_A f113 = f107, f114, f113
  4181. nop __LINE__
  4182. }
  4183. ;;
  4184. { .mfi
  4185. STFD [C1 ] = f66, SIZE
  4186. FMA_B f64 = f107, f67, f64
  4187. nop __LINE__
  4188. }
  4189. { .mfi
  4190. STFD [C3 ] = f98, SIZE
  4191. FMA_B f96 = f107, f99, f96
  4192. nop __LINE__
  4193. }
  4194. ;;
  4195. { .mfi
  4196. STFD [C1 ] = f67, -3 * SIZE
  4197. FNMA f65 = f106, f67, f65
  4198. nop __LINE__
  4199. }
  4200. { .mfi
  4201. STFD [C3 ] = f99, -3 * SIZE
  4202. FNMA f97 = f106, f99, f97
  4203. nop __LINE__
  4204. }
  4205. ;;
  4206. { .mfi
  4207. STFD [C2 ] = f82, SIZE
  4208. FMA_B f80 = f107, f83, f80
  4209. nop __LINE__
  4210. }
  4211. { .mfi
  4212. STFD [C4 ] = f114, SIZE
  4213. FMA_B f112 = f107, f115, f112
  4214. nop __LINE__
  4215. }
  4216. ;;
  4217. { .mfi
  4218. STFD [C2 ] = f83, -3 * SIZE
  4219. FNMA f81 = f106, f83, f81
  4220. nop __LINE__
  4221. }
  4222. { .mfi
  4223. STFD [C4 ] = f115, -3 * SIZE
  4224. FNMA f113 = f106, f115, f113
  4225. nop __LINE__
  4226. }
  4227. ;;
  4228. { .mfi
  4229. nop __LINE__
  4230. FMPY f32 = f120, f64
  4231. nop __LINE__
  4232. }
  4233. { .mfi
  4234. nop __LINE__
  4235. FMPY f36 = f120, f96
  4236. nop __LINE__
  4237. }
  4238. ;;
  4239. { .mfi
  4240. nop __LINE__
  4241. FMPY f33 = f121, f64
  4242. nop __LINE__
  4243. }
  4244. { .mfi
  4245. nop __LINE__
  4246. FMPY f37 = f121, f96
  4247. nop __LINE__
  4248. }
  4249. ;;
  4250. { .mfi
  4251. nop __LINE__
  4252. FMPY f34 = f120, f80
  4253. nop __LINE__
  4254. }
  4255. { .mfi
  4256. nop __LINE__
  4257. FMPY f38 = f120, f112
  4258. nop __LINE__
  4259. }
  4260. ;;
  4261. { .mfi
  4262. nop __LINE__
  4263. FMPY f35 = f121, f80
  4264. nop __LINE__
  4265. }
  4266. { .mfi
  4267. nop __LINE__
  4268. FMPY f39 = f121, f112
  4269. nop __LINE__
  4270. }
  4271. ;;
  4272. { .mfi
  4273. nop __LINE__
  4274. FMA_C f64 = f121, f65, f32
  4275. nop __LINE__
  4276. }
  4277. { .mfi
  4278. nop __LINE__
  4279. FMA_C f96 = f121, f97, f36
  4280. nop __LINE__
  4281. }
  4282. ;;
  4283. { .mfi
  4284. nop __LINE__
  4285. FMA_D f65 = f120, f65, f33
  4286. nop __LINE__
  4287. }
  4288. { .mfi
  4289. nop __LINE__
  4290. FMA_D f97 = f120, f97, f37
  4291. nop __LINE__
  4292. }
  4293. ;;
  4294. { .mfi
  4295. nop __LINE__
  4296. FMA_C f80 = f121, f81, f34
  4297. nop __LINE__
  4298. }
  4299. { .mfi
  4300. nop __LINE__
  4301. FMA_C f112 = f121, f113, f38
  4302. nop __LINE__
  4303. }
  4304. ;;
  4305. { .mfi
  4306. nop __LINE__
  4307. FMA_D f81 = f120, f81, f35
  4308. nop __LINE__
  4309. }
  4310. { .mfi
  4311. nop __LINE__
  4312. FMA_D f113 = f120, f113, f39
  4313. nop __LINE__
  4314. }
  4315. ;;
  4316. { .mmi
  4317. STFD [BOFFSET] = f64, SIZE
  4318. STFD [BOFFSET2] = f96, SIZE
  4319. nop __LINE__
  4320. }
  4321. ;;
  4322. { .mmi
  4323. STFD [BOFFSET] = f65, SIZE
  4324. STFD [BOFFSET2] = f97, SIZE
  4325. nop __LINE__
  4326. }
  4327. ;;
  4328. { .mmi
  4329. STFD [BOFFSET] = f80, SIZE
  4330. STFD [BOFFSET2] = f112, SIZE
  4331. nop __LINE__
  4332. }
  4333. ;;
  4334. { .mmi
  4335. STFD [BOFFSET] = f81, -3 * SIZE
  4336. STFD [BOFFSET2] = f113, -3 * SIZE
  4337. nop __LINE__
  4338. }
  4339. ;;
  4340. { .mfi
  4341. STFD [C1 ] = f64, SIZE
  4342. mov f64 = f0
  4343. nop __LINE__
  4344. }
  4345. { .mfi
  4346. STFD [C3 ] = f96, SIZE
  4347. mov f96 = f0
  4348. nop __LINE__
  4349. }
  4350. ;;
  4351. { .mfi
  4352. STFD [C1 ] = f65, -1 * SIZE
  4353. mov f65 = f0
  4354. adds KK = -4, KK
  4355. }
  4356. { .mfi
  4357. STFD [C3 ] = f97, -1 * SIZE
  4358. mov f97 = f0
  4359. nop __LINE__
  4360. }
  4361. ;;
  4362. { .mfi
  4363. STFD [C2 ] = f80, SIZE
  4364. mov f80 = f0
  4365. cmp.ne p6, p0 = 1, I
  4366. }
  4367. { .mfi
  4368. STFD [C4 ] = f112, SIZE
  4369. mov f112 = f0
  4370. sub L = K, KK
  4371. }
  4372. ;;
  4373. { .mfi
  4374. STFD [C2 ] = f81, -1 * SIZE
  4375. mov f81 = f0
  4376. adds I = -1, I
  4377. }
  4378. { .mfb
  4379. STFD [C4 ] = f113, -1 * SIZE
  4380. mov f113 = f0
  4381. (p6) br.cond.dptk .L011
  4382. }
  4383. ;;
  4384. #endif
  4385. #ifdef LT
  /*
   * LT solve step for one tile of 4 x 4 complex values, followed by the
   * per-tile bookkeeping and the loop back to .L011.
   *
   * Tile registers, held as (first, second) pairs.  The FMPY/FMA_C/FMA_D
   * pattern below has the shape of a complex multiply with the first register
   * of each pair as the real part; the exact signs live in the
   * FMA_A/FMA_B/FMA_C/FMA_D/FNMA macros, which are defined outside this chunk.
   *   row 0: f64/f65   f80/f81   f96/f97    f112/f113
   *   row 1: f66/f67   f82/f83   f98/f99    f114/f115
   *   row 2: f68/f69   f84/f85   f100/f101  f116/f117
   *   row 3: f70/f71   f86/f87   f102/f103  f118/f119
   * The four columns are written through C1, C2, C3 and C4 respectively.
   *
   * Coefficient pairs:
   *   f72/f73   scales row 0; f74/f75, f76/f77, f78/f79 update rows 1, 2, 3
   *             from row 0.
   *   f90/f91   scales row 1; f92/f93, f94/f95 update rows 2, 3 from row 1.
   *   f108/f109 scales row 2; f110/f111 updates row 3 from row 2.
   *   f126/f127 scales row 3.
   * f72..f75 are read here but not loaded here, so they must already be live
   * on entry.
   * NOTE(review): the scale factors are multiplied in, not divided, so the
   * diagonal entries are presumably stored pre-inverted -- confirm against the
   * routine that packs A.
   *
   * f32..f39 are scratch registers for the partial products.
   */
  /*
   * Scale row 0 by f72/f73 while loading the remaining coefficients.  With P
   * the value of AOFFSET on entry, the loads read P+0, P+2, P+6, P+8, P+10,
   * P+16, P+18 and P+26 (in units of SIZE); the final adds leaves AOFFSET at
   * P - 4 * SIZE.  BOFFSET2 is set to BOFFSET + 4 * SIZE for the paired
   * stores that follow.
   */
  4386. { .mfi
  4387. LDFPD f76, f77 = [AOFFSET], 2 * SIZE
  4388. FMPY f32 = f72, f64
  4389. nop __LINE__
  4390. }
  4391. { .mfi
  4392. nop __LINE__
  4393. FMPY f36 = f72, f96
  4394. nop __LINE__
  4395. }
  4396. ;;
  4397. { .mfi
  4398. LDFPD f78, f79 = [AOFFSET]
  4399. FMPY f33 = f73, f64
  4400. adds AOFFSET = 4 * SIZE, AOFFSET
  4401. }
  4402. { .mfi
  4403. nop __LINE__
  4404. FMPY f37 = f73, f96
  4405. nop __LINE__
  4406. }
  4407. ;;
  4408. { .mfi
  4409. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  4410. FMPY f34 = f72, f80
  4411. nop __LINE__
  4412. }
  4413. { .mfi
  4414. nop __LINE__
  4415. FMPY f38 = f72, f112
  4416. nop __LINE__
  4417. }
  4418. ;;
  4419. { .mfi
  4420. LDFPD f92, f93 = [AOFFSET], 2 * SIZE
  4421. FMPY f35 = f73, f80
  4422. nop __LINE__
  4423. }
  4424. { .mfi
  4425. nop __LINE__
  4426. FMPY f39 = f73, f112
  4427. nop __LINE__
  4428. }
  4429. ;;
  4430. { .mfi
  4431. LDFPD f94, f95 = [AOFFSET]
  4432. FMA_C f64 = f73, f65, f32
  4433. adds AOFFSET = 6 * SIZE, AOFFSET
  4434. }
  4435. { .mfi
  4436. nop __LINE__
  4437. FMA_C f96 = f73, f97, f36
  4438. nop __LINE__
  4439. }
  4440. ;;
  4441. { .mfi
  4442. LDFPD f108, f109 = [AOFFSET], 2 * SIZE
  4443. FMA_D f65 = f72, f65, f33
  4444. nop __LINE__
  4445. }
  4446. { .mfi
  4447. nop __LINE__
  4448. FMA_D f97 = f72, f97, f37
  4449. nop __LINE__
  4450. }
  4451. ;;
  4452. { .mfi
  4453. LDFPD f110, f111 = [AOFFSET]
  4454. FMA_C f80 = f73, f81, f34
  4455. adds AOFFSET = 8 * SIZE, AOFFSET
  4456. }
  4457. { .mfi
  4458. nop __LINE__
  4459. FMA_C f112 = f73, f113, f38
  4460. nop __LINE__
  4461. }
  4462. ;;
  4463. { .mfi
  4464. LDFPD f126, f127 = [AOFFSET]
  4465. FMA_D f81 = f72, f81, f35
  4466. adds AOFFSET = - 30 * SIZE, AOFFSET
  4467. }
  4468. { .mfi
  4469. nop __LINE__
  4470. FMA_D f113 = f72, f113, f39
  4471. adds BOFFSET2 = 4 * SIZE, BOFFSET
  4472. }
  4473. ;;
  /*
   * Row 0 is not modified again in this block.  Store it through BOFFSET
   * (C1/C2 columns) and BOFFSET2 (C3/C4 columns); the 5 * SIZE post-increment
   * on the fourth store moves each pointer on by 8 * SIZE in total.  Then store
   * it through C1..C4.  The floating-point slots meanwhile update row 1 from
   * row 0 using coefficient f74/f75.
   */
  4474. { .mfi
  4475. STFD [BOFFSET] = f64, SIZE
  4476. FNMA f66 = f74, f64, f66
  4477. nop __LINE__
  4478. }
  4479. { .mfi
  4480. STFD [BOFFSET2] = f96, SIZE
  4481. FNMA f98 = f74, f96, f98
  4482. nop __LINE__
  4483. }
  4484. ;;
  4485. { .mfi
  4486. STFD [BOFFSET] = f65, SIZE
  4487. FMA_A f67 = f75, f64, f67
  4488. nop __LINE__
  4489. }
  4490. { .mfi
  4491. STFD [BOFFSET2] = f97, SIZE
  4492. FMA_A f99 = f75, f96, f99
  4493. nop __LINE__
  4494. }
  4495. ;;
  4496. { .mfi
  4497. STFD [BOFFSET] = f80, SIZE
  4498. FNMA f82 = f74, f80, f82
  4499. nop __LINE__
  4500. }
  4501. { .mfi
  4502. STFD [BOFFSET2] = f112, SIZE
  4503. FNMA f114 = f74, f112, f114
  4504. nop __LINE__
  4505. }
  4506. ;;
  4507. { .mfi
  4508. STFD [BOFFSET] = f81, 5 * SIZE
  4509. FMA_A f83 = f75, f80, f83
  4510. nop __LINE__
  4511. }
  4512. { .mfi
  4513. STFD [BOFFSET2] = f113, 5 * SIZE
  4514. FMA_A f115 = f75, f112, f115
  4515. nop __LINE__
  4516. }
  4517. ;;
  4518. { .mfi
  4519. STFD [C1 ] = f64, SIZE
  4520. FMA_B f66 = f75, f65, f66
  4521. nop __LINE__
  4522. }
  4523. { .mfi
  4524. STFD [C3 ] = f96, SIZE
  4525. FMA_B f98 = f75, f97, f98
  4526. nop __LINE__
  4527. }
  4528. ;;
  4529. { .mfi
  4530. STFD [C1 ] = f65, SIZE
  4531. FNMA f67 = f74, f65, f67
  4532. nop __LINE__
  4533. }
  4534. { .mfi
  4535. STFD [C3 ] = f97, SIZE
  4536. FNMA f99 = f74, f97, f99
  4537. nop __LINE__
  4538. }
  4539. ;;
  4540. { .mfi
  4541. STFD [C2 ] = f80, SIZE
  4542. FMA_B f82 = f75, f81, f82
  4543. nop __LINE__
  4544. }
  4545. { .mfi
  4546. STFD [C4 ] = f112, SIZE
  4547. FMA_B f114 = f75, f113, f114
  4548. nop __LINE__
  4549. }
  4550. ;;
  4551. { .mfi
  4552. STFD [C2 ] = f81, SIZE
  4553. FNMA f83 = f74, f81, f83
  4554. nop __LINE__
  4555. }
  4556. { .mfi
  4557. STFD [C4 ] = f113, SIZE
  4558. FNMA f115 = f74, f113, f115
  4559. nop __LINE__
  4560. }
  4561. ;;
  /* Update row 2 from row 0 using coefficient f76/f77. */
  4562. { .mfi
  4563. nop __LINE__
  4564. FNMA f68 = f76, f64, f68
  4565. nop __LINE__
  4566. }
  4567. { .mfi
  4568. nop __LINE__
  4569. FNMA f100 = f76, f96, f100
  4570. nop __LINE__
  4571. }
  4572. ;;
  4573. { .mfi
  4574. nop __LINE__
  4575. FMA_A f69 = f77, f64, f69
  4576. nop __LINE__
  4577. }
  4578. { .mfi
  4579. nop __LINE__
  4580. FMA_A f101 = f77, f96, f101
  4581. nop __LINE__
  4582. }
  4583. ;;
  4584. { .mfi
  4585. nop __LINE__
  4586. FNMA f84 = f76, f80, f84
  4587. nop __LINE__
  4588. }
  4589. { .mfi
  4590. nop __LINE__
  4591. FNMA f116 = f76, f112, f116
  4592. nop __LINE__
  4593. }
  4594. ;;
  4595. { .mfi
  4596. nop __LINE__
  4597. FMA_A f85 = f77, f80, f85
  4598. nop __LINE__
  4599. }
  4600. { .mfi
  4601. nop __LINE__
  4602. FMA_A f117 = f77, f112, f117
  4603. nop __LINE__
  4604. }
  4605. ;;
  4606. { .mfi
  4607. nop __LINE__
  4608. FMA_B f68 = f77, f65, f68
  4609. nop __LINE__
  4610. }
  4611. { .mfi
  4612. nop __LINE__
  4613. FMA_B f100 = f77, f97, f100
  4614. nop __LINE__
  4615. }
  4616. ;;
  4617. { .mfi
  4618. nop __LINE__
  4619. FNMA f69 = f76, f65, f69
  4620. nop __LINE__
  4621. }
  4622. { .mfi
  4623. nop __LINE__
  4624. FNMA f101 = f76, f97, f101
  4625. nop __LINE__
  4626. }
  4627. ;;
  4628. { .mfi
  4629. nop __LINE__
  4630. FMA_B f84 = f77, f81, f84
  4631. nop __LINE__
  4632. }
  4633. { .mfi
  4634. nop __LINE__
  4635. FMA_B f116 = f77, f113, f116
  4636. nop __LINE__
  4637. }
  4638. ;;
  4639. { .mfi
  4640. nop __LINE__
  4641. FNMA f85 = f76, f81, f85
  4642. nop __LINE__
  4643. }
  4644. { .mfi
  4645. nop __LINE__
  4646. FNMA f117 = f76, f113, f117
  4647. nop __LINE__
  4648. }
  4649. ;;
  /* Update row 3 from row 0 using coefficient f78/f79. */
  4650. { .mfi
  4651. nop __LINE__
  4652. FNMA f70 = f78, f64, f70
  4653. nop __LINE__
  4654. }
  4655. { .mfi
  4656. nop __LINE__
  4657. FNMA f102 = f78, f96, f102
  4658. nop __LINE__
  4659. }
  4660. ;;
  4661. { .mfi
  4662. nop __LINE__
  4663. FMA_A f71 = f79, f64, f71
  4664. nop __LINE__
  4665. }
  4666. { .mfi
  4667. nop __LINE__
  4668. FMA_A f103 = f79, f96, f103
  4669. nop __LINE__
  4670. }
  4671. ;;
  4672. { .mfi
  4673. nop __LINE__
  4674. FNMA f86 = f78, f80, f86
  4675. nop __LINE__
  4676. }
  4677. { .mfi
  4678. nop __LINE__
  4679. FNMA f118 = f78, f112, f118
  4680. nop __LINE__
  4681. }
  4682. ;;
  4683. { .mfi
  4684. nop __LINE__
  4685. FMA_A f87 = f79, f80, f87
  4686. nop __LINE__
  4687. }
  4688. { .mfi
  4689. nop __LINE__
  4690. FMA_A f119 = f79, f112, f119
  4691. nop __LINE__
  4692. }
  4693. ;;
  4694. { .mfi
  4695. nop __LINE__
  4696. FMA_B f70 = f79, f65, f70
  4697. nop __LINE__
  4698. }
  4699. { .mfi
  4700. nop __LINE__
  4701. FMA_B f102 = f79, f97, f102
  4702. nop __LINE__
  4703. }
  4704. ;;
  4705. { .mfi
  4706. nop __LINE__
  4707. FNMA f71 = f78, f65, f71
  4708. nop __LINE__
  4709. }
  4710. { .mfi
  4711. nop __LINE__
  4712. FNMA f103 = f78, f97, f103
  4713. nop __LINE__
  4714. }
  4715. ;;
  4716. { .mfi
  4717. nop __LINE__
  4718. FMA_B f86 = f79, f81, f86
  4719. nop __LINE__
  4720. }
  4721. { .mfi
  4722. nop __LINE__
  4723. FMA_B f118 = f79, f113, f118
  4724. nop __LINE__
  4725. }
  4726. ;;
  4727. { .mfi
  4728. nop __LINE__
  4729. FNMA f87 = f78, f81, f87
  4730. nop __LINE__
  4731. }
  4732. { .mfi
  4733. nop __LINE__
  4734. FNMA f119 = f78, f113, f119
  4735. nop __LINE__
  4736. }
  4737. ;;
  /*
   * Scale row 1 by f90/f91: partial products go to f32..f39, then FMA_C/FMA_D
   * combine them back into the row registers.
   */
  4738. { .mfi
  4739. nop __LINE__
  4740. FMPY f32 = f90, f66
  4741. nop __LINE__
  4742. }
  4743. { .mfi
  4744. nop __LINE__
  4745. FMPY f36 = f90, f98
  4746. nop __LINE__
  4747. }
  4748. ;;
  4749. { .mfi
  4750. nop __LINE__
  4751. FMPY f33 = f91, f66
  4752. nop __LINE__
  4753. }
  4754. { .mfi
  4755. nop __LINE__
  4756. FMPY f37 = f91, f98
  4757. nop __LINE__
  4758. }
  4759. ;;
  4760. { .mfi
  4761. nop __LINE__
  4762. FMPY f34 = f90, f82
  4763. nop __LINE__
  4764. }
  4765. { .mfi
  4766. nop __LINE__
  4767. FMPY f38 = f90, f114
  4768. nop __LINE__
  4769. }
  4770. ;;
  4771. { .mfi
  4772. nop __LINE__
  4773. FMPY f35 = f91, f82
  4774. nop __LINE__
  4775. }
  4776. { .mfi
  4777. nop __LINE__
  4778. FMPY f39 = f91, f114
  4779. nop __LINE__
  4780. }
  4781. ;;
  4782. { .mfi
  4783. nop __LINE__
  4784. FMA_C f66 = f91, f67, f32
  4785. nop __LINE__
  4786. }
  4787. { .mfi
  4788. nop __LINE__
  4789. FMA_C f98 = f91, f99, f36
  4790. nop __LINE__
  4791. }
  4792. ;;
  4793. { .mfi
  4794. nop __LINE__
  4795. FMA_D f67 = f90, f67, f33
  4796. nop __LINE__
  4797. }
  4798. { .mfi
  4799. nop __LINE__
  4800. FMA_D f99 = f90, f99, f37
  4801. nop __LINE__
  4802. }
  4803. ;;
  4804. { .mfi
  4805. nop __LINE__
  4806. FMA_C f82 = f91, f83, f34
  4807. nop __LINE__
  4808. }
  4809. { .mfi
  4810. nop __LINE__
  4811. FMA_C f114 = f91, f115, f38
  4812. nop __LINE__
  4813. }
  4814. ;;
  4815. { .mfi
  4816. nop __LINE__
  4817. FMA_D f83 = f90, f83, f35
  4818. nop __LINE__
  4819. }
  4820. { .mfi
  4821. nop __LINE__
  4822. FMA_D f115 = f90, f115, f39
  4823. nop __LINE__
  4824. }
  4825. ;;
  /*
   * Store row 1 through BOFFSET/BOFFSET2 (again +8 * SIZE each in total) and
   * through C1..C4, while updating row 2 from row 1 using coefficient f92/f93.
   */
  4826. { .mfi
  4827. STFD [BOFFSET] = f66, SIZE
  4828. FNMA f68 = f92, f66, f68
  4829. nop __LINE__
  4830. }
  4831. { .mfi
  4832. STFD [BOFFSET2] = f98, SIZE
  4833. FNMA f100 = f92, f98, f100
  4834. nop __LINE__
  4835. }
  4836. ;;
  4837. { .mfi
  4838. STFD [BOFFSET] = f67, SIZE
  4839. FMA_A f69 = f93, f66, f69
  4840. nop __LINE__
  4841. }
  4842. { .mfi
  4843. STFD [BOFFSET2] = f99, SIZE
  4844. FMA_A f101 = f93, f98, f101
  4845. nop __LINE__
  4846. }
  4847. ;;
  4848. { .mfi
  4849. STFD [BOFFSET] = f82, SIZE
  4850. FNMA f84 = f92, f82, f84
  4851. nop __LINE__
  4852. }
  4853. { .mfi
  4854. STFD [BOFFSET2] = f114, SIZE
  4855. FNMA f116 = f92, f114, f116
  4856. nop __LINE__
  4857. }
  4858. ;;
  4859. { .mfi
  4860. STFD [BOFFSET] = f83, 5 * SIZE
  4861. FMA_A f85 = f93, f82, f85
  4862. nop __LINE__
  4863. }
  4864. { .mfi
  4865. STFD [BOFFSET2] = f115, 5 * SIZE
  4866. FMA_A f117 = f93, f114, f117
  4867. nop __LINE__
  4868. }
  4869. ;;
  4870. { .mfi
  4871. STFD [C1 ] = f66, SIZE
  4872. FMA_B f68 = f93, f67, f68
  4873. nop __LINE__
  4874. }
  4875. { .mfi
  4876. STFD [C3 ] = f98, SIZE
  4877. FMA_B f100 = f93, f99, f100
  4878. nop __LINE__
  4879. }
  4880. ;;
  4881. { .mfi
  4882. STFD [C1 ] = f67, SIZE
  4883. FNMA f69 = f92, f67, f69
  4884. nop __LINE__
  4885. }
  4886. { .mfi
  4887. STFD [C3 ] = f99, SIZE
  4888. FNMA f101 = f92, f99, f101
  4889. nop __LINE__
  4890. }
  4891. ;;
  4892. { .mfi
  4893. STFD [C2 ] = f82, SIZE
  4894. FMA_B f84 = f93, f83, f84
  4895. nop __LINE__
  4896. }
  4897. { .mfi
  4898. STFD [C4 ] = f114, SIZE
  4899. FMA_B f116 = f93, f115, f116
  4900. nop __LINE__
  4901. }
  4902. ;;
  4903. { .mfi
  4904. STFD [C2 ] = f83, SIZE
  4905. FNMA f85 = f92, f83, f85
  4906. nop __LINE__
  4907. }
  4908. { .mfi
  4909. STFD [C4 ] = f115, SIZE
  4910. FNMA f117 = f92, f115, f117
  4911. nop __LINE__
  4912. }
  4913. ;;
  /* Update row 3 from row 1 using coefficient f94/f95. */
  4914. { .mfi
  4915. nop __LINE__
  4916. FNMA f70 = f94, f66, f70
  4917. nop __LINE__
  4918. }
  4919. { .mfi
  4920. nop __LINE__
  4921. FNMA f102 = f94, f98, f102
  4922. nop __LINE__
  4923. }
  4924. ;;
  4925. { .mfi
  4926. nop __LINE__
  4927. FMA_A f71 = f95, f66, f71
  4928. nop __LINE__
  4929. }
  4930. { .mfi
  4931. nop __LINE__
  4932. FMA_A f103 = f95, f98, f103
  4933. nop __LINE__
  4934. }
  4935. ;;
  4936. { .mfi
  4937. nop __LINE__
  4938. FNMA f86 = f94, f82, f86
  4939. nop __LINE__
  4940. }
  4941. { .mfi
  4942. nop __LINE__
  4943. FNMA f118 = f94, f114, f118
  4944. nop __LINE__
  4945. }
  4946. ;;
  4947. { .mfi
  4948. nop __LINE__
  4949. FMA_A f87 = f95, f82, f87
  4950. nop __LINE__
  4951. }
  4952. { .mfi
  4953. nop __LINE__
  4954. FMA_A f119 = f95, f114, f119
  4955. nop __LINE__
  4956. }
  4957. ;;
  4958. { .mfi
  4959. nop __LINE__
  4960. FMA_B f70 = f95, f67, f70
  4961. nop __LINE__
  4962. }
  4963. { .mfi
  4964. nop __LINE__
  4965. FMA_B f102 = f95, f99, f102
  4966. nop __LINE__
  4967. }
  4968. ;;
  4969. { .mfi
  4970. nop __LINE__
  4971. FNMA f71 = f94, f67, f71
  4972. nop __LINE__
  4973. }
  4974. { .mfi
  4975. nop __LINE__
  4976. FNMA f103 = f94, f99, f103
  4977. nop __LINE__
  4978. }
  4979. ;;
  4980. { .mfi
  4981. nop __LINE__
  4982. FMA_B f86 = f95, f83, f86
  4983. nop __LINE__
  4984. }
  4985. { .mfi
  4986. nop __LINE__
  4987. FMA_B f118 = f95, f115, f118
  4988. nop __LINE__
  4989. }
  4990. ;;
  4991. { .mfi
  4992. nop __LINE__
  4993. FNMA f87 = f94, f83, f87
  4994. nop __LINE__
  4995. }
  4996. { .mfi
  4997. nop __LINE__
  4998. FNMA f119 = f94, f115, f119
  4999. nop __LINE__
  5000. }
  5001. ;;
  /*
   * Scale row 2 by f108/f109.  Unlike the other rows, the eight FMPY bundles
   * are issued as a single instruction group (no stop between them), and so
   * are the eight FMA_C/FMA_D bundles.  Within each group every destination is
   * distinct and no instruction reads a register written earlier in the same
   * group.
   */
  5002. { .mfi
  5003. nop __LINE__
  5004. FMPY f32 = f108, f68
  5005. nop __LINE__
  5006. }
  5007. { .mfi
  5008. nop __LINE__
  5009. FMPY f36 = f108, f100
  5010. nop __LINE__
  5011. }
  5012. { .mfi
  5013. nop __LINE__
  5014. FMPY f33 = f109, f68
  5015. nop __LINE__
  5016. }
  5017. { .mfi
  5018. nop __LINE__
  5019. FMPY f37 = f109, f100
  5020. nop __LINE__
  5021. }
  5022. { .mfi
  5023. nop __LINE__
  5024. FMPY f34 = f108, f84
  5025. nop __LINE__
  5026. }
  5027. { .mfi
  5028. nop __LINE__
  5029. FMPY f38 = f108, f116
  5030. nop __LINE__
  5031. }
  5032. { .mfi
  5033. nop __LINE__
  5034. FMPY f35 = f109, f84
  5035. nop __LINE__
  5036. }
  5037. { .mfi
  5038. nop __LINE__
  5039. FMPY f39 = f109, f116
  5040. nop __LINE__
  5041. }
  5042. ;;
  5043. { .mfi
  5044. nop __LINE__
  5045. FMA_C f68 = f109, f69, f32
  5046. nop __LINE__
  5047. }
  5048. { .mfi
  5049. nop __LINE__
  5050. FMA_C f100 = f109, f101, f36
  5051. nop __LINE__
  5052. }
  5053. { .mfi
  5054. nop __LINE__
  5055. FMA_D f69 = f108, f69, f33
  5056. nop __LINE__
  5057. }
  5058. { .mfi
  5059. nop __LINE__
  5060. FMA_D f101 = f108, f101, f37
  5061. nop __LINE__
  5062. }
  5063. { .mfi
  5064. nop __LINE__
  5065. FMA_C f84 = f109, f85, f34
  5066. nop __LINE__
  5067. }
  5068. { .mfi
  5069. nop __LINE__
  5070. FMA_C f116 = f109, f117, f38
  5071. nop __LINE__
  5072. }
  5073. { .mfi
  5074. nop __LINE__
  5075. FMA_D f85 = f108, f85, f35
  5076. nop __LINE__
  5077. }
  5078. { .mfi
  5079. nop __LINE__
  5080. FMA_D f117 = f108, f117, f39
  5081. nop __LINE__
  5082. }
  5083. ;;
  /*
   * Store row 2 through BOFFSET/BOFFSET2 (+8 * SIZE each in total) and through
   * C1..C4, while updating row 3 from row 2 using coefficient f110/f111.
   */
  5084. { .mfi
  5085. STFD [BOFFSET] = f68, SIZE
  5086. FNMA f70 = f110, f68, f70
  5087. nop __LINE__
  5088. }
  5089. { .mfi
  5090. STFD [BOFFSET2] = f100, SIZE
  5091. FNMA f102 = f110, f100, f102
  5092. nop __LINE__
  5093. }
  5094. ;;
  5095. { .mfi
  5096. STFD [BOFFSET] = f69, SIZE
  5097. FMA_A f71 = f111, f68, f71
  5098. nop __LINE__
  5099. }
  5100. { .mfi
  5101. STFD [BOFFSET2] = f101, SIZE
  5102. FMA_A f103 = f111, f100, f103
  5103. nop __LINE__
  5104. }
  5105. ;;
  5106. { .mfi
  5107. STFD [BOFFSET] = f84, SIZE
  5108. FNMA f86 = f110, f84, f86
  5109. nop __LINE__
  5110. }
  5111. { .mfi
  5112. STFD [BOFFSET2] = f116, SIZE
  5113. FNMA f118 = f110, f116, f118
  5114. nop __LINE__
  5115. }
  5116. ;;
  5117. { .mfi
  5118. STFD [BOFFSET] = f85, 5 * SIZE
  5119. FMA_A f87 = f111, f84, f87
  5120. nop __LINE__
  5121. }
  5122. { .mfi
  5123. STFD [BOFFSET2] = f117, 5 * SIZE
  5124. FMA_A f119 = f111, f116, f119
  5125. nop __LINE__
  5126. }
  5127. ;;
  5128. { .mfi
  5129. STFD [C1 ] = f68, SIZE
  5130. FMA_B f70 = f111, f69, f70
  5131. nop __LINE__
  5132. }
  5133. { .mfi
  5134. STFD [C3 ] = f100, SIZE
  5135. FMA_B f102 = f111, f101, f102
  5136. nop __LINE__
  5137. }
  5138. ;;
  5139. { .mfi
  5140. STFD [C1 ] = f69, SIZE
  5141. FNMA f71 = f110, f69, f71
  5142. nop __LINE__
  5143. }
  5144. { .mfi
  5145. STFD [C3 ] = f101, SIZE
  5146. FNMA f103 = f110, f101, f103
  5147. nop __LINE__
  5148. }
  5149. ;;
  5150. { .mfi
  5151. STFD [C2 ] = f84, SIZE
  5152. FMA_B f86 = f111, f85, f86
  5153. nop __LINE__
  5154. }
  5155. { .mfi
  5156. STFD [C4 ] = f116, SIZE
  5157. FMA_B f118 = f111, f117, f118
  5158. nop __LINE__
  5159. }
  5160. ;;
  5161. { .mfi
  5162. STFD [C2 ] = f85, SIZE
  5163. FNMA f87 = f110, f85, f87
  5164. nop __LINE__
  5165. }
  5166. { .mfi
  5167. STFD [C4 ] = f117, SIZE
  5168. FNMA f119 = f110, f117, f119
  5169. nop __LINE__
  5170. }
  5171. ;;
  /* Scale row 3 by f126/f127 (same partial-product scheme as rows 0..2). */
  5172. { .mfi
  5173. nop __LINE__
  5174. FMPY f32 = f126, f70
  5175. nop __LINE__
  5176. }
  5177. { .mfi
  5178. nop __LINE__
  5179. FMPY f36 = f126, f102
  5180. nop __LINE__
  5181. }
  5182. ;;
  5183. { .mfi
  5184. nop __LINE__
  5185. FMPY f33 = f127, f70
  5186. nop __LINE__
  5187. }
  5188. { .mfi
  5189. nop __LINE__
  5190. FMPY f37 = f127, f102
  5191. nop __LINE__
  5192. }
  5193. ;;
  5194. { .mfi
  5195. nop __LINE__
  5196. FMPY f34 = f126, f86
  5197. nop __LINE__
  5198. }
  5199. { .mfi
  5200. nop __LINE__
  5201. FMPY f38 = f126, f118
  5202. nop __LINE__
  5203. }
  5204. ;;
  5205. { .mfi
  5206. nop __LINE__
  5207. FMPY f35 = f127, f86
  5208. nop __LINE__
  5209. }
  5210. { .mfi
  5211. nop __LINE__
  5212. FMPY f39 = f127, f118
  5213. nop __LINE__
  5214. }
  5215. ;;
  5216. { .mfi
  5217. nop __LINE__
  5218. FMA_C f70 = f127, f71, f32
  5219. nop __LINE__
  5220. }
  5221. { .mfi
  5222. nop __LINE__
  5223. FMA_C f102 = f127, f103, f36
  5224. nop __LINE__
  5225. }
  5226. ;;
  5227. { .mfi
  5228. nop __LINE__
  5229. FMA_D f71 = f126, f71, f33
  5230. nop __LINE__
  5231. }
  5232. { .mfi
  5233. nop __LINE__
  5234. FMA_D f103 = f126, f103, f37
  5235. nop __LINE__
  5236. }
  5237. ;;
  5238. { .mfi
  5239. nop __LINE__
  5240. FMA_C f86 = f127, f87, f34
  5241. nop __LINE__
  5242. }
  5243. { .mfi
  5244. nop __LINE__
  5245. FMA_C f118 = f127, f119, f38
  5246. nop __LINE__
  5247. }
  5248. ;;
  5249. { .mfi
  5250. nop __LINE__
  5251. FMA_D f87 = f126, f87, f35
  5252. nop __LINE__
  5253. }
  5254. { .mfi
  5255. nop __LINE__
  5256. FMA_D f119 = f126, f119, f39
  5257. nop __LINE__
  5258. }
  5259. ;;
  /*
   * Store row 3 and do the per-tile bookkeeping.
   *  - The -27 * SIZE post-increment on the last BOFFSET store cancels the
   *    3 * 8 + 3 elements BOFFSET advanced in this block, returning it to its
   *    entry value; the last BOFFSET2 store has no post-increment.
   *  - r2 = (K - KK) << ZBASE_SHIFT, computed from KK before it is bumped by
   *    4; AOFFSET and BOFFSET are then each advanced by r2 << 2.
   *  - L is set to the updated KK.
   *  - f64, f65, f80, f81, f96, f97, f112, f113 are reset from f0 (the IA-64
   *    constant 0.0 register) -- presumably re-arming the accumulators for
   *    the next tile; confirm at .L011.
   *  - p6 = (I != 1) is evaluated before I is decremented; the final bundle
   *    branches back to .L011 while p6 is set.
   */
  5260. { .mmi
  5261. STFD [BOFFSET] = f70, SIZE
  5262. STFD [BOFFSET2] = f102, SIZE
  5263. nop __LINE__
  5264. }
  5265. ;;
  5266. { .mmi
  5267. STFD [BOFFSET] = f71, SIZE
  5268. STFD [BOFFSET2] = f103, SIZE
  5269. sub r2 = K, KK
  5270. }
  5271. ;;
  5272. { .mmi
  5273. STFD [BOFFSET] = f86, SIZE
  5274. STFD [BOFFSET2] = f118, SIZE
  5275. adds KK = 4, KK
  5276. }
  5277. ;;
  5278. { .mmi
  5279. STFD [BOFFSET] = f87, -27 * SIZE
  5280. STFD [BOFFSET2] = f119
  5281. shladd r2 = r2, ZBASE_SHIFT, r0
  5282. }
  5283. ;;
  5284. { .mfi
  5285. STFD [C1 ] = f70, SIZE
  5286. mov f64 = f0
  5287. shladd AOFFSET = r2, 2, AOFFSET
  5288. }
  5289. { .mfi
  5290. STFD [C3 ] = f102, SIZE
  5291. mov f65 = f0
  5292. shladd BOFFSET = r2, 2, BOFFSET
  5293. }
  5294. ;;
  5295. { .mfi
  5296. STFD [C1 ] = f71, SIZE
  5297. mov f80 = f0
  5298. mov L = KK
  5299. }
  5300. { .mfi
  5301. STFD [C3 ] = f103, SIZE
  5302. mov f81 = f0
  5303. nop __LINE__
  5304. }
  5305. ;;
  5306. { .mfi
  5307. STFD [C2 ] = f86, SIZE
  5308. mov f96 = f0
  5309. cmp.ne p6, p0 = 1, I
  5310. }
  5311. { .mfi
  5312. STFD [C4 ] = f118, SIZE
  5313. mov f97 = f0
  5314. nop __LINE__
  5315. }
  5316. ;;
  5317. { .mfi
  5318. STFD [C2 ] = f87, SIZE
  5319. mov f112 = f0
  5320. adds I = -1, I
  5321. }
  5322. { .mfb
  5323. STFD [C4 ] = f119, SIZE
  5324. mov f113 = f0
  5325. (p6) br.cond.dptk .L011
  5326. }
  5327. ;;
  5328. #endif
  5329. #ifdef RN
  /*
   * RN variant, step 1 of 4: finalize the register group f64-f71.
   *
   * The M slots of the first eight instruction groups load the coefficients
   * consumed by the rest of this section from [BOFFSET]:
   *     f76/f77, f78/f79, f90/f91, f92/f93, f94/f95,
   *     f108/f109, f110/f111, f126/f127
   * BOFFSET moves by +2 +4 +2 +2 +6 +2 +8 -30 = -4 * SIZE in total.
   * f72-f75 are read here but are loaded before this point (not visible in
   * this part of the file).
   *
   * In parallel the F slots compute, for each pair (f64,f65), (f66,f67),
   * (f68,f69), (f70,f71):
   *     t0 = f72 * lo            t1 = f73 * lo
   *     lo = FMA_C(f73, hi, t0)  hi = FMA_D(f72, hi, t1)
   * with f32-f39 as temporaries.  This has the shape of a complex multiply
   * of each pair by (f72,f73).  FMA_A/B/C/D are macros defined elsewhere in
   * the file, so the exact signs must be confirmed against their definitions.
   */
  5330. { .mfi
  5331. LDFPD f76, f77 = [BOFFSET], 2 * SIZE
  5332. FMPY f32 = f72, f64
  5333. nop __LINE__
  5334. }
  5335. { .mfi
  5336. nop __LINE__
  5337. FMPY f36 = f72, f68
  5338. nop __LINE__
  5339. }
  5340. ;;
  5341. { .mfi
  5342. LDFPD f78, f79 = [BOFFSET]
  5343. FMPY f33 = f73, f64
  5344. adds BOFFSET = 4 * SIZE, BOFFSET
  5345. }
  5346. { .mfi
  5347. nop __LINE__
  5348. FMPY f37 = f73, f68
  5349. nop __LINE__
  5350. }
  5351. ;;
  5352. { .mfi
  5353. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  5354. FMPY f34 = f72, f66
  5355. nop __LINE__
  5356. }
  5357. { .mfi
  5358. nop __LINE__
  5359. FMPY f38 = f72, f70
  5360. nop __LINE__
  5361. }
  5362. ;;
  5363. { .mfi
  5364. LDFPD f92, f93 = [BOFFSET], 2 * SIZE
  5365. FMPY f35 = f73, f66
  5366. nop __LINE__
  5367. }
  5368. { .mfi
  5369. nop __LINE__
  5370. FMPY f39 = f73, f70
  5371. nop __LINE__
  5372. }
  5373. ;;
  5374. { .mfi
  5375. LDFPD f94, f95 = [BOFFSET]
  5376. FMA_C f64 = f73, f65, f32
  5377. adds BOFFSET = 6 * SIZE, BOFFSET
  5378. }
  5379. { .mfi
  5380. nop __LINE__
  5381. FMA_C f68 = f73, f69, f36
  5382. nop __LINE__
  5383. }
  5384. ;;
  5385. { .mfi
  5386. LDFPD f108, f109 = [BOFFSET], 2 * SIZE
  5387. FMA_D f65 = f72, f65, f33
  5388. nop __LINE__
  5389. }
  5390. { .mfi
  5391. nop __LINE__
  5392. FMA_D f69 = f72, f69, f37
  5393. nop __LINE__
  5394. }
  5395. ;;
  5396. { .mfi
  5397. LDFPD f110, f111 = [BOFFSET]
  5398. FMA_C f66 = f73, f67, f34
  5399. adds BOFFSET = 8 * SIZE, BOFFSET
  5400. }
  5401. { .mfi
  5402. nop __LINE__
  5403. FMA_C f70 = f73, f71, f38
  5404. nop __LINE__
  5405. }
  5406. ;;
  5407. { .mfi
  5408. LDFPD f126, f127 = [BOFFSET]
  5409. FMA_D f67 = f72, f67, f35
  5410. adds BOFFSET = - 30 * SIZE, BOFFSET
  5411. }
  5412. { .mfi
  5413. nop __LINE__
  5414. FMA_D f71 = f72, f71, f39
  5415. adds AOFFSET2 = 4 * SIZE, AOFFSET
  5416. }
  5417. ;;
  /*
   * Store the finished values f64-f67 through AOFFSET and f68-f71 through
   * AOFFSET2 (= AOFFSET + 4 * SIZE), then the same values through C1 and C5.
   * Each pointer advances SIZE, SIZE, SIZE, 5 * SIZE = 8 * SIZE in total.
   *
   * The F slots meanwhile update the next group f80-f87 with the
   * coefficient pair (f74,f75):
   *     lo = FNMA(f74, src_lo, lo)   hi = FMA_A(f75, src_lo, hi)
   *     lo = FMA_B(f75, src_hi, lo)  hi = FNMA(f74, src_hi, hi)
   * where (src_lo, src_hi) is the matching pair from f64-f71.
   */
  5418. { .mfi
  5419. STFD [AOFFSET] = f64, SIZE
  5420. FNMA f80 = f74, f64, f80
  5421. nop __LINE__
  5422. }
  5423. { .mfi
  5424. STFD [AOFFSET2] = f68, SIZE
  5425. FNMA f84 = f74, f68, f84
  5426. nop __LINE__
  5427. }
  5428. ;;
  5429. { .mfi
  5430. STFD [AOFFSET] = f65, SIZE
  5431. FMA_A f81 = f75, f64, f81
  5432. nop __LINE__
  5433. }
  5434. { .mfi
  5435. STFD [AOFFSET2] = f69, SIZE
  5436. FMA_A f85 = f75, f68, f85
  5437. nop __LINE__
  5438. }
  5439. ;;
  5440. { .mfi
  5441. STFD [AOFFSET] = f66, SIZE
  5442. FNMA f82 = f74, f66, f82
  5443. nop __LINE__
  5444. }
  5445. { .mfi
  5446. STFD [AOFFSET2] = f70, SIZE
  5447. FNMA f86 = f74, f70, f86
  5448. nop __LINE__
  5449. }
  5450. ;;
  5451. { .mfi
  5452. STFD [AOFFSET] = f67, 5 * SIZE
  5453. FMA_A f83 = f75, f66, f83
  5454. nop __LINE__
  5455. }
  5456. { .mfi
  5457. STFD [AOFFSET2] = f71, 5 * SIZE
  5458. FMA_A f87 = f75, f70, f87
  5459. nop __LINE__
  5460. }
  5461. ;;
  5462. { .mfi
  5463. STFD [C1 ] = f64, SIZE
  5464. FMA_B f80 = f75, f65, f80
  5465. nop __LINE__
  5466. }
  5467. { .mfi
  5468. STFD [C5 ] = f68, SIZE
  5469. FMA_B f84 = f75, f69, f84
  5470. nop __LINE__
  5471. }
  5472. ;;
  5473. { .mfi
  5474. STFD [C1 ] = f65, SIZE
  5475. FNMA f81 = f74, f65, f81
  5476. nop __LINE__
  5477. }
  5478. { .mfi
  5479. STFD [C5 ] = f69, SIZE
  5480. FNMA f85 = f74, f69, f85
  5481. nop __LINE__
  5482. }
  5483. ;;
  5484. { .mfi
  5485. STFD [C1 ] = f66, SIZE
  5486. FMA_B f82 = f75, f67, f82
  5487. nop __LINE__
  5488. }
  5489. { .mfi
  5490. STFD [C5 ] = f70, SIZE
  5491. FMA_B f86 = f75, f71, f86
  5492. nop __LINE__
  5493. }
  5494. ;;
  5495. { .mfi
  5496. STFD [C1 ] = f67, 5 * SIZE
  5497. FNMA f83 = f74, f67, f83
  5498. nop __LINE__
  5499. }
  5500. { .mfi
  5501. STFD [C5 ] = f71, 5 * SIZE
  5502. FNMA f87 = f74, f71, f87
  5503. nop __LINE__
  5504. }
  5505. ;;
  /*
   * Same four-instruction update pattern, applied to the group f96-f103
   * with the coefficient pair (f76,f77) and sources f64-f71.
   */
  5506. { .mfi
  5507. nop __LINE__
  5508. FNMA f96 = f76, f64, f96
  5509. nop __LINE__
  5510. }
  5511. { .mfi
  5512. nop __LINE__
  5513. FNMA f100 = f76, f68, f100
  5514. nop __LINE__
  5515. }
  5516. ;;
  5517. { .mfi
  5518. nop __LINE__
  5519. FMA_A f97 = f77, f64, f97
  5520. nop __LINE__
  5521. }
  5522. { .mfi
  5523. nop __LINE__
  5524. FMA_A f101 = f77, f68, f101
  5525. nop __LINE__
  5526. }
  5527. ;;
  5528. { .mfi
  5529. nop __LINE__
  5530. FNMA f98 = f76, f66, f98
  5531. nop __LINE__
  5532. }
  5533. { .mfi
  5534. nop __LINE__
  5535. FNMA f102 = f76, f70, f102
  5536. nop __LINE__
  5537. }
  5538. ;;
  5539. { .mfi
  5540. nop __LINE__
  5541. FMA_A f99 = f77, f66, f99
  5542. nop __LINE__
  5543. }
  5544. { .mfi
  5545. nop __LINE__
  5546. FMA_A f103 = f77, f70, f103
  5547. nop __LINE__
  5548. }
  5549. ;;
  5550. { .mfi
  5551. nop __LINE__
  5552. FMA_B f96 = f77, f65, f96
  5553. nop __LINE__
  5554. }
  5555. { .mfi
  5556. nop __LINE__
  5557. FMA_B f100 = f77, f69, f100
  5558. nop __LINE__
  5559. }
  5560. ;;
  5561. { .mfi
  5562. nop __LINE__
  5563. FNMA f97 = f76, f65, f97
  5564. nop __LINE__
  5565. }
  5566. { .mfi
  5567. nop __LINE__
  5568. FNMA f101 = f76, f69, f101
  5569. nop __LINE__
  5570. }
  5571. ;;
  5572. { .mfi
  5573. nop __LINE__
  5574. FMA_B f98 = f77, f67, f98
  5575. nop __LINE__
  5576. }
  5577. { .mfi
  5578. nop __LINE__
  5579. FMA_B f102 = f77, f71, f102
  5580. nop __LINE__
  5581. }
  5582. ;;
  5583. { .mfi
  5584. nop __LINE__
  5585. FNMA f99 = f76, f67, f99
  5586. nop __LINE__
  5587. }
  5588. { .mfi
  5589. nop __LINE__
  5590. FNMA f103 = f76, f71, f103
  5591. nop __LINE__
  5592. }
  5593. ;;
  /*
   * Same update pattern, applied to the group f112-f119 with the
   * coefficient pair (f78,f79) and sources f64-f71.
   */
  5594. { .mfi
  5595. nop __LINE__
  5596. FNMA f112 = f78, f64, f112
  5597. nop __LINE__
  5598. }
  5599. { .mfi
  5600. nop __LINE__
  5601. FNMA f116 = f78, f68, f116
  5602. nop __LINE__
  5603. }
  5604. ;;
  5605. { .mfi
  5606. nop __LINE__
  5607. FMA_A f113 = f79, f64, f113
  5608. nop __LINE__
  5609. }
  5610. { .mfi
  5611. nop __LINE__
  5612. FMA_A f117 = f79, f68, f117
  5613. nop __LINE__
  5614. }
  5615. ;;
  5616. { .mfi
  5617. nop __LINE__
  5618. FNMA f114 = f78, f66, f114
  5619. nop __LINE__
  5620. }
  5621. { .mfi
  5622. nop __LINE__
  5623. FNMA f118 = f78, f70, f118
  5624. nop __LINE__
  5625. }
  5626. ;;
  5627. { .mfi
  5628. nop __LINE__
  5629. FMA_A f115 = f79, f66, f115
  5630. nop __LINE__
  5631. }
  5632. { .mfi
  5633. nop __LINE__
  5634. FMA_A f119 = f79, f70, f119
  5635. nop __LINE__
  5636. }
  5637. ;;
  5638. { .mfi
  5639. nop __LINE__
  5640. FMA_B f112 = f79, f65, f112
  5641. nop __LINE__
  5642. }
  5643. { .mfi
  5644. nop __LINE__
  5645. FMA_B f116 = f79, f69, f116
  5646. nop __LINE__
  5647. }
  5648. ;;
  5649. { .mfi
  5650. nop __LINE__
  5651. FNMA f113 = f78, f65, f113
  5652. nop __LINE__
  5653. }
  5654. { .mfi
  5655. nop __LINE__
  5656. FNMA f117 = f78, f69, f117
  5657. nop __LINE__
  5658. }
  5659. ;;
  5660. { .mfi
  5661. nop __LINE__
  5662. FMA_B f114 = f79, f67, f114
  5663. nop __LINE__
  5664. }
  5665. { .mfi
  5666. nop __LINE__
  5667. FMA_B f118 = f79, f71, f118
  5668. nop __LINE__
  5669. }
  5670. ;;
  5671. { .mfi
  5672. nop __LINE__
  5673. FNMA f115 = f78, f67, f115
  5674. nop __LINE__
  5675. }
  5676. { .mfi
  5677. nop __LINE__
  5678. FNMA f119 = f78, f71, f119
  5679. nop __LINE__
  5680. }
  5681. ;;
  /*
   * RN variant, step 2 of 4: finalize the register group f80-f87.
   *
   * Each pair (f80,f81), (f82,f83), (f84,f85), (f86,f87) is combined with
   * the coefficient pair (f90,f91) using the FMPY / FMA_C / FMA_D pattern,
   * with f32-f39 as temporaries.  This has the shape of a complex multiply;
   * the exact signs depend on the FMA_C / FMA_D macro definitions elsewhere
   * in the file.
   *
   * Unlike the other scaling steps in this section, the eight FMPY bundles
   * form one instruction group with no stops between them.  Their
   * destinations f32-f39 are all distinct and none is read inside the group.
   */
  5682. { .mfi
  5683. nop __LINE__
  5684. FMPY f32 = f90, f80
  5685. nop __LINE__
  5686. }
  5687. { .mfi
  5688. nop __LINE__
  5689. FMPY f36 = f90, f84
  5690. nop __LINE__
  5691. }
  5692. { .mfi
  5693. nop __LINE__
  5694. FMPY f33 = f91, f80
  5695. nop __LINE__
  5696. }
  5697. { .mfi
  5698. nop __LINE__
  5699. FMPY f37 = f91, f84
  5700. nop __LINE__
  5701. }
  5702. { .mfi
  5703. nop __LINE__
  5704. FMPY f34 = f90, f82
  5705. nop __LINE__
  5706. }
  5707. { .mfi
  5708. nop __LINE__
  5709. FMPY f38 = f90, f86
  5710. nop __LINE__
  5711. }
  5712. { .mfi
  5713. nop __LINE__
  5714. FMPY f35 = f91, f82
  5715. nop __LINE__
  5716. }
  5717. { .mfi
  5718. nop __LINE__
  5719. FMPY f39 = f91, f86
  5720. nop __LINE__
  5721. }
  5722. ;;
  /*
   * The eight FMA_C / FMA_D bundles also share one instruction group.
   * As written, no bundle reads a register that an earlier bundle of the
   * same group writes: each FMA_C reads the odd register of its pair before
   * the matching FMA_D overwrites it.
   */
  5723. { .mfi
  5724. nop __LINE__
  5725. FMA_C f80 = f91, f81, f32
  5726. nop __LINE__
  5727. }
  5728. { .mfi
  5729. nop __LINE__
  5730. FMA_C f84 = f91, f85, f36
  5731. nop __LINE__
  5732. }
  5733. { .mfi
  5734. nop __LINE__
  5735. FMA_D f81 = f90, f81, f33
  5736. nop __LINE__
  5737. }
  5738. { .mfi
  5739. nop __LINE__
  5740. FMA_D f85 = f90, f85, f37
  5741. nop __LINE__
  5742. }
  5743. { .mfi
  5744. nop __LINE__
  5745. FMA_C f82 = f91, f83, f34
  5746. nop __LINE__
  5747. }
  5748. { .mfi
  5749. nop __LINE__
  5750. FMA_C f86 = f91, f87, f38
  5751. nop __LINE__
  5752. }
  5753. { .mfi
  5754. nop __LINE__
  5755. FMA_D f83 = f90, f83, f35
  5756. nop __LINE__
  5757. }
  5758. { .mfi
  5759. nop __LINE__
  5760. FMA_D f87 = f90, f87, f39
  5761. nop __LINE__
  5762. }
  5763. ;;
  /*
   * Store f80-f83 through AOFFSET and f84-f87 through AOFFSET2, then the
   * same values through C2 and C6.  Each pointer advances
   * SIZE, SIZE, SIZE, 5 * SIZE = 8 * SIZE.
   *
   * The F slots update the group f96-f103 with the coefficient pair
   * (f92,f93) and sources f80-f87:
   *     lo = FNMA(f92, src_lo, lo)   hi = FMA_A(f93, src_lo, hi)
   *     lo = FMA_B(f93, src_hi, lo)  hi = FNMA(f92, src_hi, hi)
   */
  5764. { .mfi
  5765. STFD [AOFFSET] = f80, SIZE
  5766. FNMA f96 = f92, f80, f96
  5767. nop __LINE__
  5768. }
  5769. { .mfi
  5770. STFD [AOFFSET2] = f84, SIZE
  5771. FNMA f100 = f92, f84, f100
  5772. nop __LINE__
  5773. }
  5774. ;;
  5775. { .mfi
  5776. STFD [AOFFSET] = f81, SIZE
  5777. FMA_A f97 = f93, f80, f97
  5778. nop __LINE__
  5779. }
  5780. { .mfi
  5781. STFD [AOFFSET2] = f85, SIZE
  5782. FMA_A f101 = f93, f84, f101
  5783. nop __LINE__
  5784. }
  5785. ;;
  5786. { .mfi
  5787. STFD [AOFFSET] = f82, SIZE
  5788. FNMA f98 = f92, f82, f98
  5789. nop __LINE__
  5790. }
  5791. { .mfi
  5792. STFD [AOFFSET2] = f86, SIZE
  5793. FNMA f102 = f92, f86, f102
  5794. nop __LINE__
  5795. }
  5796. ;;
  5797. { .mfi
  5798. STFD [AOFFSET] = f83, 5 * SIZE
  5799. FMA_A f99 = f93, f82, f99
  5800. nop __LINE__
  5801. }
  5802. { .mfi
  5803. STFD [AOFFSET2] = f87, 5 * SIZE
  5804. FMA_A f103 = f93, f86, f103
  5805. nop __LINE__
  5806. }
  5807. ;;
  5808. { .mfi
  5809. STFD [C2 ] = f80, SIZE
  5810. FMA_B f96 = f93, f81, f96
  5811. nop __LINE__
  5812. }
  5813. { .mfi
  5814. STFD [C6 ] = f84, SIZE
  5815. FMA_B f100 = f93, f85, f100
  5816. nop __LINE__
  5817. }
  5818. ;;
  5819. { .mfi
  5820. STFD [C2 ] = f81, SIZE
  5821. FNMA f97 = f92, f81, f97
  5822. nop __LINE__
  5823. }
  5824. { .mfi
  5825. STFD [C6 ] = f85, SIZE
  5826. FNMA f101 = f92, f85, f101
  5827. nop __LINE__
  5828. }
  5829. ;;
  5830. { .mfi
  5831. STFD [C2 ] = f82, SIZE
  5832. FMA_B f98 = f93, f83, f98
  5833. nop __LINE__
  5834. }
  5835. { .mfi
  5836. STFD [C6 ] = f86, SIZE
  5837. FMA_B f102 = f93, f87, f102
  5838. nop __LINE__
  5839. }
  5840. ;;
  5841. { .mfi
  5842. STFD [C2 ] = f83, 5 * SIZE
  5843. FNMA f99 = f92, f83, f99
  5844. nop __LINE__
  5845. }
  5846. { .mfi
  5847. STFD [C6 ] = f87, 5 * SIZE
  5848. FNMA f103 = f92, f87, f103
  5849. nop __LINE__
  5850. }
  5851. ;;
  /*
   * Same update pattern, applied to the group f112-f119 with the
   * coefficient pair (f94,f95) and sources f80-f87.
   */
  5852. { .mfi
  5853. nop __LINE__
  5854. FNMA f112 = f94, f80, f112
  5855. nop __LINE__
  5856. }
  5857. { .mfi
  5858. nop __LINE__
  5859. FNMA f116 = f94, f84, f116
  5860. nop __LINE__
  5861. }
  5862. ;;
  5863. { .mfi
  5864. nop __LINE__
  5865. FMA_A f113 = f95, f80, f113
  5866. nop __LINE__
  5867. }
  5868. { .mfi
  5869. nop __LINE__
  5870. FMA_A f117 = f95, f84, f117
  5871. nop __LINE__
  5872. }
  5873. ;;
  5874. { .mfi
  5875. nop __LINE__
  5876. FNMA f114 = f94, f82, f114
  5877. nop __LINE__
  5878. }
  5879. { .mfi
  5880. nop __LINE__
  5881. FNMA f118 = f94, f86, f118
  5882. nop __LINE__
  5883. }
  5884. ;;
  5885. { .mfi
  5886. nop __LINE__
  5887. FMA_A f115 = f95, f82, f115
  5888. nop __LINE__
  5889. }
  5890. { .mfi
  5891. nop __LINE__
  5892. FMA_A f119 = f95, f86, f119
  5893. nop __LINE__
  5894. }
  5895. ;;
  5896. { .mfi
  5897. nop __LINE__
  5898. FMA_B f112 = f95, f81, f112
  5899. nop __LINE__
  5900. }
  5901. { .mfi
  5902. nop __LINE__
  5903. FMA_B f116 = f95, f85, f116
  5904. nop __LINE__
  5905. }
  5906. ;;
  5907. { .mfi
  5908. nop __LINE__
  5909. FNMA f113 = f94, f81, f113
  5910. nop __LINE__
  5911. }
  5912. { .mfi
  5913. nop __LINE__
  5914. FNMA f117 = f94, f85, f117
  5915. nop __LINE__
  5916. }
  5917. ;;
  5918. { .mfi
  5919. nop __LINE__
  5920. FMA_B f114 = f95, f83, f114
  5921. nop __LINE__
  5922. }
  5923. { .mfi
  5924. nop __LINE__
  5925. FMA_B f118 = f95, f87, f118
  5926. nop __LINE__
  5927. }
  5928. ;;
  5929. { .mfi
  5930. nop __LINE__
  5931. FNMA f115 = f94, f83, f115
  5932. nop __LINE__
  5933. }
  5934. { .mfi
  5935. nop __LINE__
  5936. FNMA f119 = f94, f87, f119
  5937. nop __LINE__
  5938. }
  5939. ;;
  /*
   * RN variant, step 3 of 4: finalize the register group f96-f103.
   *
   * Each pair (f96,f97), (f98,f99), (f100,f101), (f102,f103) is combined
   * with the coefficient pair (f108,f109):
   *     t0 = f108 * lo            t1 = f109 * lo
   *     lo = FMA_C(f109, hi, t0)  hi = FMA_D(f108, hi, t1)
   * with f32-f39 as temporaries.  This has the shape of a complex multiply;
   * the exact signs depend on the FMA_C / FMA_D macro definitions elsewhere
   * in the file.
   */
  5940. { .mfi
  5941. nop __LINE__
  5942. FMPY f32 = f108, f96
  5943. nop __LINE__
  5944. }
  5945. { .mfi
  5946. nop __LINE__
  5947. FMPY f36 = f108, f100
  5948. nop __LINE__
  5949. }
  5950. ;;
  5951. { .mfi
  5952. nop __LINE__
  5953. FMPY f33 = f109, f96
  5954. nop __LINE__
  5955. }
  5956. { .mfi
  5957. nop __LINE__
  5958. FMPY f37 = f109, f100
  5959. nop __LINE__
  5960. }
  5961. ;;
  5962. { .mfi
  5963. nop __LINE__
  5964. FMPY f34 = f108, f98
  5965. nop __LINE__
  5966. }
  5967. { .mfi
  5968. nop __LINE__
  5969. FMPY f38 = f108, f102
  5970. nop __LINE__
  5971. }
  5972. ;;
  5973. { .mfi
  5974. nop __LINE__
  5975. FMPY f35 = f109, f98
  5976. nop __LINE__
  5977. }
  5978. { .mfi
  5979. nop __LINE__
  5980. FMPY f39 = f109, f102
  5981. nop __LINE__
  5982. }
  5983. ;;
  5984. { .mfi
  5985. nop __LINE__
  5986. FMA_C f96 = f109, f97, f32
  5987. nop __LINE__
  5988. }
  5989. { .mfi
  5990. nop __LINE__
  5991. FMA_C f100 = f109, f101, f36
  5992. nop __LINE__
  5993. }
  5994. ;;
  5995. { .mfi
  5996. nop __LINE__
  5997. FMA_D f97 = f108, f97, f33
  5998. nop __LINE__
  5999. }
  6000. { .mfi
  6001. nop __LINE__
  6002. FMA_D f101 = f108, f101, f37
  6003. nop __LINE__
  6004. }
  6005. ;;
  6006. { .mfi
  6007. nop __LINE__
  6008. FMA_C f98 = f109, f99, f34
  6009. nop __LINE__
  6010. }
  6011. { .mfi
  6012. nop __LINE__
  6013. FMA_C f102 = f109, f103, f38
  6014. nop __LINE__
  6015. }
  6016. ;;
  6017. { .mfi
  6018. nop __LINE__
  6019. FMA_D f99 = f108, f99, f35
  6020. nop __LINE__
  6021. }
  6022. { .mfi
  6023. nop __LINE__
  6024. FMA_D f103 = f108, f103, f39
  6025. nop __LINE__
  6026. }
  6027. ;;
  /*
   * Store f96-f99 through AOFFSET and f100-f103 through AOFFSET2, then the
   * same values through C3 and C7.  Each pointer advances
   * SIZE, SIZE, SIZE, 5 * SIZE = 8 * SIZE.
   *
   * The F slots update the last group f112-f119 with the coefficient pair
   * (f110,f111) and sources f96-f103:
   *     lo = FNMA(f110, src_lo, lo)   hi = FMA_A(f111, src_lo, hi)
   *     lo = FMA_B(f111, src_hi, lo)  hi = FNMA(f110, src_hi, hi)
   */
  6028. { .mfi
  6029. STFD [AOFFSET] = f96, SIZE
  6030. FNMA f112 = f110, f96, f112
  6031. nop __LINE__
  6032. }
  6033. { .mfi
  6034. STFD [AOFFSET2] = f100, SIZE
  6035. FNMA f116 = f110, f100, f116
  6036. nop __LINE__
  6037. }
  6038. ;;
  6039. { .mfi
  6040. STFD [AOFFSET] = f97, SIZE
  6041. FMA_A f113 = f111, f96, f113
  6042. nop __LINE__
  6043. }
  6044. { .mfi
  6045. STFD [AOFFSET2] = f101, SIZE
  6046. FMA_A f117 = f111, f100, f117
  6047. nop __LINE__
  6048. }
  6049. ;;
  6050. { .mfi
  6051. STFD [AOFFSET] = f98, SIZE
  6052. FNMA f114 = f110, f98, f114
  6053. nop __LINE__
  6054. }
  6055. { .mfi
  6056. STFD [AOFFSET2] = f102, SIZE
  6057. FNMA f118 = f110, f102, f118
  6058. nop __LINE__
  6059. }
  6060. ;;
  6061. { .mfi
  6062. STFD [AOFFSET] = f99, 5 * SIZE
  6063. FMA_A f115 = f111, f98, f115
  6064. nop __LINE__
  6065. }
  6066. { .mfi
  6067. STFD [AOFFSET2] = f103, 5 * SIZE
  6068. FMA_A f119 = f111, f102, f119
  6069. nop __LINE__
  6070. }
  6071. ;;
  6072. { .mfi
  6073. STFD [C3 ] = f96, SIZE
  6074. FMA_B f112 = f111, f97, f112
  6075. nop __LINE__
  6076. }
  6077. { .mfi
  6078. STFD [C7 ] = f100, SIZE
  6079. FMA_B f116 = f111, f101, f116
  6080. nop __LINE__
  6081. }
  6082. ;;
  6083. { .mfi
  6084. STFD [C3 ] = f97, SIZE
  6085. FNMA f113 = f110, f97, f113
  6086. nop __LINE__
  6087. }
  6088. { .mfi
  6089. STFD [C7 ] = f101, SIZE
  6090. FNMA f117 = f110, f101, f117
  6091. nop __LINE__
  6092. }
  6093. ;;
  6094. { .mfi
  6095. STFD [C3 ] = f98, SIZE
  6096. FMA_B f114 = f111, f99, f114
  6097. nop __LINE__
  6098. }
  6099. { .mfi
  6100. STFD [C7 ] = f102, SIZE
  6101. FMA_B f118 = f111, f103, f118
  6102. nop __LINE__
  6103. }
  6104. ;;
  6105. { .mfi
  6106. STFD [C3 ] = f99, 5 * SIZE
  6107. FNMA f115 = f110, f99, f115
  6108. nop __LINE__
  6109. }
  6110. { .mfi
  6111. STFD [C7 ] = f103, 5 * SIZE
  6112. FNMA f119 = f110, f103, f119
  6113. nop __LINE__
  6114. }
  6115. ;;
  /*
   * RN variant, step 4 of 4: finalize the register group f112-f119, then
   * prepare the next pass and loop.
   *
   * Each pair (f112,f113), (f114,f115), (f116,f117), (f118,f119) is
   * combined with the coefficient pair (f126,f127) using the same
   * FMPY / FMA_C / FMA_D pattern as the earlier steps, with f32-f39 as
   * temporaries.  The exact signs depend on the FMA_C / FMA_D macro
   * definitions elsewhere in the file.
   */
  6116. { .mfi
  6117. nop __LINE__
  6118. FMPY f32 = f126, f112
  6119. nop __LINE__
  6120. }
  6121. { .mfi
  6122. nop __LINE__
  6123. FMPY f36 = f126, f116
  6124. nop __LINE__
  6125. }
  6126. ;;
  6127. { .mfi
  6128. nop __LINE__
  6129. FMPY f33 = f127, f112
  6130. nop __LINE__
  6131. }
  6132. { .mfi
  6133. nop __LINE__
  6134. FMPY f37 = f127, f116
  6135. nop __LINE__
  6136. }
  6137. ;;
  6138. { .mfi
  6139. nop __LINE__
  6140. FMPY f34 = f126, f114
  6141. nop __LINE__
  6142. }
  6143. { .mfi
  6144. nop __LINE__
  6145. FMPY f38 = f126, f118
  6146. nop __LINE__
  6147. }
  6148. ;;
  6149. { .mfi
  6150. nop __LINE__
  6151. FMPY f35 = f127, f114
  6152. nop __LINE__
  6153. }
  6154. { .mfi
  6155. nop __LINE__
  6156. FMPY f39 = f127, f118
  6157. nop __LINE__
  6158. }
  6159. ;;
  6160. { .mfi
  6161. nop __LINE__
  6162. FMA_C f112 = f127, f113, f32
  6163. nop __LINE__
  6164. }
  6165. { .mfi
  6166. nop __LINE__
  6167. FMA_C f116 = f127, f117, f36
  6168. nop __LINE__
  6169. }
  6170. ;;
  6171. { .mfi
  6172. nop __LINE__
  6173. FMA_D f113 = f126, f113, f33
  6174. nop __LINE__
  6175. }
  6176. { .mfi
  6177. nop __LINE__
  6178. FMA_D f117 = f126, f117, f37
  6179. nop __LINE__
  6180. }
  6181. ;;
  6182. { .mfi
  6183. nop __LINE__
  6184. FMA_C f114 = f127, f115, f34
  6185. nop __LINE__
  6186. }
  6187. { .mfi
  6188. nop __LINE__
  6189. FMA_C f118 = f127, f119, f38
  6190. nop __LINE__
  6191. }
  6192. ;;
  6193. { .mfi
  6194. nop __LINE__
  6195. FMA_D f115 = f126, f115, f35
  6196. nop __LINE__
  6197. }
  6198. { .mfi
  6199. nop __LINE__
  6200. FMA_D f119 = f126, f119, f39
  6201. nop __LINE__
  6202. }
  6203. ;;
  /*
   * Store f112-f115 through AOFFSET and f116-f119 through AOFFSET2.
   * The last AOFFSET store post-increments by -27 * SIZE; together with the
   * earlier stores of this section (8 + 8 + 8 + 3 = 27) that returns
   * AOFFSET to its value at the start of the RN section.  AOFFSET2 gets no
   * final adjustment here.
   *
   * The I slots compute r2 = (K - KK) << ZBASE_SHIFT and copy KK to L.
   */
  6204. { .mmi
  6205. STFD [AOFFSET] = f112, SIZE
  6206. STFD [AOFFSET2] = f116, SIZE
  6207. sub r2 = K, KK
  6208. }
  6209. ;;
  6210. { .mmi
  6211. STFD [AOFFSET] = f113, SIZE
  6212. STFD [AOFFSET2] = f117, SIZE
  6213. mov L = KK
  6214. }
  6215. ;;
  6216. { .mmi
  6217. STFD [AOFFSET] = f114, SIZE
  6218. STFD [AOFFSET2] = f118, SIZE
  6219. shladd r2 = r2, ZBASE_SHIFT, r0
  6220. }
  6221. ;;
  6222. { .mmi
  6223. STFD [AOFFSET] = f115, -27 * SIZE
  6224. STFD [AOFFSET2] = f119
  6225. nop __LINE__
  6226. }
  6227. ;;
  /*
   * Store the same values through C4 and C8 (each advances 8 * SIZE).
   * Meanwhile:
   *   - BOFFSET and AOFFSET are each advanced by r2 * 4;
   *   - f64, f65, f80, f81, f96, f97, f112, f113 are cleared from f0
   *     (f0 always reads as 0.0 on IA-64);
   *   - p6 is set when I != 1, tested before I is decremented (the
   *     decrement sits in a later instruction group), and the final bundle
   *     branches back to .L011 while p6 holds.
   * NOTE(review): only 8 of the 32 result registers are cleared here;
   * presumably the code at .L011 re-initializes the rest - confirm there.
   */
  6228. { .mfi
  6229. STFD [C4 ] = f112, SIZE
  6230. mov f64 = f0
  6231. shladd BOFFSET = r2, 2, BOFFSET
  6232. }
  6233. { .mfi
  6234. STFD [C8 ] = f116, SIZE
  6235. mov f65 = f0
  6236. shladd AOFFSET = r2, 2, AOFFSET
  6237. }
  6238. ;;
  6239. { .mfi
  6240. STFD [C4 ] = f113, SIZE
  6241. mov f80 = f0
  6242. cmp.ne p6, p0 = 1, I
  6243. }
  6244. { .mfi
  6245. STFD [C8 ] = f117, SIZE
  6246. mov f81 = f0
  6247. nop __LINE__
  6248. }
  6249. ;;
  6250. { .mfi
  6251. STFD [C4 ] = f114, SIZE
  6252. mov f96 = f0
  6253. adds I = -1, I
  6254. }
  6255. { .mfi
  6256. STFD [C8 ] = f118, SIZE
  6257. mov f97 = f0
  6258. nop __LINE__
  6259. }
  6260. ;;
  6261. { .mfi
  6262. STFD [C4 ] = f115, 5 * SIZE
  6263. mov f112 = f0
  6264. nop __LINE__
  6265. }
  6266. { .mfb
  6267. STFD [C8 ] = f119, 5 * SIZE
  6268. mov f113 = f0
  6269. (p6) br.cond.dptk .L011
  6270. }
  6271. #endif
  6272. #ifdef RT
  6273. { .mfi
  6274. LDFPD f76, f77 = [BOFFSET]
  6275. FMPY f32 = f72, f112
  6276. adds BOFFSET = - 2 * SIZE, BOFFSET
  6277. }
  6278. { .mfi
  6279. nop __LINE__
  6280. FMPY f36 = f72, f116
  6281. nop __LINE__
  6282. }
  6283. ;;
  6284. { .mfi
  6285. LDFPD f78, f79 = [BOFFSET]
  6286. FMPY f33 = f73, f112
  6287. adds BOFFSET = - 4 * SIZE, BOFFSET
  6288. }
  6289. { .mfi
  6290. nop __LINE__
  6291. FMPY f37 = f73, f116
  6292. nop __LINE__
  6293. }
  6294. ;;
  6295. { .mfi
  6296. LDFPD f88, f89 = [BOFFSET]
  6297. FMPY f34 = f72, f114
  6298. adds BOFFSET = - 2 * SIZE, BOFFSET
  6299. }
  6300. { .mfi
  6301. nop __LINE__
  6302. FMPY f38 = f72, f118
  6303. nop __LINE__
  6304. }
  6305. ;;
  6306. { .mfi
  6307. LDFPD f90, f91 = [BOFFSET]
  6308. FMPY f35 = f73, f114
  6309. adds BOFFSET = - 2 * SIZE, BOFFSET
  6310. }
  6311. { .mfi
  6312. nop __LINE__
  6313. FMPY f39 = f73, f118
  6314. nop __LINE__
  6315. }
  6316. ;;
  6317. { .mfi
  6318. LDFPD f92, f93 = [BOFFSET]
  6319. FMA_C f112 = f73, f113, f32
  6320. adds BOFFSET = - 6 * SIZE, BOFFSET
  6321. }
  6322. { .mfi
  6323. nop __LINE__
  6324. FMA_C f116 = f73, f117, f36
  6325. nop __LINE__
  6326. }
  6327. ;;
  6328. { .mfi
  6329. LDFPD f104, f105 = [BOFFSET]
  6330. FMA_D f113 = f72, f113, f33
  6331. adds BOFFSET = - 2 * SIZE, BOFFSET
  6332. }
  6333. { .mfi
  6334. nop __LINE__
  6335. FMA_D f117 = f72, f117, f37
  6336. nop __LINE__
  6337. }
  6338. ;;
  6339. { .mfi
  6340. LDFPD f106, f107 = [BOFFSET]
  6341. FMA_C f114 = f73, f115, f34
  6342. adds BOFFSET = - 8 * SIZE, BOFFSET
  6343. }
  6344. { .mfi
  6345. nop __LINE__
  6346. FMA_C f118 = f73, f119, f38
  6347. nop __LINE__
  6348. }
  6349. ;;
  6350. { .mfi
  6351. LDFPD f120, f121 = [BOFFSET]
  6352. FMA_D f115 = f72, f115, f35
  6353. adds AOFFSET2 = 28 * SIZE, AOFFSET
  6354. }
  6355. { .mfi
  6356. nop __LINE__
  6357. FMA_D f119 = f72, f119, f39
  6358. adds AOFFSET = 24 * SIZE, AOFFSET
  6359. }
  6360. ;;
  6361. { .mfi
  6362. STFD [AOFFSET] = f112, SIZE
  6363. FNMA f96 = f74, f112, f96
  6364. nop __LINE__
  6365. }
  6366. { .mfi
  6367. STFD [AOFFSET2] = f116, SIZE
  6368. FNMA f100 = f74, f116, f100
  6369. nop __LINE__
  6370. }
  6371. ;;
  6372. { .mfi
  6373. STFD [AOFFSET] = f113, SIZE
  6374. FMA_A f97 = f75, f112, f97
  6375. nop __LINE__
  6376. }
  6377. { .mfi
  6378. STFD [AOFFSET2] = f117, SIZE
  6379. FMA_A f101 = f75, f116, f101
  6380. nop __LINE__
  6381. }
  6382. ;;
  6383. { .mfi
  6384. STFD [AOFFSET] = f114, SIZE
  6385. FNMA f98 = f74, f114, f98
  6386. nop __LINE__
  6387. }
  6388. { .mfi
  6389. STFD [AOFFSET2] = f118, SIZE
  6390. FNMA f102 = f74, f118, f102
  6391. nop __LINE__
  6392. }
  6393. ;;
  6394. { .mfi
  6395. STFD [AOFFSET] = f115, -11 * SIZE
  6396. FMA_A f99 = f75, f114, f99
  6397. nop __LINE__
  6398. }
  6399. { .mfi
  6400. STFD [AOFFSET2] = f119, -11 * SIZE
  6401. FMA_A f103 = f75, f118, f103
  6402. nop __LINE__
  6403. }
  6404. ;;
  6405. { .mfi
  6406. STFD [C4 ] = f112, SIZE
  6407. FMA_B f96 = f75, f113, f96
  6408. nop __LINE__
  6409. }
  6410. { .mfi
  6411. STFD [C8 ] = f116, SIZE
  6412. FMA_B f100 = f75, f117, f100
  6413. nop __LINE__
  6414. }
  6415. ;;
  6416. { .mfi
  6417. STFD [C4 ] = f113, SIZE
  6418. FNMA f97 = f74, f113, f97
  6419. nop __LINE__
  6420. }
  6421. { .mfi
  6422. STFD [C8 ] = f117, SIZE
  6423. FNMA f101 = f74, f117, f101
  6424. nop __LINE__
  6425. }
  6426. ;;
  6427. { .mfi
  6428. STFD [C4 ] = f114, SIZE
  6429. FMA_B f98 = f75, f115, f98
  6430. nop __LINE__
  6431. }
  6432. { .mfi
  6433. STFD [C8 ] = f118, SIZE
  6434. FMA_B f102 = f75, f119, f102
  6435. nop __LINE__
  6436. }
  6437. ;;
  6438. { .mfi
  6439. STFD [C4 ] = f115, 5 * SIZE
  6440. FNMA f99 = f74, f115, f99
  6441. nop __LINE__
  6442. }
  6443. { .mfi
  6444. STFD [C8 ] = f119, 5 * SIZE
  6445. FNMA f103 = f74, f119, f103
  6446. nop __LINE__
  6447. }
  6448. ;;
  6449. { .mfi
  6450. nop __LINE__
  6451. FNMA f80 = f76, f112, f80
  6452. nop __LINE__
  6453. }
  6454. { .mfi
  6455. nop __LINE__
  6456. FNMA f84 = f76, f116, f84
  6457. nop __LINE__
  6458. }
  6459. ;;
  6460. { .mfi
  6461. nop __LINE__
  6462. FMA_A f81 = f77, f112, f81
  6463. nop __LINE__
  6464. }
  6465. { .mfi
  6466. nop __LINE__
  6467. FMA_A f85 = f77, f116, f85
  6468. nop __LINE__
  6469. }
  6470. ;;
  6471. { .mfi
  6472. nop __LINE__
  6473. FNMA f82 = f76, f114, f82
  6474. nop __LINE__
  6475. }
  6476. { .mfi
  6477. nop __LINE__
  6478. FNMA f86 = f76, f118, f86
  6479. nop __LINE__
  6480. }
  6481. ;;
  6482. { .mfi
  6483. nop __LINE__
  6484. FMA_A f83 = f77, f114, f83
  6485. nop __LINE__
  6486. }
  6487. { .mfi
  6488. nop __LINE__
  6489. FMA_A f87 = f77, f118, f87
  6490. nop __LINE__
  6491. }
  6492. ;;
  6493. { .mfi
  6494. nop __LINE__
  6495. FMA_B f80 = f77, f113, f80
  6496. nop __LINE__
  6497. }
  6498. { .mfi
  6499. nop __LINE__
  6500. FMA_B f84 = f77, f117, f84
  6501. nop __LINE__
  6502. }
  6503. ;;
  6504. { .mfi
  6505. nop __LINE__
  6506. FNMA f81 = f76, f113, f81
  6507. nop __LINE__
  6508. }
  6509. { .mfi
  6510. nop __LINE__
  6511. FNMA f85 = f76, f117, f85
  6512. nop __LINE__
  6513. }
  6514. ;;
  6515. { .mfi
  6516. nop __LINE__
  6517. FMA_B f82 = f77, f115, f82
  6518. nop __LINE__
  6519. }
  6520. { .mfi
  6521. nop __LINE__
  6522. FMA_B f86 = f77, f119, f86
  6523. nop __LINE__
  6524. }
  6525. ;;
  6526. { .mfi
  6527. nop __LINE__
  6528. FNMA f83 = f76, f115, f83
  6529. nop __LINE__
  6530. }
  6531. { .mfi
  6532. nop __LINE__
  6533. FNMA f87 = f76, f119, f87
  6534. nop __LINE__
  6535. }
  6536. ;;
  6537. { .mfi
  6538. nop __LINE__
  6539. FNMA f64 = f78, f112, f64
  6540. nop __LINE__
  6541. }
  6542. { .mfi
  6543. nop __LINE__
  6544. FNMA f68 = f78, f116, f68
  6545. nop __LINE__
  6546. }
  6547. ;;
  6548. { .mfi
  6549. nop __LINE__
  6550. FMA_A f65 = f79, f112, f65
  6551. nop __LINE__
  6552. }
  6553. { .mfi
  6554. nop __LINE__
  6555. FMA_A f69 = f79, f116, f69
  6556. nop __LINE__
  6557. }
  6558. ;;
  6559. { .mfi
  6560. nop __LINE__
  6561. FNMA f66 = f78, f114, f66
  6562. nop __LINE__
  6563. }
  6564. { .mfi
  6565. nop __LINE__
  6566. FNMA f70 = f78, f118, f70
  6567. nop __LINE__
  6568. }
  6569. ;;
  6570. { .mfi
  6571. nop __LINE__
  6572. FMA_A f67 = f79, f114, f67
  6573. nop __LINE__
  6574. }
  6575. { .mfi
  6576. nop __LINE__
  6577. FMA_A f71 = f79, f118, f71
  6578. nop __LINE__
  6579. }
  6580. ;;
  6581. { .mfi
  6582. nop __LINE__
  6583. FMA_B f64 = f79, f113, f64
  6584. nop __LINE__
  6585. }
  6586. { .mfi
  6587. nop __LINE__
  6588. FMA_B f68 = f79, f117, f68
  6589. nop __LINE__
  6590. }
  6591. ;;
  6592. { .mfi
  6593. nop __LINE__
  6594. FNMA f65 = f78, f113, f65
  6595. nop __LINE__
  6596. }
  6597. { .mfi
  6598. nop __LINE__
  6599. FNMA f69 = f78, f117, f69
  6600. nop __LINE__
  6601. }
  6602. ;;
  6603. { .mfi
  6604. nop __LINE__
  6605. FMA_B f66 = f79, f115, f66
  6606. nop __LINE__
  6607. }
  6608. { .mfi
  6609. nop __LINE__
  6610. FMA_B f70 = f79, f119, f70
  6611. nop __LINE__
  6612. }
  6613. ;;
  6614. { .mfi
  6615. nop __LINE__
  6616. FNMA f67 = f78, f115, f67
  6617. nop __LINE__
  6618. }
  6619. { .mfi
  6620. nop __LINE__
  6621. FNMA f71 = f78, f119, f71
  6622. nop __LINE__
  6623. }
  6624. ;;
  6625. { .mfi
  6626. nop __LINE__
  6627. FMPY f32 = f88, f96
  6628. nop __LINE__
  6629. }
  6630. { .mfi
  6631. nop __LINE__
  6632. FMPY f36 = f88, f100
  6633. nop __LINE__
  6634. }
  6635. ;;
  6636. { .mfi
  6637. nop __LINE__
  6638. FMPY f33 = f89, f96
  6639. nop __LINE__
  6640. }
  6641. { .mfi
  6642. nop __LINE__
  6643. FMPY f37 = f89, f100
  6644. nop __LINE__
  6645. }
  6646. ;;
  6647. { .mfi
  6648. nop __LINE__
  6649. FMPY f34 = f88, f98
  6650. nop __LINE__
  6651. }
  6652. { .mfi
  6653. nop __LINE__
  6654. FMPY f38 = f88, f102
  6655. nop __LINE__
  6656. }
  6657. ;;
  6658. { .mfi
  6659. nop __LINE__
  6660. FMPY f35 = f89, f98
  6661. nop __LINE__
  6662. }
  6663. { .mfi
  6664. nop __LINE__
  6665. FMPY f39 = f89, f102
  6666. nop __LINE__
  6667. }
  6668. ;;
  6669. { .mfi
  6670. nop __LINE__
  6671. FMA_C f96 = f89, f97, f32
  6672. nop __LINE__
  6673. }
  6674. { .mfi
  6675. nop __LINE__
  6676. FMA_C f100 = f89, f101, f36
  6677. nop __LINE__
  6678. }
  6679. ;;
  6680. { .mfi
  6681. nop __LINE__
  6682. FMA_D f97 = f88, f97, f33
  6683. nop __LINE__
  6684. }
  6685. { .mfi
  6686. nop __LINE__
  6687. FMA_D f101 = f88, f101, f37
  6688. nop __LINE__
  6689. }
  6690. ;;
  6691. { .mfi
  6692. nop __LINE__
  6693. FMA_C f98 = f89, f99, f34
  6694. nop __LINE__
  6695. }
  6696. { .mfi
  6697. nop __LINE__
  6698. FMA_C f102 = f89, f103, f38
  6699. nop __LINE__
  6700. }
  6701. ;;
  6702. { .mfi
  6703. nop __LINE__
  6704. FMA_D f99 = f88, f99, f35
  6705. nop __LINE__
  6706. }
  6707. { .mfi
  6708. nop __LINE__
  6709. FMA_D f103 = f88, f103, f39
  6710. nop __LINE__
  6711. }
  6712. ;;
  6713. { .mfi
  6714. STFD [AOFFSET] = f96, SIZE
  6715. FNMA f80 = f90, f96, f80
  6716. nop __LINE__
  6717. }
  6718. { .mfi
  6719. STFD [AOFFSET2] = f100, SIZE
  6720. FNMA f84 = f90, f100, f84
  6721. nop __LINE__
  6722. }
  6723. ;;
  6724. { .mfi
  6725. STFD [AOFFSET] = f97, SIZE
  6726. FMA_A f81 = f91, f96, f81
  6727. nop __LINE__
  6728. }
  6729. { .mfi
  6730. STFD [AOFFSET2] = f101, SIZE
  6731. FMA_A f85 = f91, f100, f85
  6732. nop __LINE__
  6733. }
  6734. ;;
  6735. { .mfi
  6736. STFD [AOFFSET] = f98, SIZE
  6737. FNMA f82 = f90, f98, f82
  6738. nop __LINE__
  6739. }
  6740. { .mfi
  6741. STFD [AOFFSET2] = f102, SIZE
  6742. FNMA f86 = f90, f102, f86
  6743. nop __LINE__
  6744. }
  6745. ;;
  6746. { .mfi
  6747. STFD [AOFFSET] = f99, -11 * SIZE
  6748. FMA_A f83 = f91, f98, f83
  6749. nop __LINE__
  6750. }
  6751. { .mfi
  6752. STFD [AOFFSET2] = f103, -11 * SIZE
  6753. FMA_A f87 = f91, f102, f87
  6754. nop __LINE__
  6755. }
  6756. ;;
  6757. { .mfi
  6758. STFD [C3 ] = f96, SIZE
  6759. FMA_B f80 = f91, f97, f80
  6760. nop __LINE__
  6761. }
  6762. { .mfi
  6763. STFD [C7 ] = f100, SIZE
  6764. FMA_B f84 = f91, f101, f84
  6765. nop __LINE__
  6766. }
  6767. ;;
  6768. { .mfi
  6769. STFD [C3 ] = f97, SIZE
  6770. FNMA f81 = f90, f97, f81
  6771. nop __LINE__
  6772. }
  6773. { .mfi
  6774. STFD [C7 ] = f101, SIZE
  6775. FNMA f85 = f90, f101, f85
  6776. nop __LINE__
  6777. }
  6778. ;;
  6779. { .mfi
  6780. STFD [C3 ] = f98, SIZE
  6781. FMA_B f82 = f91, f99, f82
  6782. nop __LINE__
  6783. }
  6784. { .mfi
  6785. STFD [C7 ] = f102, SIZE
  6786. FMA_B f86 = f91, f103, f86
  6787. nop __LINE__
  6788. }
  6789. ;;
  6790. { .mfi
  6791. STFD [C3 ] = f99, 5 * SIZE
  6792. FNMA f83 = f90, f99, f83
  6793. nop __LINE__
  6794. }
  6795. { .mfi
  6796. STFD [C7 ] = f103, 5 * SIZE
  6797. FNMA f87 = f90, f103, f87
  6798. nop __LINE__
  6799. }
  6800. ;;
  6801. { .mfi
  6802. nop __LINE__
  6803. FNMA f64 = f92, f96, f64
  6804. nop __LINE__
  6805. }
  6806. { .mfi
  6807. nop __LINE__
  6808. FNMA f68 = f92, f100, f68
  6809. nop __LINE__
  6810. }
  6811. ;;
  6812. { .mfi
  6813. nop __LINE__
  6814. FMA_A f65 = f93, f96, f65
  6815. nop __LINE__
  6816. }
  6817. { .mfi
  6818. nop __LINE__
  6819. FMA_A f69 = f93, f100, f69
  6820. nop __LINE__
  6821. }
  6822. ;;
  6823. { .mfi
  6824. nop __LINE__
  6825. FNMA f66 = f92, f98, f66
  6826. nop __LINE__
  6827. }
  6828. { .mfi
  6829. nop __LINE__
  6830. FNMA f70 = f92, f102, f70
  6831. nop __LINE__
  6832. }
  6833. ;;
  6834. { .mfi
  6835. nop __LINE__
  6836. FMA_A f67 = f93, f98, f67
  6837. nop __LINE__
  6838. }
  6839. { .mfi
  6840. nop __LINE__
  6841. FMA_A f71 = f93, f102, f71
  6842. nop __LINE__
  6843. }
  6844. ;;
  6845. { .mfi
  6846. nop __LINE__
  6847. FMA_B f64 = f93, f97, f64
  6848. nop __LINE__
  6849. }
  6850. { .mfi
  6851. nop __LINE__
  6852. FMA_B f68 = f93, f101, f68
  6853. nop __LINE__
  6854. }
  6855. ;;
  6856. { .mfi
  6857. nop __LINE__
  6858. FNMA f65 = f92, f97, f65
  6859. nop __LINE__
  6860. }
  6861. { .mfi
  6862. nop __LINE__
  6863. FNMA f69 = f92, f101, f69
  6864. nop __LINE__
  6865. }
  6866. ;;
  6867. { .mfi
  6868. nop __LINE__
  6869. FMA_B f66 = f93, f99, f66
  6870. nop __LINE__
  6871. }
  6872. { .mfi
  6873. nop __LINE__
  6874. FMA_B f70 = f93, f103, f70
  6875. nop __LINE__
  6876. }
  6877. ;;
  6878. { .mfi
  6879. nop __LINE__
  6880. FNMA f67 = f92, f99, f67
  6881. nop __LINE__
  6882. }
  6883. { .mfi
  6884. nop __LINE__
  6885. FNMA f71 = f92, f103, f71
  6886. nop __LINE__
  6887. }
  6888. ;;
  6889. { .mfi
  6890. nop __LINE__
  6891. FMPY f32 = f104, f80
  6892. nop __LINE__
  6893. }
  6894. { .mfi
  6895. nop __LINE__
  6896. FMPY f36 = f104, f84
  6897. nop __LINE__
  6898. }
  6899. ;;
  6900. { .mfi
  6901. nop __LINE__
  6902. FMPY f33 = f105, f80
  6903. nop __LINE__
  6904. }
  6905. { .mfi
  6906. nop __LINE__
  6907. FMPY f37 = f105, f84
  6908. nop __LINE__
  6909. }
  6910. ;;
  6911. { .mfi
  6912. nop __LINE__
  6913. FMPY f34 = f104, f82
  6914. nop __LINE__
  6915. }
  6916. { .mfi
  6917. nop __LINE__
  6918. FMPY f38 = f104, f86
  6919. nop __LINE__
  6920. }
  6921. ;;
  6922. { .mfi
  6923. nop __LINE__
  6924. FMPY f35 = f105, f82
  6925. nop __LINE__
  6926. }
  6927. { .mfi
  6928. nop __LINE__
  6929. FMPY f39 = f105, f86
  6930. nop __LINE__
  6931. }
  6932. ;;
  6933. { .mfi
  6934. nop __LINE__
  6935. FMA_C f80 = f105, f81, f32
  6936. nop __LINE__
  6937. }
  6938. { .mfi
  6939. nop __LINE__
  6940. FMA_C f84 = f105, f85, f36
  6941. nop __LINE__
  6942. }
  6943. ;;
  6944. { .mfi
  6945. nop __LINE__
  6946. FMA_D f81 = f104, f81, f33
  6947. nop __LINE__
  6948. }
  6949. { .mfi
  6950. nop __LINE__
  6951. FMA_D f85 = f104, f85, f37
  6952. nop __LINE__
  6953. }
  6954. ;;
  6955. { .mfi
  6956. nop __LINE__
  6957. FMA_C f82 = f105, f83, f34
  6958. nop __LINE__
  6959. }
  6960. { .mfi
  6961. nop __LINE__
  6962. FMA_C f86 = f105, f87, f38
  6963. nop __LINE__
  6964. }
  6965. ;;
  6966. { .mfi
  6967. nop __LINE__
  6968. FMA_D f83 = f104, f83, f35
  6969. nop __LINE__
  6970. }
  6971. { .mfi
  6972. nop __LINE__
  6973. FMA_D f87 = f104, f87, f39
  6974. nop __LINE__
  6975. }
  6976. ;;
  6977. { .mfi
  6978. STFD [AOFFSET] = f80, SIZE
  6979. FNMA f64 = f106, f80, f64
  6980. nop __LINE__
  6981. }
  6982. { .mfi
  6983. STFD [AOFFSET2] = f84, SIZE
  6984. FNMA f68 = f106, f84, f68
  6985. nop __LINE__
  6986. }
  6987. ;;
  6988. { .mfi
  6989. STFD [AOFFSET] = f81, SIZE
  6990. FMA_A f65 = f107, f80, f65
  6991. nop __LINE__
  6992. }
  6993. { .mfi
  6994. STFD [AOFFSET2] = f85, SIZE
  6995. FMA_A f69 = f107, f84, f69
  6996. nop __LINE__
  6997. }
  6998. ;;
  6999. { .mfi
  7000. STFD [AOFFSET] = f82, SIZE
  7001. FNMA f66 = f106, f82, f66
  7002. nop __LINE__
  7003. }
  7004. { .mfi
  7005. STFD [AOFFSET2] = f86, SIZE
  7006. FNMA f70 = f106, f86, f70
  7007. nop __LINE__
  7008. }
  7009. ;;
  7010. { .mfi
  7011. STFD [AOFFSET] = f83, -11 * SIZE
  7012. FMA_A f67 = f107, f82, f67
  7013. nop __LINE__
  7014. }
  7015. { .mfi
  7016. STFD [AOFFSET2] = f87, -11 * SIZE
  7017. FMA_A f71 = f107, f86, f71
  7018. nop __LINE__
  7019. }
  7020. ;;
  7021. { .mfi
  7022. STFD [C2 ] = f80, SIZE
  7023. FMA_B f64 = f107, f81, f64
  7024. nop __LINE__
  7025. }
  7026. { .mfi
  7027. STFD [C6 ] = f84, SIZE
  7028. FMA_B f68 = f107, f85, f68
  7029. nop __LINE__
  7030. }
  7031. ;;
  7032. { .mfi
  7033. STFD [C2 ] = f81, SIZE
  7034. FNMA f65 = f106, f81, f65
  7035. nop __LINE__
  7036. }
  7037. { .mfi
  7038. STFD [C6 ] = f85, SIZE
  7039. FNMA f69 = f106, f85, f69
  7040. nop __LINE__
  7041. }
  7042. ;;
  7043. { .mfi
  7044. STFD [C2 ] = f82, SIZE
  7045. FMA_B f66 = f107, f83, f66
  7046. nop __LINE__
  7047. }
  7048. { .mfi
  7049. STFD [C6 ] = f86, SIZE
  7050. FMA_B f70 = f107, f87, f70
  7051. nop __LINE__
  7052. }
  7053. ;;
  7054. { .mfi
  7055. STFD [C2 ] = f83, 5 * SIZE
  7056. FNMA f67 = f106, f83, f67
  7057. nop __LINE__
  7058. }
  7059. { .mfi
  7060. STFD [C6 ] = f87, 5 * SIZE
  7061. FNMA f71 = f106, f87, f71
  7062. nop __LINE__
  7063. }
  7064. ;;
  7065. { .mfi
  7066. nop __LINE__
  7067. FMPY f32 = f120, f64
  7068. nop __LINE__
  7069. }
  7070. { .mfi
  7071. nop __LINE__
  7072. FMPY f36 = f120, f68
  7073. nop __LINE__
  7074. }
  7075. ;;
  7076. { .mfi
  7077. nop __LINE__
  7078. FMPY f33 = f121, f64
  7079. nop __LINE__
  7080. }
  7081. { .mfi
  7082. nop __LINE__
  7083. FMPY f37 = f121, f68
  7084. nop __LINE__
  7085. }
  7086. ;;
  7087. { .mfi
  7088. nop __LINE__
  7089. FMPY f34 = f120, f66
  7090. nop __LINE__
  7091. }
  7092. { .mfi
  7093. nop __LINE__
  7094. FMPY f38 = f120, f70
  7095. nop __LINE__
  7096. }
  7097. ;;
  7098. { .mfi
  7099. nop __LINE__
  7100. FMPY f35 = f121, f66
  7101. nop __LINE__
  7102. }
  7103. { .mfi
  7104. nop __LINE__
  7105. FMPY f39 = f121, f70
  7106. nop __LINE__
  7107. }
  7108. ;;
  7109. { .mfi
  7110. nop __LINE__
  7111. FMA_C f64 = f121, f65, f32
  7112. nop __LINE__
  7113. }
  7114. { .mfi
  7115. nop __LINE__
  7116. FMA_C f68 = f121, f69, f36
  7117. nop __LINE__
  7118. }
  7119. ;;
  7120. { .mfi
  7121. nop __LINE__
  7122. FMA_D f65 = f120, f65, f33
  7123. nop __LINE__
  7124. }
  7125. { .mfi
  7126. nop __LINE__
  7127. FMA_D f69 = f120, f69, f37
  7128. nop __LINE__
  7129. }
  7130. ;;
  7131. { .mfi
  7132. nop __LINE__
  7133. FMA_C f66 = f121, f67, f34
  7134. nop __LINE__
  7135. }
  7136. { .mfi
  7137. nop __LINE__
  7138. FMA_C f70 = f121, f71, f38
  7139. nop __LINE__
  7140. }
  7141. ;;
  7142. { .mfi
  7143. nop __LINE__
  7144. FMA_D f67 = f120, f67, f35
  7145. nop __LINE__
  7146. }
  7147. { .mfi
  7148. nop __LINE__
  7149. FMA_D f71 = f120, f71, f39
  7150. nop __LINE__
  7151. }
  7152. ;;
  7153. { .mmi
  7154. STFD [AOFFSET] = f64, SIZE
  7155. STFD [AOFFSET2] = f68, SIZE
  7156. shladd r2 = K, ZBASE_SHIFT, r0
  7157. }
  7158. ;;
  7159. { .mmi
  7160. STFD [AOFFSET] = f65, SIZE
  7161. STFD [AOFFSET2] = f69, SIZE
  7162. shladd AORIG = r2, 2, AORIG
  7163. }
  7164. ;;
  7165. { .mmi
  7166. STFD [AOFFSET] = f66, SIZE
  7167. STFD [AOFFSET2] = f70, SIZE
  7168. nop __LINE__
  7169. }
  7170. ;;
  7171. { .mmi
  7172. STFD [AOFFSET] = f67, -3 * SIZE
  7173. STFD [AOFFSET2] = f71
  7174. nop __LINE__
  7175. }
  7176. ;;
  7177. { .mfi
  7178. STFD [C1 ] = f64, SIZE
  7179. mov f64 = f0
  7180. cmp.ne p6, p0 = 1, I
  7181. }
  7182. { .mfi
  7183. STFD [C5 ] = f68, SIZE
  7184. mov f81 = f0
  7185. nop __LINE__
  7186. }
  7187. ;;
  7188. { .mfi
  7189. STFD [C1 ] = f65, SIZE
  7190. mov f65 = f0
  7191. nop __LINE__
  7192. }
  7193. { .mfi
  7194. STFD [C5 ] = f69, SIZE
  7195. mov f96 = f0
  7196. nop __LINE__
  7197. }
  7198. ;;
  7199. { .mfi
  7200. STFD [C1 ] = f66, SIZE
  7201. mov f80 = f0
  7202. sub L = K, KK
  7203. }
  7204. { .mfi
  7205. STFD [C5 ] = f70, SIZE
  7206. mov f97 = f0
  7207. nop __LINE__
  7208. }
  7209. ;;
  7210. { .mfi
  7211. STFD [C1 ] = f67, 5 * SIZE
  7212. mov f112 = f0
  7213. adds I = -1, I
  7214. }
  7215. { .mfb
  7216. STFD [C5 ] = f71, 5 * SIZE
  7217. mov f113 = f0
  7218. (p6) br.cond.dptk .L011
  7219. }
  7220. ;;
  7221. #endif
  // .L049: tail of the loop headed by .L010. Updates B and KK for whichever
  // of LN/LT/RN/RT is defined, resets AOFFSET to A, and branches back to
  // .L010 while 0 < J.
  7222. .L049:
  7223. #ifdef LN
  // KK8 = K << ZBASE_SHIFT, then B = (KK8 << 2) + B.
  7224. shladd KK8 = K, ZBASE_SHIFT, r0
  7225. ;;
  7226. shladd B = KK8, 2, B
  7227. #endif
  7228. #if defined(LT) || defined(RN)
  // B takes over the current value of BOFFSET.
  7229. mov B = BOFFSET
  7230. #endif
  7231. #ifdef RN
  7232. adds KK = 4, KK
  7233. #endif
  7234. #ifdef RT
  7235. adds KK = -4, KK
  7236. #endif
  7237. ;;
  7238. { .mmb
  7239. mov AOFFSET = A
  // J is only tested here; it is not modified anywhere in this block.
  7240. cmp.lt p6, p0 = 0, J
  7241. (p6) br.cond.dptk .L010
  7242. }
  7243. ;;
  // .L050: entry for the part of the kernel that runs when bit 1 of N is set.
  // Sets up C1/C2, KK, AORIG or AOFFSET, and L for the variant being built
  // (LN/LT/RN/RT), then falls through to .L070.
  7244. .align 16
  7245. .L050:
  7246. { .mib
  // p6 = (bit 1 of N is clear): skip this whole part and go to .L090.
  7247. tbit.z p6, p0 = N, 1
  7248. (p6) br.cond.dpnt .L090
  7249. }
  7250. ;;
  7251. #ifdef RT
  7252. { .mmi
  // r3 = LDC << 1, r2 = K << (1 + ZBASE_SHIFT).
  7253. shladd r3 = LDC, 1, r0
  7254. nop __LINE__
  7255. shl r2 = K, 1 + ZBASE_SHIFT
  7256. }
  7257. ;;
  7258. { .mmi
  // RT moves B and C backwards by those amounts before using them.
  7259. sub B = B, r2
  7260. sub C = C, r3
  7261. nop __LINE__
  7262. }
  7263. ;;
  7264. #endif
  // C1 = C and C2 = C + LDC are the two output pointers used below.
  7265. mov C1 = C
  7266. add C2 = LDC, C
  7267. ;;
  7268. #ifdef LN
  7269. add KK = M, OFFSET
  7270. #elif defined LT
  7271. mov KK = OFFSET
  7272. #else
  7273. nop __LINE__
  7274. #endif
  7275. ;;
  7276. #if defined(LN) || defined(RT)
  7277. mov AORIG = A
  7278. #else
  7279. mov AOFFSET = A
  7280. #endif
  7281. ;;
  // L = KK for LT/RN, K - KK otherwise (.L070 recomputes the same value).
  7282. #if defined(LT) || defined(RN)
  7283. mov L = KK
  7284. #else
  7285. sub L = K, KK
  7286. #endif
  7287. ;;
  7288. { .mib
  7289. #ifndef RT
  // Non-RT variants advance C by LDC << 1 now that C1/C2 have been captured.
  7290. shladd C = LDC, 1, C
  7291. #else
  7292. nop __LINE__
  7293. #endif
  7294. }
  7295. ;;
  // .L070: prologue for the block handled when bit 0 of M is set. Computes
  // the A/B start addresses, issues the first predicated loads, and turns L
  // into a loop count for the two-step-per-pass loop at .L072.
  7296. .L070:
  7297. { .mib
  7298. #if defined(LT) || defined(RN)
  7299. mov L = KK
  7300. #else
  7301. sub L = K, KK
  7302. #endif
  // p6 = (bit 0 of M is clear): nothing to do here, continue at .L060.
  7303. tbit.z p6, p7 = M, 0
  7304. (p6) br.cond.dptk .L060
  7305. }
  7306. ;;
  7307. { .mmi
  // p7 = (L != 0) guards the initial loads below.
  7308. cmp.ne p7, p0 = r0, L
  7309. adds BOFFSET = 0 * SIZE, B
  7310. shl r2 = K, ZBASE_SHIFT
  7311. }
  7312. { .mmi
  7313. shladd r3 = KK, ZBASE_SHIFT, r0
  7314. nop __LINE__
  7315. nop __LINE__
  7316. }
  7317. ;;
  7318. #if defined(LT) || defined(RN)
  7319. { .mfb
  7320. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7321. }
  7322. ;;
  7323. #else
  7324. { .mfi
  // LN/RT: BOFFSET = (r3 << 1) + B; LN additionally steps AORIG back by r2.
  7325. shladd BOFFSET = r3, 1, B
  7326. #ifdef LN
  7327. sub AORIG = AORIG, r2
  7328. #else
  7329. nop __LINE__
  7330. #endif
  7331. }
  7332. ;;
  7333. { .mfi
  7334. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7335. add AOFFSET = r3, AORIG
  7336. }
  7337. ;;
  7338. #endif
  7339. ;;
  // Loop count: L = ((L + 1) >> 1) - 1 goes into ar.lc. p12 records that
  // bit 0 of (L + 1) was clear; .L072 uses it to gate its second half.
  7340. adds L = 1, L
  7341. ;;
  7342. { .mii
  7343. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  7344. tbit.z p12, p0 = L, 0
  7345. shr L = L, 1
  7346. }
  7347. ;;
  7348. { .mmi
  7349. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  7350. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  7351. adds L = -1, L
  7352. }
  7353. ;;
  7354. { .mmi
  7355. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  // p3 starts out true; .L072 only rewrites it when p12 is set.
  7356. cmp.eq p3, p0 = r0, r0
  7357. mov ar.lc = L
  7358. }
  7359. ;;
  // L == -1 means there is nothing to accumulate: go straight to .L078.
  7360. cmp.eq p6, p0 = -1, L
  7361. (p6) br.cond.dpnt .L078
  7362. ;;
  // .L072: accumulation loop for the block set up by .L070, closed by
  // br.cloop (counter in ar.lc). Each pass has two halves:
  //   first half  : A pair f32/f33 times B values f48..f51 (unconditional)
  //   second half : A pair f40/f41 times B values f56..f59 (under p3)
  // Products accumulate into f64/f65/f80/f81/f96/f97/f112/f113.
  7363. .align 16
  7364. .L072:
  7365. { .mfb
  7366. lfetch.nt1 [PREA], 4 * SIZE
  7367. FMA f64 = f32, f48, f64 // A1 * B1
  7368. nop __LINE__
  7369. }
  7370. { .mfi
  7371. nop __LINE__
  7372. FMA f96 = f32, f49, f96 // A1 * B2
  // When p12 is set, p3 = (L != 0): the second half is switched off on the
  // pass where L has counted down to 0.
  7373. (p12) cmp.ne p3, p0 = 0, L
  7374. }
  7375. ;;
  7376. { .mfi
  7377. lfetch.nt1 [PREB], 8 * SIZE
  7378. FMA f80 = f32, f50, f80 // A1 * B3
  // p4 = (L != 0) guards the reloads of f32/f33 and f48..f51 for the next pass.
  7379. cmp.ne p4, p5 = 0, L
  7380. }
  7381. { .mfb
  7382. nop __LINE__
  7383. FMA f112 = f32, f51, f112 // A1 * B4
  7384. nop __LINE__
  7385. }
  7386. ;;
  7387. { .mfi
  7388. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  7389. FMA f65 = f33, f48, f65 // A2 * B1
  7390. }
  7391. { .mfi
  7392. nop __LINE__
  7393. FMA f97 = f33, f49, f97 // A2 * B2
  7394. }
  7395. ;;
  7396. { .mfi
  7397. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  7398. FMA f81 = f33, f50, f81 // A2 * B3
  7399. }
  7400. { .mmf
  7401. nop __LINE__
  7402. nop __LINE__
  7403. FMA f113 = f33, f51, f113 // A2 * B4
  7404. }
  7405. ;;
  // Second half: same eight products with the second set of operands.
  7406. { .mfb
  7407. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  7408. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  7409. nop __LINE__
  7410. }
  7411. { .mmf
  7412. nop __LINE__
  7413. nop __LINE__
  7414. (p3) FMA f96 = f40, f57, f96 // A1 * B2
  7415. }
  7416. ;;
  7417. { .mfb
  7418. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  7419. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  7420. nop __LINE__
  7421. }
  7422. { .mmf
  7423. nop __LINE__
  7424. nop __LINE__
  7425. (p3) FMA f112 = f40, f59, f112 // A1 * B4
  7426. }
  7427. ;;
  7428. { .mfb
  7429. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7430. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  7431. nop __LINE__
  7432. }
  7433. { .mfb
  7434. nop __LINE__
  7435. (p3) FMA f97 = f41, f57, f97 // A2 * B2
  7436. nop __LINE__
  7437. }
  7438. ;;
  7439. { .mfi
  7440. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  7441. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  // L is decremented in software alongside ar.lc so p3/p4 can test it.
  7442. adds L = -1, L
  7443. }
  7444. { .mfb
  7445. nop __LINE__
  7446. (p3) FMA f113 = f41, f59, f113 // A2 * B4
  7447. br.cloop.sptk.few .L072
  7448. }
  7449. ;;
  // After the loop the eight partial sums are folded into four results
  // (f64, f65, f80, f81) with FCALC_A / FCALC_B. Those macros are defined
  // outside this view; presumably they add or subtract depending on the
  // conjugation variant - confirm against their definitions.
  7450. { .mfb
  7451. nop __LINE__
  7452. FCALC_A f64 = f64, f97
  7453. nop __LINE__
  7454. }
  7455. { .mfb
  7456. nop __LINE__
  7457. FCALC_A f80 = f80, f113
  7458. nop __LINE__
  7459. }
  7460. { .mfb
  7461. nop __LINE__
  7462. FCALC_B f65 = f65, f96
  7463. nop __LINE__
  7464. }
  7465. { .mfb
  7466. nop __LINE__
  7467. FCALC_B f81 = f81, f112
  7468. nop __LINE__
  7469. }
  7470. ;;
  // .L078: solve and write-back for the block accumulated by .L072.
  // Working values: the pair f64/f65 (stored through C1) and the pair
  // f80/f81 (stored through C2). Steps:
  //   1. LN/RT: recompute AOFFSET/BOFFSET from KK.
  //   2. Load the stored values (from BOFFSET for LN/LT, from AOFFSET for
  //      RN/RT) and combine them with the accumulators via FSUB / FSUB_A.
  //   3. Apply the variant-specific multiply/eliminate sequence.
  //   4. Store the results back to the packed buffer and to C1/C2.
  //   5. Clear the accumulators and advance pointers / KK / L.
  // FSUB_A, FMA_A..FMA_D are macros defined outside this view.
  7471. .L078:
  7472. #if defined(LN) || defined(RT)
  7473. #ifdef LN
  7474. adds r2 = -1, KK
  7475. #else
  7476. adds r2 = -2, KK
  7477. #endif
  7478. ;;
  7479. shladd r2 = r2, ZBASE_SHIFT, r0
  7480. ;;
  // AOFFSET = r2 + AORIG, BOFFSET = (r2 << 1) + B.
  7481. add AOFFSET = r2, AORIG
  7482. shladd BOFFSET = r2, 1, B
  7483. ;;
  7484. #endif
  7485. #if defined(LN) || defined(LT)
  // Four values are read from BOFFSET; BOFFSET is restored afterwards.
  7486. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  7487. ;;
  7488. LDFPD f74, f75 = [BOFFSET]
  7489. adds BOFFSET = -2 * SIZE, BOFFSET
  7490. ;;
  7491. FSUB f64 = f72, f64
  7492. FSUB_A f65 = f73, f65
  7493. FSUB f80 = f74, f80
  7494. FSUB_A f81 = f75, f81
  7495. ;;
  7496. #else
  // Four values are read from AOFFSET; AOFFSET is restored afterwards.
  7497. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  7498. ;;
  7499. LDFPD f88, f89 = [AOFFSET]
  7500. adds AOFFSET = -2 * SIZE, AOFFSET
  7501. ;;
  7502. FSUB f64 = f72, f64
  7503. FSUB f65 = f73, f65
  7504. FSUB f80 = f88, f80
  7505. FSUB f81 = f89, f81
  7506. ;;
  7507. #endif
  7508. #ifdef LN
  // Both pairs are multiplied by the pair f120/f121 loaded from AOFFSET
  // (two FMPY plus FMA_C/FMA_D per pair, i.e. a complex-style product).
  // NOTE(review): presumably the pre-inverted diagonal element - confirm.
  7509. LDFPD f120, f121 = [AOFFSET]
  7510. ;;
  7511. FMPY f32 = f120, f64
  7512. FMPY f33 = f121, f64
  7513. FMPY f34 = f120, f80
  7514. FMPY f35 = f121, f80
  7515. ;;
  7516. FMA_C f64 = f121, f65, f32
  7517. FMA_D f65 = f120, f65, f33
  7518. FMA_C f80 = f121, f81, f34
  7519. FMA_D f81 = f120, f81, f35
  7520. ;;
  7521. #endif
  7522. #ifdef LT
  // Same product as the LN case, with the factor held in f72/f73.
  7523. LDFPD f72, f73 = [AOFFSET]
  7524. ;;
  7525. FMPY f32 = f72, f64
  7526. FMPY f33 = f73, f64
  7527. FMPY f34 = f72, f80
  7528. FMPY f35 = f73, f80
  7529. ;;
  7530. FMA_C f64 = f73, f65, f32
  7531. FMA_D f65 = f72, f65, f33
  7532. FMA_C f80 = f73, f81, f34
  7533. FMA_D f81 = f72, f81, f35
  7534. ;;
  7535. #endif
  7536. #ifdef RN
  // Factors are read from BOFFSET + 0, + 2 and + 6 (in SIZE units);
  // BOFFSET ends up back where it started.
  7537. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  7538. ;;
  7539. LDFPD f74, f75 = [BOFFSET]
  7540. adds BOFFSET = 4 * SIZE, BOFFSET
  7541. ;;
  7542. LDFPD f90, f91 = [BOFFSET]
  7543. adds BOFFSET = - 6 * SIZE, BOFFSET
  7544. ;;
  // f64/f65 times f72/f73.
  7545. FMPY f32 = f72, f64
  7546. FMPY f33 = f73, f64
  7547. ;;
  7548. FMA_C f64 = f73, f65, f32
  7549. FMA_D f65 = f72, f65, f33
  7550. ;;
  // Eliminate the new f64/f65 from f80/f81 using f74/f75.
  7551. FNMA f80 = f74, f64, f80
  7552. FMA_A f81 = f75, f64, f81
  7553. ;;
  7554. FMA_B f80 = f75, f65, f80
  7555. FNMA f81 = f74, f65, f81
  7556. ;;
  // f80/f81 times f90/f91.
  7557. FMPY f32 = f90, f80
  7558. FMPY f33 = f91, f80
  7559. ;;
  7560. FMA_C f80 = f91, f81, f32
  7561. FMA_D f81 = f90, f81, f33
  7562. ;;
  7563. #endif
  7564. #ifdef RT
  // Factors are read from BOFFSET + 6, + 4 and + 0 (in SIZE units), the
  // reverse order of the RN case; BOFFSET ends up back where it started.
  7565. adds BOFFSET = 6 * SIZE, BOFFSET
  7566. ;;
  7567. LDFPD f104, f105 = [BOFFSET]
  7568. adds BOFFSET = - 2 * SIZE, BOFFSET
  7569. ;;
  7570. LDFPD f106, f107 = [BOFFSET]
  7571. adds BOFFSET = - 4 * SIZE, BOFFSET
  7572. ;;
  7573. LDFPD f120, f121 = [BOFFSET]
  7574. ;;
  // f80/f81 times f104/f105.
  7575. FMPY f32 = f104, f80
  7576. FMPY f33 = f105, f80
  7577. ;;
  7578. FMA_C f80 = f105, f81, f32
  7579. FMA_D f81 = f104, f81, f33
  7580. ;;
  // Eliminate the new f80/f81 from f64/f65 using f106/f107.
  7581. FNMA f64 = f106, f80, f64
  7582. FMA_A f65 = f107, f80, f65
  7583. ;;
  7584. FMA_B f64 = f107, f81, f64
  7585. FNMA f65 = f106, f81, f65
  7586. ;;
  // f64/f65 times f120/f121.
  7587. FMPY f32 = f120, f64
  7588. FMPY f33 = f121, f64
  7589. ;;
  7590. FMA_C f64 = f121, f65, f32
  7591. FMA_D f65 = f120, f65, f33
  7592. ;;
  7593. #endif
  7594. #if defined(LN) || defined(LT)
  // Write the four results back through BOFFSET, then rewind it.
  7595. STFD [BOFFSET] = f64, SIZE
  7596. ;;
  7597. STFD [BOFFSET] = f65, SIZE
  7598. ;;
  7599. STFD [BOFFSET] = f80, SIZE
  7600. ;;
  7601. STFD [BOFFSET] = f81, SIZE
  7602. ;;
  7603. adds BOFFSET = - 4 * SIZE, BOFFSET
  7604. ;;
  7605. #else
  // Write the four results back through AOFFSET, then rewind it.
  7606. STFD [AOFFSET] = f64, SIZE
  7607. ;;
  7608. STFD [AOFFSET] = f65, SIZE
  7609. ;;
  7610. STFD [AOFFSET] = f80, SIZE
  7611. ;;
  7612. STFD [AOFFSET] = f81, SIZE
  7613. ;;
  7614. adds AOFFSET = - 4 * SIZE, AOFFSET
  7615. ;;
  7616. #endif
  7617. #ifdef LN
  // LN steps C1/C2 back by 2 * SIZE before the stores ...
  7618. adds C1 = -2 * SIZE, C1
  7619. adds C2 = -2 * SIZE, C2
  7620. #endif
  7621. ;;
  7622. STFD [C1 ] = f64, SIZE
  7623. ;;
  7624. STFD [C1 ] = f65, SIZE
  7625. ;;
  7626. STFD [C2 ] = f80, SIZE
  7627. ;;
  7628. STFD [C2 ] = f81, SIZE
  7629. ;;
  // Clear all eight accumulators for the next block.
  7630. mov f64 = f0
  7631. mov f65 = f0
  7632. mov f80 = f0
  7633. mov f81 = f0
  7634. mov f96 = f0
  7635. mov f97 = f0
  7636. mov f112 = f0
  7637. mov f113 = f0
  7638. ;;
  7639. #ifdef LN
  // ... and again afterwards, undoing the post-increments of the stores.
  7640. adds C1 = -2 * SIZE, C1
  7641. adds C2 = -2 * SIZE, C2
  7642. #endif
  7643. ;;
  // NOTE(review): p6 and I set here are not consumed before .L060
  // overwrites p6 with its tbit; looks like a leftover - confirm.
  7644. cmp.ne p6, p0 = 1, I
  7645. ;;
  7646. adds I = -1, I
  7647. ;;
  7648. shladd r2 = K, ZBASE_SHIFT, r0
  7649. ;;
  7650. sub L = K, KK
  7651. ;;
  7652. #ifdef RT
  7653. add AORIG = r2, AORIG
  7654. #endif
  7655. ;;
  7656. #if defined(LT) || defined(RN)
  // Advance AOFFSET by (K - KK) << ZBASE_SHIFT and BOFFSET by twice that.
  7657. shladd L = L, ZBASE_SHIFT, r0
  7658. ;;
  7659. add AOFFSET = L, AOFFSET
  7660. shladd BOFFSET = L, 1, BOFFSET
  7661. #endif
  7662. ;;
  7663. #ifdef LT
  7664. adds KK = 1, KK
  7665. #elif defined LN
  7666. adds KK = -1, KK
  7667. #else
  7668. nop __LINE__
  7669. #endif
  7670. ;;
  // Recompute L from the updated KK (.L060 computes the same value again).
  7671. #if defined(LT) || defined(RN)
  7672. mov L = KK
  7673. #else
  7674. sub L = K, KK
  7675. #endif
  7676. ;;
  // .L060: prologue for the block handled when bit 1 of M is set. Computes
  // the A/B start addresses, issues the first predicated loads (f32..f35
  // from A, f48..f51 from B), and turns L into a loop count for the
  // two-step-per-pass loop at .L062.
  7677. .align 16
  7678. .L060:
  7679. { .mib
  7680. #if defined(LT) || defined(RN)
  7681. mov L = KK
  7682. #else
  7683. sub L = K, KK
  7684. #endif
  // p6 = (bit 1 of M is clear): nothing to do here, continue at .L051.
  7685. tbit.z p6, p7 = M, 1
  7686. (p6) br.cond.dptk .L051
  7687. }
  7688. ;;
  7689. { .mmi
  // p7 = (L != 0) guards the initial loads below.
  7690. cmp.ne p7, p0 = r0, L
  7691. adds BOFFSET = 0 * SIZE, B
  7692. shl r2 = K, 1 + ZBASE_SHIFT
  7693. }
  7694. { .mmi
  7695. shladd r3 = KK, ZBASE_SHIFT, r0
  7696. nop __LINE__
  7697. nop __LINE__
  7698. }
  7699. ;;
  7700. #if defined(LT) || defined(RN)
  7701. { .mfb
  7702. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7703. }
  7704. ;;
  7705. #else
  7706. { .mfi
  // LN/RT: BOFFSET = (r3 << 1) + B; LN additionally steps AORIG back by r2.
  7707. shladd BOFFSET = r3, 1, B
  7708. #ifdef LN
  7709. sub AORIG = AORIG, r2
  7710. #else
  7711. nop __LINE__
  7712. #endif
  7713. }
  7714. ;;
  7715. { .mfi
  7716. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7717. shladd AOFFSET = r3, 1, AORIG
  7718. }
  7719. ;;
  7720. #endif
  7721. ;;
  // Loop count: L = ((L + 1) >> 1) - 1 goes into ar.lc. p12 records that
  // bit 0 of (L + 1) was clear; .L062 uses it to gate its second half.
  7722. adds L = 1, L
  7723. ;;
  7724. { .mmi
  7725. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  7726. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  7727. tbit.z p12, p0 = L, 0
  7728. }
  7729. { .mmi
  7730. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  7731. shr L = L, 1
  7732. }
  7733. ;;
  7734. { .mmi
  7735. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  7736. nop __LINE__
  7737. adds L = -1, L
  7738. }
  7739. ;;
  7740. { .mmi
  // Initialise the A prefetch pointer and force p3 true, exactly as the
  // .L070 prologue does. .L062 prefetches through PREA and predicates its
  // second unrolled half on p3, which it only rewrites when p12 is set; a
  // stale p3 left over from an earlier loop would otherwise drop (or
  // wrongly keep) every second step. These two instructions take the place
  // of two nop slots, so the bundle layout is unchanged.
  7741. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  7742. cmp.eq p3, p0 = r0, r0
  7743. mov ar.lc = L
  7744. }
  7745. ;;
  // L == -1 means there is nothing to accumulate: go straight to .L068.
  7746. cmp.eq p6, p0 = -1, L
  7747. (p6) br.cond.dpnt .L068
  7748. ;;
  // .L062: accumulation loop for the block set up by .L060, closed by
  // br.cloop (counter in ar.lc). Each pass has two halves:
  //   first half  : A values f32..f35 times B values f48..f51 (unconditional)
  //   second half : A values f40..f43 times B values f56..f59 (under p3)
  // Results accumulate directly into f64/f65, f80/f81, f96/f97, f112/f113;
  // the FMA_A / FMA_B macros (defined outside this view) supply the signs of
  // the cross terms, so no separate combine step follows the loop.
  7749. .align 16
  7750. .L062:
  7751. { .mfi
  7752. lfetch.nt1 [PREA], 8 * SIZE
  7753. FMA f64 = f32, f48, f64 // A1 * B1
  // p4 = (L != 0) guards the reloads of f32..f35 and f48..f51 for the next pass.
  7754. cmp.ne p4, p5 = 0, L
  7755. }
  7756. { .mfi
  7757. nop __LINE__
  7758. FMA_B f65 = f32, f49, f65 // A1 * B2
  // When p12 is set, p3 = (L != 0): the second half is switched off on the
  // pass where L has counted down to 0.
  7759. (p12) cmp.ne p3, p0 = 0, L
  7760. }
  7761. ;;
  7762. { .mfb
  7763. lfetch.nt1 [PREB], 8 * SIZE
  7764. FMA f80 = f32, f50, f80 // A1 * B3
  7765. nop __LINE__
  7766. }
  7767. { .mfb
  7768. nop __LINE__
  7769. FMA_B f81 = f32, f51, f81 // A1 * B4
  7770. nop __LINE__
  7771. }
  7772. ;;
  7773. { .mfb
  7774. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  7775. FMA f96 = f34, f48, f96 // A3 * B1
  7776. nop __LINE__
  7777. }
  7778. { .mfb
  7779. nop __LINE__
  7780. FMA_B f97 = f34, f49, f97 // A3 * B2
  7781. nop __LINE__
  7782. }
  7783. ;;
  7784. { .mfb
  7785. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  7786. FMA f112 = f34, f50, f112 // A3 * B3
  7787. nop __LINE__
  7788. }
  7789. { .mfb
  7790. nop __LINE__
  7791. FMA_B f113 = f34, f51, f113 // A3 * B4
  7792. nop __LINE__
  7793. }
  7794. ;;
  7795. { .mfb
  7796. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  7797. FMA f65 = f33, f48, f65 // A2 * B1
  7798. nop __LINE__
  7799. }
  7800. { .mfb
  7801. nop __LINE__
  7802. FMA_A f64 = f33, f49, f64 // A2 * B2
  7803. nop __LINE__
  7804. }
  7805. ;;
  7806. { .mfb
  7807. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  7808. FMA f81 = f33, f50, f81 // A2 * B3
  7809. nop __LINE__
  7810. }
  7811. { .mfb
  7812. nop __LINE__
  7813. FMA_A f80 = f33, f51, f80 // A2 * B4
  7814. nop __LINE__
  7815. }
  7816. ;;
  // The (p4) reloads below overwrite first-half operands in the same
  // instruction group as their last uses; there is deliberately no stop
  // between these four bundles.
  7817. { .mfb
  7818. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  7819. FMA f97 = f35, f48, f97 // A4 * B1
  7820. }
  7821. { .mfb
  7822. FMA_A f96 = f35, f49, f96 // A4 * B2
  7823. nop __LINE__
  7824. }
  7825. { .mfb
  7826. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7827. FMA f113 = f35, f50, f113 // A4 * B3
  7828. nop __LINE__
  7829. }
  7830. { .mfb
  7831. FMA_A f112 = f35, f51, f112 // A4 * B4
  7832. nop __LINE__
  7833. }
  7834. ;;
  // Second half: the same sixteen products with the second set of operands.
  7835. { .mfb
  7836. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  7837. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  7838. nop __LINE__
  7839. }
  7840. { .mfb
  7841. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  7842. nop __LINE__
  7843. }
  7844. ;;
  7845. { .mfb
  7846. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  7847. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  7848. nop __LINE__
  7849. }
  7850. { .mfb
  7851. (p3) FMA_B f81 = f40, f59, f81 // A1 * B4
  7852. nop __LINE__
  7853. }
  7854. ;;
  7855. { .mfb
  7856. nop __LINE__
  7857. (p3) FMA f96 = f42, f56, f96 // A3 * B1
  7858. nop __LINE__
  7859. }
  7860. { .mfb
  7861. nop __LINE__
  7862. (p3) FMA_B f97 = f42, f57, f97 // A3 * B2
  7863. nop __LINE__
  7864. }
  7865. ;;
  7866. { .mfb
  7867. nop __LINE__
  7868. (p3) FMA f112 = f42, f58, f112 // A3 * B3
  7869. nop __LINE__
  7870. }
  7871. { .mfb
  7872. nop __LINE__
  7873. (p3) FMA_B f113 = f42, f59, f113 // A3 * B4
  7874. nop __LINE__
  7875. }
  7876. ;;
  7877. { .mfb
  7878. nop __LINE__
  7879. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  7880. nop __LINE__
  7881. }
  7882. { .mfb
  7883. nop __LINE__
  7884. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  7885. nop __LINE__
  7886. }
  7887. ;;
  7888. { .mfb
  7889. nop __LINE__
  7890. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  7891. nop __LINE__
  7892. }
  7893. { .mfb
  7894. nop __LINE__
  7895. (p3) FMA_A f80 = f41, f59, f80 // A2 * B4
  7896. nop __LINE__
  7897. }
  7898. ;;
  7899. { .mfb
  7900. nop __LINE__
  7901. (p3) FMA f97 = f43, f56, f97 // A4 * B1
  7902. nop __LINE__
  7903. }
  7904. { .mfb
  7905. nop __LINE__
  7906. (p3) FMA_A f96 = f43, f57, f96 // A4 * B2
  7907. nop __LINE__
  7908. }
  7909. ;;
  7910. { .mfi
  7911. nop __LINE__
  7912. (p3) FMA f113 = f43, f58, f113 // A4 * B3
  // L is decremented in software alongside ar.lc so p3/p4 can test it.
  7913. adds L = -1, L
  7914. }
  7915. { .mfb
  7916. nop __LINE__
  7917. (p3) FMA_A f112 = f43, f59, f112 // A4 * B4
  7918. br.cloop.sptk.few .L062
  7919. }
  7920. ;;
  // .L068: solve and write-back for the block accumulated by .L062.
  // Working values are four pairs: f64/f65 and f96/f97 (stored through C1),
  // f80/f81 and f112/f113 (stored through C2). Steps:
  //   1. LN/RT: recompute AOFFSET/BOFFSET from KK.
  //   2. Load eight stored values (from BOFFSET for LN/LT, from AOFFSET for
  //      RN/RT) and combine them with the accumulators via FSUB / FSUB_A.
  //   3. Apply the variant-specific multiply / eliminate / multiply sequence.
  //   4. Store the results back to the packed buffer and to C1/C2.
  //   5. Clear the accumulators and advance pointers / KK / L.
  // FSUB_A, FMA_A..FMA_D are macros defined outside this view.
  7921. .L068:
  7922. #if defined(LN) || defined(RT)
  // Both preprocessor branches select the same offset, KK - 2.
  7923. #ifdef LN
  7924. adds r2 = -2, KK
  7925. #else
  7926. adds r2 = -2, KK
  7927. #endif
  7928. ;;
  7929. shladd r2 = r2, ZBASE_SHIFT, r0
  7930. ;;
  // AOFFSET = (r2 << 1) + AORIG, BOFFSET = (r2 << 1) + B.
  7931. shladd AOFFSET = r2, 1, AORIG
  7932. shladd BOFFSET = r2, 1, B
  7933. ;;
  7934. #endif
  7935. #if defined(LN) || defined(LT)
  // Eight values are read from BOFFSET; BOFFSET is restored afterwards.
  7936. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  7937. ;;
  7938. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  7939. ;;
  7940. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  7941. ;;
  7942. LDFPD f90, f91 = [BOFFSET]
  7943. adds BOFFSET = -6 * SIZE, BOFFSET
  7944. ;;
  7945. FSUB f64 = f72, f64
  7946. FSUB_A f65 = f73, f65
  7947. FSUB f80 = f74, f80
  7948. FSUB_A f81 = f75, f81
  7949. FSUB f96 = f88, f96
  7950. FSUB_A f97 = f89, f97
  7951. FSUB f112 = f90, f112
  7952. FSUB_A f113 = f91, f113
  7953. ;;
  7954. #else
  // Eight values are read from AOFFSET; AOFFSET is restored afterwards.
  // Note the register order differs from the LN/LT case: here the second
  // load feeds f96/f97 and the third feeds f80/f81, matching the store
  // order used for AOFFSET/AOFFSET2 further down.
  7955. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  7956. ;;
  7957. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  7958. ;;
  7959. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  7960. ;;
  7961. LDFPD f90, f91 = [AOFFSET]
  7962. adds AOFFSET = -6 * SIZE, AOFFSET
  7963. ;;
  7964. FSUB f64 = f72, f64
  7965. FSUB f65 = f73, f65
  7966. FSUB f96 = f74, f96
  7967. FSUB f97 = f75, f97
  7968. FSUB f80 = f88, f80
  7969. FSUB f81 = f89, f81
  7970. FSUB f112 = f90, f112
  7971. FSUB f113 = f91, f113
  7972. ;;
  7973. #endif
  7974. #ifdef LN
  // Factors are read from AOFFSET + 6, + 4 and + 0 (in SIZE units);
  // AOFFSET ends up back where it started.
  7975. adds AOFFSET = 6 * SIZE, AOFFSET
  7976. ;;
  7977. LDFPD f104, f105 = [AOFFSET]
  7978. adds AOFFSET = - 2 * SIZE, AOFFSET
  7979. ;;
  7980. LDFPD f106, f107 = [AOFFSET]
  7981. adds AOFFSET = - 4 * SIZE, AOFFSET
  7982. ;;
  7983. LDFPD f120, f121 = [AOFFSET]
  7984. ;;
  // f96/f97 and f112/f113 times f104/f105.
  7985. FMPY f32 = f104, f96
  7986. FMPY f33 = f105, f96
  7987. FMPY f34 = f104, f112
  7988. FMPY f35 = f105, f112
  7989. ;;
  7990. FMA_C f96 = f105, f97, f32
  7991. FMA_D f97 = f104, f97, f33
  7992. FMA_C f112 = f105, f113, f34
  7993. FMA_D f113 = f104, f113, f35
  7994. ;;
  // Eliminate them from f64/f65 and f80/f81 using f106/f107.
  7995. FNMA f64 = f106, f96, f64
  7996. FMA_A f65 = f107, f96, f65
  7997. FNMA f80 = f106, f112, f80
  7998. FMA_A f81 = f107, f112, f81
  7999. ;;
  8000. FMA_B f64 = f107, f97, f64
  8001. FNMA f65 = f106, f97, f65
  8002. FMA_B f80 = f107, f113, f80
  8003. FNMA f81 = f106, f113, f81
  8004. ;;
  // f64/f65 and f80/f81 times f120/f121.
  8005. FMPY f32 = f120, f64
  8006. FMPY f33 = f121, f64
  8007. FMPY f34 = f120, f80
  8008. FMPY f35 = f121, f80
  8009. ;;
  8010. FMA_C f64 = f121, f65, f32
  8011. FMA_D f65 = f120, f65, f33
  8012. FMA_C f80 = f121, f81, f34
  8013. FMA_D f81 = f120, f81, f35
  8014. ;;
  8015. #endif
  8016. #ifdef LT
  // Factors are read from AOFFSET + 0, + 2 and + 6 (in SIZE units);
  // AOFFSET ends up back where it started.
  8017. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  8018. ;;
  8019. LDFPD f74, f75 = [AOFFSET]
  8020. adds AOFFSET = 4 * SIZE, AOFFSET
  8021. ;;
  8022. LDFPD f90, f91 = [AOFFSET]
  8023. adds AOFFSET = - 6 * SIZE, AOFFSET
  8024. ;;
  // f64/f65 and f80/f81 times f72/f73.
  8025. FMPY f32 = f72, f64
  8026. FMPY f33 = f73, f64
  8027. FMPY f34 = f72, f80
  8028. FMPY f35 = f73, f80
  8029. ;;
  8030. FMA_C f64 = f73, f65, f32
  8031. FMA_D f65 = f72, f65, f33
  8032. FMA_C f80 = f73, f81, f34
  8033. FMA_D f81 = f72, f81, f35
  8034. ;;
  // Eliminate them from f96/f97 and f112/f113 using f74/f75.
  8035. FNMA f96 = f74, f64, f96
  8036. FMA_A f97 = f75, f64, f97
  8037. FNMA f112 = f74, f80, f112
  8038. FMA_A f113 = f75, f80, f113
  8039. ;;
  8040. FMA_B f96 = f75, f65, f96
  8041. FNMA f97 = f74, f65, f97
  8042. FMA_B f112 = f75, f81, f112
  8043. FNMA f113 = f74, f81, f113
  8044. ;;
  // f96/f97 and f112/f113 times f90/f91.
  8045. FMPY f32 = f90, f96
  8046. FMPY f33 = f91, f96
  8047. FMPY f34 = f90, f112
  8048. FMPY f35 = f91, f112
  8049. ;;
  8050. FMA_C f96 = f91, f97, f32
  8051. FMA_D f97 = f90, f97, f33
  8052. FMA_C f112 = f91, f113, f34
  8053. FMA_D f113 = f90, f113, f35
  8054. ;;
  8055. #endif
  8056. #ifdef RN
  // Factors are read from BOFFSET + 0, + 2 and + 6 (in SIZE units);
  // BOFFSET ends up back where it started.
  8057. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  8058. ;;
  8059. LDFPD f74, f75 = [BOFFSET]
  8060. adds BOFFSET = 4 * SIZE, BOFFSET
  8061. ;;
  8062. LDFPD f90, f91 = [BOFFSET]
  8063. adds BOFFSET = - 6 * SIZE, BOFFSET
  8064. ;;
  // f64/f65 and f96/f97 times f72/f73.
  8065. FMPY f32 = f72, f64
  8066. FMPY f33 = f73, f64
  8067. FMPY f34 = f72, f96
  8068. FMPY f35 = f73, f96
  8069. ;;
  8070. FMA_C f64 = f73, f65, f32
  8071. FMA_D f65 = f72, f65, f33
  8072. FMA_C f96 = f73, f97, f34
  8073. FMA_D f97 = f72, f97, f35
  8074. ;;
  // Eliminate them from f80/f81 and f112/f113 using f74/f75.
  8075. FNMA f80 = f74, f64, f80
  8076. FMA_A f81 = f75, f64, f81
  8077. FNMA f112 = f74, f96, f112
  8078. FMA_A f113 = f75, f96, f113
  8079. ;;
  8080. FMA_B f80 = f75, f65, f80
  8081. FNMA f81 = f74, f65, f81
  8082. FMA_B f112 = f75, f97, f112
  8083. FNMA f113 = f74, f97, f113
  8084. ;;
  // f80/f81 and f112/f113 times f90/f91.
  8085. FMPY f32 = f90, f80
  8086. FMPY f33 = f91, f80
  8087. FMPY f34 = f90, f112
  8088. FMPY f35 = f91, f112
  8089. ;;
  8090. FMA_C f80 = f91, f81, f32
  8091. FMA_D f81 = f90, f81, f33
  8092. FMA_C f112 = f91, f113, f34
  8093. FMA_D f113 = f90, f113, f35
  8094. ;;
  8095. #endif
  8096. #ifdef RT
  // Factors are read from BOFFSET + 6, + 4 and + 0 (in SIZE units);
  // BOFFSET ends up back where it started.
  8097. adds BOFFSET = 6 * SIZE, BOFFSET
  8098. ;;
  8099. LDFPD f104, f105 = [BOFFSET]
  8100. adds BOFFSET = - 2 * SIZE, BOFFSET
  8101. ;;
  8102. LDFPD f106, f107 = [BOFFSET]
  8103. adds BOFFSET = - 4 * SIZE, BOFFSET
  8104. ;;
  8105. LDFPD f120, f121 = [BOFFSET]
  8106. ;;
  // f80/f81 and f112/f113 times f104/f105.
  8107. FMPY f32 = f104, f80
  8108. FMPY f33 = f105, f80
  8109. FMPY f34 = f104, f112
  8110. FMPY f35 = f105, f112
  8111. ;;
  8112. FMA_C f80 = f105, f81, f32
  8113. FMA_D f81 = f104, f81, f33
  8114. FMA_C f112 = f105, f113, f34
  8115. FMA_D f113 = f104, f113, f35
  8116. ;;
  // Eliminate them from f64/f65 and f96/f97 using f106/f107.
  8117. FNMA f64 = f106, f80, f64
  8118. FMA_A f65 = f107, f80, f65
  8119. FNMA f96 = f106, f112, f96
  8120. FMA_A f97 = f107, f112, f97
  8121. ;;
  8122. FMA_B f64 = f107, f81, f64
  8123. FNMA f65 = f106, f81, f65
  8124. FMA_B f96 = f107, f113, f96
  8125. FNMA f97 = f106, f113, f97
  8126. ;;
  // f64/f65 and f96/f97 times f120/f121.
  8127. FMPY f32 = f120, f64
  8128. FMPY f33 = f121, f64
  8129. FMPY f34 = f120, f96
  8130. FMPY f35 = f121, f96
  8131. ;;
  8132. FMA_C f64 = f121, f65, f32
  8133. FMA_D f65 = f120, f65, f33
  8134. FMA_C f96 = f121, f97, f34
  8135. FMA_D f97 = f120, f97, f35
  8136. ;;
  8137. #endif
  8138. #if defined(LN) || defined(LT)
  // Write the eight results back through BOFFSET and BOFFSET2 (= BOFFSET
  // + 4 * SIZE) in parallel. The last store bumps BOFFSET by 5 * SIZE, so it
  // ends 8 * SIZE past its start and is rewound by 8 * SIZE.
  8139. adds BOFFSET2 = 4 * SIZE, BOFFSET
  8140. ;;
  8141. STFD [BOFFSET] = f64, SIZE
  8142. STFD [BOFFSET2] = f96, SIZE
  8143. ;;
  8144. STFD [BOFFSET] = f65, SIZE
  8145. STFD [BOFFSET2] = f97, SIZE
  8146. ;;
  8147. STFD [BOFFSET] = f80, SIZE
  8148. STFD [BOFFSET2] = f112, SIZE
  8149. ;;
  8150. STFD [BOFFSET] = f81, 5 * SIZE
  8151. STFD [BOFFSET2] = f113, 5 * SIZE
  8152. ;;
  8153. adds BOFFSET = - 8 * SIZE, BOFFSET
  8154. ;;
  8155. #else
  // Same scheme through AOFFSET and AOFFSET2 (= AOFFSET + 4 * SIZE).
  8156. adds AOFFSET2 = 4 * SIZE, AOFFSET
  8157. ;;
  8158. STFD [AOFFSET] = f64, SIZE
  8159. STFD [AOFFSET2] = f80, SIZE
  8160. ;;
  8161. STFD [AOFFSET] = f65, SIZE
  8162. STFD [AOFFSET2] = f81, SIZE
  8163. ;;
  8164. STFD [AOFFSET] = f96, SIZE
  8165. STFD [AOFFSET2] = f112, SIZE
  8166. ;;
  8167. STFD [AOFFSET] = f97, 5 * SIZE
  8168. STFD [AOFFSET2] = f113, 5 * SIZE
  8169. ;;
  8170. adds AOFFSET = - 8 * SIZE, AOFFSET
  8171. ;;
  8172. #endif
  8173. #ifdef LN
  // LN steps C1/C2 back by 4 * SIZE before the stores ...
  8174. adds C1 = -4 * SIZE, C1
  8175. adds C2 = -4 * SIZE, C2
  8176. #endif
  8177. ;;
  8178. STFD [C1 ] = f64, SIZE
  8179. ;;
  8180. STFD [C1 ] = f65, SIZE
  8181. ;;
  8182. STFD [C1 ] = f96, SIZE
  8183. ;;
  8184. STFD [C1 ] = f97, SIZE
  8185. ;;
  8186. STFD [C2 ] = f80, SIZE
  8187. ;;
  8188. STFD [C2 ] = f81, SIZE
  8189. ;;
  8190. STFD [C2 ] = f112, SIZE
  8191. ;;
  8192. STFD [C2 ] = f113, SIZE
  8193. ;;
  // Clear all eight accumulators for the next block.
  8194. mov f64 = f0
  8195. mov f65 = f0
  8196. mov f80 = f0
  8197. mov f81 = f0
  8198. mov f96 = f0
  8199. mov f97 = f0
  8200. mov f112 = f0
  8201. mov f113 = f0
  8202. ;;
  8203. #ifdef LN
  // ... and again afterwards, undoing the post-increments of the stores.
  8204. adds C1 = -4 * SIZE, C1
  8205. adds C2 = -4 * SIZE, C2
  8206. #endif
  8207. ;;
  // NOTE(review): p6 and I set here are not consumed before .L051
  // overwrites both (shr I / cmp.eq p6); looks like a leftover - confirm.
  8208. cmp.ne p6, p0 = 1, I
  8209. ;;
  8210. adds I = -1, I
  8211. ;;
  8212. shladd r2 = K, ZBASE_SHIFT, r0
  8213. ;;
  8214. sub L = K, KK
  8215. ;;
  8216. #ifdef RT
  8217. shladd AORIG = r2, 1, AORIG
  8218. #endif
  8219. ;;
  8220. #if defined(LT) || defined(RN)
  // Advance AOFFSET and BOFFSET by ((K - KK) << ZBASE_SHIFT) << 1 each.
  8221. shladd L = L, ZBASE_SHIFT, r0
  8222. ;;
  8223. shladd AOFFSET = L, 1, AOFFSET
  8224. shladd BOFFSET = L, 1, BOFFSET
  8225. #endif
  8226. ;;
  8227. #ifdef LT
  8228. adds KK = 2, KK
  8229. #elif defined LN
  8230. adds KK = -2, KK
  8231. #else
  8232. nop __LINE__
  8233. #endif
  8234. ;;
  // Recompute L from the updated KK for the code at .L051/.L052.
  8235. #if defined(LT) || defined(RN)
  8236. mov L = KK
  8237. #else
  8238. sub L = K, KK
  8239. #endif
  8240. ;;
  // .L051: entry test for the loop over groups of four.
  // I = M >> 2; when that count is zero, control skips straight to .L089.
  8241. .align 16
  8242. .L051:
  8243. shr I = M, 2
  8244. ;;
  8245. cmp.eq p6, p7 = 0, I
  8246. (p6) br.cond.dpnt .L089
  8247. ;;
  // .L052: per-tile setup executed before the inner loop at .L053.
  //   * p7 = (L != 0) guards every initial operand load below.
  //   * r2 = K << (2 + ZBASE_SHIFT), r3 = KK << ZBASE_SHIFT.
  //   * LT/RN: BOFFSET starts at B.  Other variants: BOFFSET = B + 2 * r3 and
  //     AOFFSET = AORIG + 4 * r3; under LN, AORIG is first moved back by r2.
  //   * f66/f67, f82/f83, f98/f99 and f114/f115 are cleared here.  The other
  //     accumulators used by .L053 (f64/f65, f80/f81, f96/f97, f112/f113) are
  //     not written in this block.
  //   * L becomes the ar.lc trip count ((L + 1) >> 1) - 1.  p12 is set when
  //     bit 0 of L + 1 is clear; .L053 uses it to drop the second half of
  //     its final pass.
  //   * A resulting count of -1 (i.e. L was 0) bypasses the loop via .L058.
  8248. .align 16
  8249. .L052:
  8250. { .mmi
  8251. cmp.ne p7, p0 = r0, L
  8252. adds BOFFSET = 0 * SIZE, B
  8253. shl r2 = K, 2 + ZBASE_SHIFT
  8254. }
  8255. { .mmi
  8256. shladd r3 = KK, ZBASE_SHIFT, r0
  8257. nop __LINE__
  8258. nop __LINE__
  8259. }
  8260. ;;
  8261. #if defined(LT) || defined(RN)
  8262. { .mfb
  8263. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  8264. mov f66 = f0
  8265. nop __LINE__
  8266. }
  8267. { .mmf
  8268. nop __LINE__
  8269. nop __LINE__
  8270. mov f67 = f0
  8271. }
  8272. ;;
  8273. #else
  8274. { .mfi
  8275. shladd BOFFSET = r3, 1, B
  8276. mov f66 = f0
  8277. #ifdef LN
  8278. sub AORIG = AORIG, r2
  8279. #else
  8280. nop __LINE__
  8281. #endif
  8282. }
  8283. ;;
  8284. { .mfi
  8285. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  8286. mov f67 = f0
  8287. shladd AOFFSET = r3, 2, AORIG
  8288. }
  8289. ;;
  8290. #endif
  // Preload the first-half operands (f32-f39 from AOFFSET, f50/f51 from
  // BOFFSET) and set up the C and B prefetch pointers.
  8291. { .mfi
  8292. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  8293. mov f82 = f0
  8294. adds PREC = CPREFETCHSIZE * SIZE, C1
  8295. }
  8296. { .mfi
  8297. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  8298. mov f83 = f0
  8299. nop __LINE__
  8300. }
  8301. ;;
  8302. { .mfi
  8303. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  8304. mov f98 = f0
  8305. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  8306. }
  // cmp.eq r0, r0 is always true: p3 starts set so the second half of each
  // loop pass runs unless .L053 clears it.
  8307. { .mfi
  8308. cmp.eq p3, p0 = r0, r0
  8309. mov f99 = f0
  8310. adds L = 1, L
  8311. }
  8312. ;;
  8313. { .mfi
  8314. (p7) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  8315. mov f114 = f0
  8316. tbit.z p12, p0 = L, 0
  8317. }
  8318. { .mfi
  8319. CPREFETCH [PREC], LDC
  8320. mov f115 = f0
  8321. shr L = L, 1
  8322. }
  8323. ;;
  // C5/C6 shadow C1/C2 at +4 * SIZE; .L058 stores through both pairs.
  8324. { .mmi
  8325. (p7) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  8326. adds C5 = 4 * SIZE, C1
  8327. adds L = -1, L
  8328. }
  8329. ;;
  8330. { .mmi
  8331. CPREFETCH [PREC], LDC
  8332. adds C6 = 4 * SIZE, C2
  8333. mov ar.lc = L
  8334. }
  8335. ;;
  8336. cmp.eq p6, p0 = -1, L
  8337. (p6) br.cond.dpnt .L058
  8338. ;;
  // .L053: inner multiply-accumulate loop, unrolled to two steps per pass
  // and counted by ar.lc (br.cloop at the bottom).
  //   * First half: operands f32-f39 (loaded from AOFFSET) and f48-f51
  //     (loaded from BOFFSET).
  //   * Second half, predicated on p3: operands f40-f47 and f56-f59, which
  //     are loaded while the first half executes.
  //   * (p12) cmp.ne p3 = 0, L clears p3 on the pass where L == 0, so the
  //     second half is skipped on the final pass when p12 is set.
  //   * p4 = (L != 0) gates the reload of the first-half operands for the
  //     next pass; L is decremented once per pass alongside ar.lc.
  //   * Accumulators: f64/f65, f80/f81, f96/f97, f112/f113, f66/f67,
  //     f82/f83, f98/f99, f114/f115.
  //   * FMA, FMA_A and FMA_B are macros defined outside this section; the
  //     trailing "An * Bm" comments name the operand pair of each product.
  // NOTE(review): PREA is prefetched below, but the setup at .L052 only
  // initialises PREB and PREC - confirm PREA holds a meaningful address on
  // entry (the 1x1 path sets it explicitly before .L112).
  8339. .align 16
  8340. .L053:
  8341. { .mfb
  8342. lfetch.nt1 [PREA], 16 * SIZE
  8343. FMA f64 = f32, f48, f64 // A1 * B1
  8344. nop __LINE__
  8345. }
  8346. { .mfi
  8347. nop __LINE__
  8348. FMA_B f65 = f32, f49, f65 // A1 * B2
  8349. (p12) cmp.ne p3, p0 = 0, L
  8350. }
  8351. ;;
  8352. { .mfi
  8353. lfetch.nt1 [PREB], 8 * SIZE
  8354. FMA f80 = f32, f50, f80 // A1 * B3
  8355. cmp.ne p4, p5 = 0, L
  8356. }
  8357. { .mfi
  8358. nop __LINE__
  8359. FMA_B f81 = f32, f51, f81 // A1 * B4
  8360. nop __LINE__
  8361. }
  8362. ;;
  8363. { .mfi
  8364. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  8365. FMA f96 = f34, f48, f96 // A3 * B1
  8366. nop __LINE__
  8367. }
  8368. { .mfi
  8369. FMA_B f97 = f34, f49, f97 // A3 * B2
  8370. nop __LINE__
  8371. }
  8372. ;;
  8373. { .mfi
  8374. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  8375. FMA f112 = f34, f50, f112 // A3 * B3
  8376. nop __LINE__
  8377. }
  8378. { .mfb
  8379. nop __LINE__
  8380. FMA_B f113 = f34, f51, f113 // A3 * B4
  8381. nop __LINE__
  8382. }
  8383. ;;
  8384. { .mfb
  8385. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  8386. FMA f65 = f33, f48, f65 // A2 * B1
  8387. nop __LINE__
  8388. }
  8389. { .mfb
  8390. nop __LINE__
  8391. FMA_A f64 = f33, f49, f64 // A2 * B2
  8392. nop __LINE__
  8393. }
  8394. ;;
  8395. { .mfb
  8396. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  8397. FMA f81 = f33, f50, f81 // A2 * B3
  8398. nop __LINE__
  8399. }
  8400. { .mfb
  8401. nop __LINE__
  8402. FMA_A f80 = f33, f51, f80 // A2 * B4
  8403. nop __LINE__
  8404. }
  8405. ;;
  8406. { .mfb
  8407. (p3) LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  8408. FMA f97 = f35, f48, f97 // A4 * B1
  8409. nop __LINE__
  8410. }
  8411. { .mfb
  8412. nop __LINE__
  8413. FMA_A f96 = f35, f49, f96 // A4 * B2
  8414. nop __LINE__
  8415. }
  8416. ;;
  8417. { .mfb
  8418. (p3) LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  8419. FMA f113 = f35, f50, f113 // A4 * B3
  8420. nop __LINE__
  8421. }
  8422. { .mfb
  8423. nop __LINE__
  8424. FMA_A f112 = f35, f51, f112 // A4 * B4
  8425. nop __LINE__
  8426. }
  8427. ;;
  8428. { .mfb
  8429. nop __LINE__
  8430. FMA f66 = f36, f48, f66 // A5 * B1
  8431. nop __LINE__
  8432. }
  8433. { .mfb
  8434. nop __LINE__
  8435. FMA_B f67 = f36, f49, f67 // A5 * B2
  8436. nop __LINE__
  8437. }
  8438. ;;
  8439. { .mfb
  8440. nop __LINE__
  8441. FMA f82 = f36, f50, f82 // A5 * B3
  8442. nop __LINE__
  8443. }
  8444. { .mfb
  8445. nop __LINE__
  8446. FMA_B f83 = f36, f51, f83 // A5 * B4
  8447. nop __LINE__
  8448. }
  8449. ;;
  8450. { .mfb
  8451. nop __LINE__
  8452. FMA f98 = f38, f48, f98 // A7 * B1
  8453. nop __LINE__
  8454. }
  8455. { .mfb
  8456. nop __LINE__
  8457. FMA_B f99 = f38, f49, f99 // A7 * B2
  8458. nop __LINE__
  8459. }
  8460. ;;
  8461. { .mfb
  8462. nop __LINE__
  8463. FMA f114 = f38, f50, f114 // A7 * B3
  8464. nop __LINE__
  8465. }
  8466. { .mfb
  8467. nop __LINE__
  8468. FMA_B f115 = f38, f51, f115 // A7 * B4
  8469. nop __LINE__
  8470. }
  8471. ;;
  8472. { .mfb
  8473. nop __LINE__
  8474. FMA f67 = f37, f48, f67 // A6 * B1
  8475. nop __LINE__
  8476. }
  8477. { .mfb
  8478. nop __LINE__
  8479. FMA_A f66 = f37, f49, f66 // A6 * B2
  8480. nop __LINE__
  8481. }
  8482. ;;
  8483. { .mfb
  8484. nop __LINE__
  8485. FMA f83 = f37, f50, f83 // A6 * B3
  8486. nop __LINE__
  8487. }
  8488. { .mfb
  8489. nop __LINE__
  8490. FMA_A f82 = f37, f51, f82 // A6 * B4
  8491. nop __LINE__
  8492. }
  8493. ;;
  // From here on the first-half operands are reloaded (under p4) for the
  // next pass while their last uses retire.
  8494. { .mfb
  8495. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  8496. FMA f99 = f39, f48, f99 // A8 * B1
  8497. nop __LINE__
  8498. }
  8499. { .mfb
  8500. nop __LINE__
  8501. FMA_A f98 = f39, f49, f98 // A8 * B2
  8502. nop __LINE__
  8503. }
  8504. ;;
  8505. { .mfb
  8506. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  8507. FMA f115 = f39, f50, f115 // A8 * B3
  8508. nop __LINE__
  8509. }
  8510. { .mfb
  8511. nop __LINE__
  8512. FMA_A f114 = f39, f51, f114 // A8 * B4
  8513. nop __LINE__
  8514. }
  8515. ;;
  // Second half of the pass: same product pattern on f40-f47 / f56-f59,
  // every FMA predicated on p3.
  8516. { .mfb
  8517. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  8518. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  8519. nop __LINE__
  8520. }
  8521. { .mfb
  8522. nop __LINE__
  8523. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  8524. nop __LINE__
  8525. }
  8526. ;;
  8527. { .mfb
  8528. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  8529. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  8530. nop __LINE__
  8531. }
  8532. { .mfb
  8533. nop __LINE__
  8534. (p3) FMA_B f81 = f40, f59, f81 // A1 * B4
  8535. nop __LINE__
  8536. }
  8537. ;;
  8538. { .mfb
  8539. (p4) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  8540. (p3) FMA f96 = f42, f56, f96 // A3 * B1
  8541. nop __LINE__
  8542. }
  8543. { .mfb
  8544. nop __LINE__
  8545. (p3) FMA_B f97 = f42, f57, f97 // A3 * B2
  8546. nop __LINE__
  8547. }
  8548. ;;
  8549. { .mfb
  8550. (p4) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  8551. (p3) FMA f112 = f42, f58, f112 // A3 * B3
  8552. nop __LINE__
  8553. }
  8554. { .mfb
  8555. nop __LINE__
  8556. (p3) FMA_B f113 = f42, f59, f113 // A3 * B4
  8557. nop __LINE__
  8558. }
  8559. ;;
  8560. { .mfb
  8561. nop __LINE__
  8562. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  8563. nop __LINE__
  8564. }
  8565. { .mfb
  8566. nop __LINE__
  8567. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  8568. nop __LINE__
  8569. }
  8570. ;;
  8571. { .mfb
  8572. nop __LINE__
  8573. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  8574. nop __LINE__
  8575. }
  8576. { .mfb
  8577. nop __LINE__
  8578. (p3) FMA_A f80 = f41, f59, f80 // A2 * B4
  8579. nop __LINE__
  8580. }
  8581. ;;
  8582. { .mfb
  8583. nop __LINE__
  8584. (p3) FMA f97 = f43, f56, f97 // A4 * B1
  8585. nop __LINE__
  8586. }
  8587. { .mfb
  8588. nop __LINE__
  8589. (p3) FMA_A f96 = f43, f57, f96 // A4 * B2
  8590. nop __LINE__
  8591. }
  8592. ;;
  8593. { .mfb
  8594. nop __LINE__
  8595. (p3) FMA f113 = f43, f58, f113 // A4 * B3
  8596. nop __LINE__
  8597. }
  8598. { .mfb
  8599. nop __LINE__
  8600. (p3) FMA_A f112 = f43, f59, f112 // A4 * B4
  8601. nop __LINE__
  8602. }
  8603. ;;
  8604. { .mfb
  8605. nop __LINE__
  8606. (p3) FMA f66 = f44, f56, f66 // A5 * B1
  8607. nop __LINE__
  8608. }
  8609. { .mfb
  8610. nop __LINE__
  8611. (p3) FMA_B f67 = f44, f57, f67 // A5 * B2
  8612. nop __LINE__
  8613. }
  8614. ;;
  8615. { .mfb
  8616. nop __LINE__
  8617. (p3) FMA f82 = f44, f58, f82 // A5 * B3
  8618. nop __LINE__
  8619. }
  8620. { .mfb
  8621. nop __LINE__
  8622. (p3) FMA_B f83 = f44, f59, f83 // A5 * B4
  8623. nop __LINE__
  8624. }
  8625. ;;
  8626. { .mfb
  8627. nop __LINE__
  8628. (p3) FMA f98 = f46, f56, f98 // A7 * B1
  8629. nop __LINE__
  8630. }
  8631. { .mfb
  8632. nop __LINE__
  8633. (p3) FMA_B f99 = f46, f57, f99 // A7 * B2
  8634. nop __LINE__
  8635. }
  8636. ;;
  8637. { .mfb
  8638. nop __LINE__
  8639. (p3) FMA f114 = f46, f58, f114 // A7 * B3
  8640. nop __LINE__
  8641. }
  8642. { .mfb
  8643. nop __LINE__
  8644. (p3) FMA_B f115 = f46, f59, f115 // A7 * B4
  8645. nop __LINE__
  8646. }
  8647. ;;
  8648. { .mfb
  8649. nop __LINE__
  8650. (p3) FMA f67 = f45, f56, f67 // A6 * B1
  8651. nop __LINE__
  8652. }
  8653. { .mfb
  8654. nop __LINE__
  8655. (p3) FMA_A f66 = f45, f57, f66 // A6 * B2
  8656. nop __LINE__
  8657. }
  8658. ;;
  8659. { .mfb
  8660. nop __LINE__
  8661. (p3) FMA f83 = f45, f58, f83 // A6 * B3
  8662. nop __LINE__
  8663. }
  8664. { .mfb
  8665. nop __LINE__
  8666. (p3) FMA_A f82 = f45, f59, f82 // A6 * B4
  8667. nop __LINE__
  8668. }
  8669. ;;
  8670. { .mfb
  8671. nop __LINE__
  8672. (p3) FMA f99 = f47, f56, f99 // A8 * B1
  8673. nop __LINE__
  8674. }
  8675. { .mfb
  8676. nop __LINE__
  8677. (p3) FMA_A f98 = f47, f57, f98 // A8 * B2
  8678. nop __LINE__
  8679. }
  8680. ;;
  // Keep the software copy of the counter (L) in step with ar.lc, then
  // loop.
  8681. { .mfi
  8682. nop __LINE__
  8683. (p3) FMA f115 = f47, f58, f115 // A8 * B3
  8684. adds L = -1, L
  8685. }
  8686. { .mfb
  8687. nop __LINE__
  8688. (p3) FMA_A f114 = f47, f59, f114 // A8 * B4
  8689. br.cloop.sptk.few .L053
  8690. }
  8691. ;;
  // .L058: finish the current tile.
  //   1. Subtract the accumulated products from the 16 values held in the
  //      packed buffer (BOFFSET for LN/LT, AOFFSET for RN/RT).
  //   2. Run the variant-specific solve sequence (LN, LT, RN or RT).
  //   3. Write the results back to the packed buffer and to C.
  //   4. Advance pointers and KK, recompute L, and loop to .L052 while
  //      I has not reached its last group.
  // FSUB_A, FMA_A..FMA_D, FNMA and FMPY are macros defined outside this
  // section.
  8692. .L058:
  // LN/RT: r2 = (KK - 4) or (KK - 2), scaled by 1 << ZBASE_SHIFT;
  // AOFFSET = AORIG + 4 * r2, BOFFSET = B + 2 * r2.
  8693. #if defined(LN) || defined(RT)
  8694. #ifdef LN
  8695. adds r2 = -4, KK
  8696. #else
  8697. adds r2 = -2, KK
  8698. #endif
  8699. ;;
  8700. shladd r2 = r2, ZBASE_SHIFT, r0
  8701. ;;
  8702. shladd AOFFSET = r2, 2, AORIG
  8703. shladd BOFFSET = r2, 1, B
  8704. ;;
  8705. #endif
  // LN/LT: read 16 values from BOFFSET (seven post-incremented pair loads
  // plus one plain load, undone by the -14 * SIZE adjustment) and replace
  // each accumulator with (loaded value - accumulator).
  8706. #if defined(LN) || defined(LT)
  8707. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  8708. ;;
  8709. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  8710. ;;
  8711. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  8712. ;;
  8713. LDFPD f90, f91 = [BOFFSET], 2 * SIZE
  8714. ;;
  8715. LDFPD f104, f105 = [BOFFSET], 2 * SIZE
  8716. ;;
  8717. LDFPD f106, f107 = [BOFFSET], 2 * SIZE
  8718. ;;
  8719. LDFPD f120, f121 = [BOFFSET], 2 * SIZE
  8720. ;;
  8721. LDFPD f122, f123 = [BOFFSET]
  8722. adds BOFFSET = -14 * SIZE, BOFFSET
  8723. ;;
  8724. FSUB f64 = f72, f64
  8725. FSUB_A f65 = f73, f65
  8726. FSUB f80 = f74, f80
  8727. FSUB_A f81 = f75, f81
  8728. FSUB f96 = f88, f96
  8729. FSUB_A f97 = f89, f97
  8730. FSUB f112 = f90, f112
  8731. FSUB_A f113 = f91, f113
  8732. FSUB f66 = f104, f66
  8733. FSUB_A f67 = f105, f67
  8734. FSUB f82 = f106, f82
  8735. FSUB_A f83 = f107, f83
  8736. FSUB f98 = f120, f98
  8737. FSUB_A f99 = f121, f99
  8738. FSUB f114 = f122, f114
  8739. FSUB_A f115 = f123, f115
  8740. ;;
  // RN/RT: same subtraction, but the 16 values come from AOFFSET and are
  // paired with the accumulators in a different order.
  8741. #else
  8742. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  8743. ;;
  8744. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  8745. ;;
  8746. LDFPD f76, f77 = [AOFFSET], 2 * SIZE
  8747. ;;
  8748. LDFPD f78, f79 = [AOFFSET], 2 * SIZE
  8749. ;;
  8750. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  8751. ;;
  8752. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  8753. ;;
  8754. LDFPD f92, f93 = [AOFFSET], 2 * SIZE
  8755. ;;
  8756. LDFPD f94, f95 = [AOFFSET]
  8757. adds AOFFSET = -14 * SIZE, AOFFSET
  8758. ;;
  8759. FSUB f64 = f72, f64
  8760. FSUB f65 = f73, f65
  8761. FSUB f96 = f74, f96
  8762. FSUB f97 = f75, f97
  8763. FSUB f66 = f76, f66
  8764. FSUB f67 = f77, f67
  8765. FSUB f98 = f78, f98
  8766. FSUB f99 = f79, f99
  8767. FSUB f80 = f88, f80
  8768. FSUB f81 = f89, f81
  8769. FSUB f112 = f90, f112
  8770. FSUB f113 = f91, f113
  8771. FSUB f82 = f92, f82
  8772. FSUB f83 = f93, f83
  8773. FSUB f114 = f94, f114
  8774. FSUB f115 = f95, f115
  8775. ;;
  8776. #endif
  // LN: ten coefficient pairs are read walking AOFFSET downwards from
  // +30 * SIZE; the offsets sum back to zero, so AOFFSET ends where it
  // started.  The solve proceeds from the f98/f114 pair back to f64/f80:
  // each stage scales one pair (FMPY + FMA_C/FMA_D) and then eliminates it
  // from the remaining pairs (FNMA/FMA_A followed by FMA_B/FNMA).
  // Presumably the scaling coefficients (f72/f73, f88/f89, f104/f105,
  // f120/f121) are pre-inverted diagonal entries - TODO confirm against
  // the packing routine.
  8777. #ifdef LN
  8778. adds AOFFSET = 30 * SIZE, AOFFSET
  8779. ;;
  8780. LDFPD f72, f73 = [AOFFSET]
  8781. adds AOFFSET = - 2 * SIZE, AOFFSET
  8782. ;;
  8783. LDFPD f74, f75 = [AOFFSET]
  8784. adds AOFFSET = - 2 * SIZE, AOFFSET
  8785. ;;
  8786. LDFPD f76, f77 = [AOFFSET]
  8787. adds AOFFSET = - 2 * SIZE, AOFFSET
  8788. ;;
  8789. LDFPD f78, f79 = [AOFFSET]
  8790. adds AOFFSET = - 4 * SIZE, AOFFSET
  8791. ;;
  8792. LDFPD f88, f89 = [AOFFSET]
  8793. adds AOFFSET = - 2 * SIZE, AOFFSET
  8794. ;;
  8795. LDFPD f90, f91 = [AOFFSET]
  8796. adds AOFFSET = - 2 * SIZE, AOFFSET
  8797. ;;
  8798. LDFPD f92, f93 = [AOFFSET]
  8799. adds AOFFSET = - 6 * SIZE, AOFFSET
  8800. ;;
  8801. LDFPD f104, f105 = [AOFFSET]
  8802. adds AOFFSET = - 2 * SIZE, AOFFSET
  8803. ;;
  8804. LDFPD f106, f107 = [AOFFSET]
  8805. adds AOFFSET = - 8 * SIZE, AOFFSET
  8806. ;;
  8807. LDFPD f120, f121 = [AOFFSET]
  8808. ;;
  8809. FMPY f32 = f72, f98
  8810. FMPY f33 = f73, f98
  8811. FMPY f34 = f72, f114
  8812. FMPY f35 = f73, f114
  8813. ;;
  8814. FMA_C f98 = f73, f99, f32
  8815. FMA_D f99 = f72, f99, f33
  8816. FMA_C f114 = f73, f115, f34
  8817. FMA_D f115 = f72, f115, f35
  8818. ;;
  8819. FNMA f66 = f74, f98, f66
  8820. FMA_A f67 = f75, f98, f67
  8821. FNMA f82 = f74, f114, f82
  8822. FMA_A f83 = f75, f114, f83
  8823. ;;
  8824. FMA_B f66 = f75, f99, f66
  8825. FNMA f67 = f74, f99, f67
  8826. FMA_B f82 = f75, f115, f82
  8827. FNMA f83 = f74, f115, f83
  8828. ;;
  8829. FNMA f96 = f76, f98, f96
  8830. FMA_A f97 = f77, f98, f97
  8831. FNMA f112 = f76, f114, f112
  8832. FMA_A f113 = f77, f114, f113
  8833. ;;
  8834. FMA_B f96 = f77, f99, f96
  8835. FNMA f97 = f76, f99, f97
  8836. FMA_B f112 = f77, f115, f112
  8837. FNMA f113 = f76, f115, f113
  8838. ;;
  8839. FNMA f64 = f78, f98, f64
  8840. FMA_A f65 = f79, f98, f65
  8841. FNMA f80 = f78, f114, f80
  8842. FMA_A f81 = f79, f114, f81
  8843. ;;
  8844. FMA_B f64 = f79, f99, f64
  8845. FNMA f65 = f78, f99, f65
  8846. FMA_B f80 = f79, f115, f80
  8847. FNMA f81 = f78, f115, f81
  8848. ;;
  8849. FMPY f32 = f88, f66
  8850. FMPY f33 = f89, f66
  8851. FMPY f34 = f88, f82
  8852. FMPY f35 = f89, f82
  8853. ;;
  8854. FMA_C f66 = f89, f67, f32
  8855. FMA_D f67 = f88, f67, f33
  8856. FMA_C f82 = f89, f83, f34
  8857. FMA_D f83 = f88, f83, f35
  8858. ;;
  8859. FNMA f96 = f90, f66, f96
  8860. FMA_A f97 = f91, f66, f97
  8861. FNMA f112 = f90, f82, f112
  8862. FMA_A f113 = f91, f82, f113
  8863. ;;
  8864. FMA_B f96 = f91, f67, f96
  8865. FNMA f97 = f90, f67, f97
  8866. FMA_B f112 = f91, f83, f112
  8867. FNMA f113 = f90, f83, f113
  8868. ;;
  8869. FNMA f64 = f92, f66, f64
  8870. FMA_A f65 = f93, f66, f65
  8871. FNMA f80 = f92, f82, f80
  8872. FMA_A f81 = f93, f82, f81
  8873. ;;
  8874. FMA_B f64 = f93, f67, f64
  8875. FNMA f65 = f92, f67, f65
  8876. FMA_B f80 = f93, f83, f80
  8877. FNMA f81 = f92, f83, f81
  8878. ;;
  8879. FMPY f32 = f104, f96
  8880. FMPY f33 = f105, f96
  8881. FMPY f34 = f104, f112
  8882. FMPY f35 = f105, f112
  8883. ;;
  8884. FMA_C f96 = f105, f97, f32
  8885. FMA_D f97 = f104, f97, f33
  8886. FMA_C f112 = f105, f113, f34
  8887. FMA_D f113 = f104, f113, f35
  8888. ;;
  8889. FNMA f64 = f106, f96, f64
  8890. FMA_A f65 = f107, f96, f65
  8891. FNMA f80 = f106, f112, f80
  8892. FMA_A f81 = f107, f112, f81
  8893. ;;
  8894. FMA_B f64 = f107, f97, f64
  8895. FNMA f65 = f106, f97, f65
  8896. FMA_B f80 = f107, f113, f80
  8897. FNMA f81 = f106, f113, f81
  8898. ;;
  8899. FMPY f32 = f120, f64
  8900. FMPY f33 = f121, f64
  8901. FMPY f34 = f120, f80
  8902. FMPY f35 = f121, f80
  8903. ;;
  8904. FMA_C f64 = f121, f65, f32
  8905. FMA_D f65 = f120, f65, f33
  8906. FMA_C f80 = f121, f81, f34
  8907. FMA_D f81 = f120, f81, f35
  8908. ;;
  8909. #endif
  // LT: mirror image of the LN sequence.  Ten coefficient pairs are read
  // walking AOFFSET upwards (total +30 * SIZE, undone by the final
  // -30 * SIZE) and the solve runs from the f64/f80 pair forward to
  // f98/f114.
  8910. #ifdef LT
  8911. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  8912. ;;
  8913. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  8914. ;;
  8915. LDFPD f76, f77 = [AOFFSET], 2 * SIZE
  8916. ;;
  8917. LDFPD f78, f79 = [AOFFSET]
  8918. adds AOFFSET = 4 * SIZE, AOFFSET
  8919. ;;
  8920. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  8921. ;;
  8922. LDFPD f92, f93 = [AOFFSET], 2 * SIZE
  8923. ;;
  8924. LDFPD f94, f95 = [AOFFSET]
  8925. adds AOFFSET = 6 * SIZE, AOFFSET
  8926. ;;
  8927. LDFPD f108, f109 = [AOFFSET], 2 * SIZE
  8928. ;;
  8929. LDFPD f110, f111 = [AOFFSET]
  8930. adds AOFFSET = 8 * SIZE, AOFFSET
  8931. ;;
  8932. LDFPD f126, f127 = [AOFFSET]
  8933. adds AOFFSET = - 30 * SIZE, AOFFSET
  8934. ;;
  8935. FMPY f32 = f72, f64
  8936. FMPY f33 = f73, f64
  8937. FMPY f34 = f72, f80
  8938. FMPY f35 = f73, f80
  8939. ;;
  8940. FMA_C f64 = f73, f65, f32
  8941. FMA_D f65 = f72, f65, f33
  8942. FMA_C f80 = f73, f81, f34
  8943. FMA_D f81 = f72, f81, f35
  8944. ;;
  8945. FNMA f96 = f74, f64, f96
  8946. FMA_A f97 = f75, f64, f97
  8947. FNMA f112 = f74, f80, f112
  8948. FMA_A f113 = f75, f80, f113
  8949. ;;
  8950. FMA_B f96 = f75, f65, f96
  8951. FNMA f97 = f74, f65, f97
  8952. FMA_B f112 = f75, f81, f112
  8953. FNMA f113 = f74, f81, f113
  8954. ;;
  8955. FNMA f66 = f76, f64, f66
  8956. FMA_A f67 = f77, f64, f67
  8957. FNMA f82 = f76, f80, f82
  8958. FMA_A f83 = f77, f80, f83
  8959. ;;
  8960. FMA_B f66 = f77, f65, f66
  8961. FNMA f67 = f76, f65, f67
  8962. FMA_B f82 = f77, f81, f82
  8963. FNMA f83 = f76, f81, f83
  8964. ;;
  8965. FNMA f98 = f78, f64, f98
  8966. FMA_A f99 = f79, f64, f99
  8967. FNMA f114 = f78, f80, f114
  8968. FMA_A f115 = f79, f80, f115
  8969. ;;
  8970. FMA_B f98 = f79, f65, f98
  8971. FNMA f99 = f78, f65, f99
  8972. FMA_B f114 = f79, f81, f114
  8973. FNMA f115 = f78, f81, f115
  8974. ;;
  8975. FMPY f32 = f90, f96
  8976. FMPY f33 = f91, f96
  8977. FMPY f34 = f90, f112
  8978. FMPY f35 = f91, f112
  8979. ;;
  8980. FMA_C f96 = f91, f97, f32
  8981. FMA_D f97 = f90, f97, f33
  8982. FMA_C f112 = f91, f113, f34
  8983. FMA_D f113 = f90, f113, f35
  8984. ;;
  8985. FNMA f66 = f92, f96, f66
  8986. FMA_A f67 = f93, f96, f67
  8987. FNMA f82 = f92, f112, f82
  8988. FMA_A f83 = f93, f112, f83
  8989. ;;
  8990. FMA_B f66 = f93, f97, f66
  8991. FNMA f67 = f92, f97, f67
  8992. FMA_B f82 = f93, f113, f82
  8993. FNMA f83 = f92, f113, f83
  8994. ;;
  8995. FNMA f98 = f94, f96, f98
  8996. FMA_A f99 = f95, f96, f99
  8997. FNMA f114 = f94, f112, f114
  8998. FMA_A f115 = f95, f112, f115
  8999. ;;
  9000. FMA_B f98 = f95, f97, f98
  9001. FNMA f99 = f94, f97, f99
  9002. FMA_B f114 = f95, f113, f114
  9003. FNMA f115 = f94, f113, f115
  9004. ;;
  9005. FMPY f32 = f108, f66
  9006. FMPY f33 = f109, f66
  9007. FMPY f34 = f108, f82
  9008. FMPY f35 = f109, f82
  9009. ;;
  9010. FMA_C f66 = f109, f67, f32
  9011. FMA_D f67 = f108, f67, f33
  9012. FMA_C f82 = f109, f83, f34
  9013. FMA_D f83 = f108, f83, f35
  9014. ;;
  9015. FNMA f98 = f110, f66, f98
  9016. FMA_A f99 = f111, f66, f99
  9017. FNMA f114 = f110, f82, f114
  9018. FMA_A f115 = f111, f82, f115
  9019. ;;
  9020. FMA_B f98 = f111, f67, f98
  9021. FNMA f99 = f110, f67, f99
  9022. FMA_B f114 = f111, f83, f114
  9023. FNMA f115 = f110, f83, f115
  9024. ;;
  9025. FMPY f32 = f126, f98
  9026. FMPY f33 = f127, f98
  9027. FMPY f34 = f126, f114
  9028. FMPY f35 = f127, f114
  9029. ;;
  9030. FMA_C f98 = f127, f99, f32
  9031. FMA_D f99 = f126, f99, f33
  9032. FMA_C f114 = f127, f115, f34
  9033. FMA_D f115 = f126, f115, f35
  9034. ;;
  9035. #endif
  // RN: three coefficient pairs from BOFFSET (net pointer movement zero).
  // The f64/f96/f66/f98 group is scaled by f72/f73, eliminated from the
  // f80/f112/f82/f114 group with f74/f75, and that group is then scaled
  // by f90/f91.
  9036. #ifdef RN
  9037. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  9038. ;;
  9039. LDFPD f74, f75 = [BOFFSET]
  9040. adds BOFFSET = 4 * SIZE, BOFFSET
  9041. ;;
  9042. LDFPD f90, f91 = [BOFFSET]
  9043. adds BOFFSET = - 6 * SIZE, BOFFSET
  9044. ;;
  9045. FMPY f32 = f72, f64
  9046. FMPY f33 = f73, f64
  9047. FMPY f34 = f72, f96
  9048. FMPY f35 = f73, f96
  9049. FMPY f36 = f72, f66
  9050. FMPY f37 = f73, f66
  9051. FMPY f38 = f72, f98
  9052. FMPY f39 = f73, f98
  9053. ;;
  9054. FMA_C f64 = f73, f65, f32
  9055. FMA_D f65 = f72, f65, f33
  9056. FMA_C f96 = f73, f97, f34
  9057. FMA_D f97 = f72, f97, f35
  9058. FMA_C f66 = f73, f67, f36
  9059. FMA_D f67 = f72, f67, f37
  9060. FMA_C f98 = f73, f99, f38
  9061. FMA_D f99 = f72, f99, f39
  9062. ;;
  9063. FNMA f80 = f74, f64, f80
  9064. FMA_A f81 = f75, f64, f81
  9065. FNMA f112 = f74, f96, f112
  9066. FMA_A f113 = f75, f96, f113
  9067. FNMA f82 = f74, f66, f82
  9068. FMA_A f83 = f75, f66, f83
  9069. FNMA f114 = f74, f98, f114
  9070. FMA_A f115 = f75, f98, f115
  9071. ;;
  9072. FMA_B f80 = f75, f65, f80
  9073. FNMA f81 = f74, f65, f81
  9074. FMA_B f112 = f75, f97, f112
  9075. FNMA f113 = f74, f97, f113
  9076. FMA_B f82 = f75, f67, f82
  9077. FNMA f83 = f74, f67, f83
  9078. FMA_B f114 = f75, f99, f114
  9079. FNMA f115 = f74, f99, f115
  9080. ;;
  9081. FMPY f32 = f90, f80
  9082. FMPY f33 = f91, f80
  9083. FMPY f34 = f90, f112
  9084. FMPY f35 = f91, f112
  9085. FMPY f36 = f90, f82
  9086. FMPY f37 = f91, f82
  9087. FMPY f38 = f90, f114
  9088. FMPY f39 = f91, f114
  9089. ;;
  9090. FMA_C f80 = f91, f81, f32
  9091. FMA_D f81 = f90, f81, f33
  9092. FMA_C f112 = f91, f113, f34
  9093. FMA_D f113 = f90, f113, f35
  9094. FMA_C f82 = f91, f83, f36
  9095. FMA_D f83 = f90, f83, f37
  9096. FMA_C f114 = f91, f115, f38
  9097. FMA_D f115 = f90, f115, f39
  9098. ;;
  9099. #endif
  // RT: reverse order of RN.  Coefficients are read from BOFFSET + 6 * SIZE
  // downwards (net pointer movement zero); the f80/f112/f82/f114 group is
  // scaled first, eliminated from f64/f96/f66/f98, which is scaled last.
  9100. #ifdef RT
  9101. adds BOFFSET = 6 * SIZE, BOFFSET
  9102. ;;
  9103. LDFPD f104, f105 = [BOFFSET]
  9104. adds BOFFSET = - 2 * SIZE, BOFFSET
  9105. ;;
  9106. LDFPD f106, f107 = [BOFFSET]
  9107. adds BOFFSET = - 4 * SIZE, BOFFSET
  9108. ;;
  9109. LDFPD f120, f121 = [BOFFSET]
  9110. ;;
  9111. FMPY f32 = f104, f80
  9112. FMPY f33 = f105, f80
  9113. FMPY f34 = f104, f112
  9114. FMPY f35 = f105, f112
  9115. FMPY f36 = f104, f82
  9116. FMPY f37 = f105, f82
  9117. FMPY f38 = f104, f114
  9118. FMPY f39 = f105, f114
  9119. ;;
  9120. FMA_C f80 = f105, f81, f32
  9121. FMA_D f81 = f104, f81, f33
  9122. FMA_C f112 = f105, f113, f34
  9123. FMA_D f113 = f104, f113, f35
  9124. FMA_C f82 = f105, f83, f36
  9125. FMA_D f83 = f104, f83, f37
  9126. FMA_C f114 = f105, f115, f38
  9127. FMA_D f115 = f104, f115, f39
  9128. ;;
  9129. FNMA f64 = f106, f80, f64
  9130. FMA_A f65 = f107, f80, f65
  9131. FNMA f96 = f106, f112, f96
  9132. FMA_A f97 = f107, f112, f97
  9133. FNMA f66 = f106, f82, f66
  9134. FMA_A f67 = f107, f82, f67
  9135. FNMA f98 = f106, f114, f98
  9136. FMA_A f99 = f107, f114, f99
  9137. ;;
  9138. FMA_B f64 = f107, f81, f64
  9139. FNMA f65 = f106, f81, f65
  9140. FMA_B f96 = f107, f113, f96
  9141. FNMA f97 = f106, f113, f97
  9142. FMA_B f66 = f107, f83, f66
  9143. FNMA f67 = f106, f83, f67
  9144. FMA_B f98 = f107, f115, f98
  9145. FNMA f99 = f106, f115, f99
  9146. ;;
  9147. FMPY f32 = f120, f64
  9148. FMPY f33 = f121, f64
  9149. FMPY f34 = f120, f96
  9150. FMPY f35 = f121, f96
  9151. FMPY f36 = f120, f66
  9152. FMPY f37 = f121, f66
  9153. FMPY f38 = f120, f98
  9154. FMPY f39 = f121, f98
  9155. ;;
  9156. FMA_C f64 = f121, f65, f32
  9157. FMA_D f65 = f120, f65, f33
  9158. FMA_C f96 = f121, f97, f34
  9159. FMA_D f97 = f120, f97, f35
  9160. FMA_C f66 = f121, f67, f36
  9161. FMA_D f67 = f120, f67, f37
  9162. FMA_C f98 = f121, f99, f38
  9163. FMA_D f99 = f120, f99, f39
  9164. ;;
  9165. #endif
  // Write the solved values back into the packed buffer through two
  // interleaved pointers 4 * SIZE apart.  Each pointer advances
  // 3 * SIZE + 5 * SIZE per group of four stores, so the final
  // -16 * SIZE restores the base pointer.
  9166. #if defined(LN) || defined(LT)
  9167. adds BOFFSET2 = 4 * SIZE, BOFFSET
  9168. ;;
  9169. STFD [BOFFSET] = f64, SIZE
  9170. STFD [BOFFSET2] = f96, SIZE
  9171. ;;
  9172. STFD [BOFFSET] = f65, SIZE
  9173. STFD [BOFFSET2] = f97, SIZE
  9174. ;;
  9175. STFD [BOFFSET] = f80, SIZE
  9176. STFD [BOFFSET2] = f112, SIZE
  9177. ;;
  9178. STFD [BOFFSET] = f81, 5 * SIZE
  9179. STFD [BOFFSET2] = f113, 5 * SIZE
  9180. ;;
  9181. STFD [BOFFSET] = f66, SIZE
  9182. STFD [BOFFSET2] = f98, SIZE
  9183. ;;
  9184. STFD [BOFFSET] = f67, SIZE
  9185. STFD [BOFFSET2] = f99, SIZE
  9186. ;;
  9187. STFD [BOFFSET] = f82, SIZE
  9188. STFD [BOFFSET2] = f114, SIZE
  9189. ;;
  9190. STFD [BOFFSET] = f83, 5 * SIZE
  9191. STFD [BOFFSET2] = f115, 5 * SIZE
  9192. ;;
  9193. adds BOFFSET = - 16 * SIZE, BOFFSET
  9194. ;;
  9195. #else
  9196. adds AOFFSET2 = 4 * SIZE, AOFFSET
  9197. ;;
  9198. STFD [AOFFSET] = f64, SIZE
  9199. STFD [AOFFSET2] = f66, SIZE
  9200. ;;
  9201. STFD [AOFFSET] = f65, SIZE
  9202. STFD [AOFFSET2] = f67, SIZE
  9203. ;;
  9204. STFD [AOFFSET] = f96, SIZE
  9205. STFD [AOFFSET2] = f98, SIZE
  9206. ;;
  9207. STFD [AOFFSET] = f97, 5 * SIZE
  9208. STFD [AOFFSET2] = f99, 5 * SIZE
  9209. ;;
  9210. STFD [AOFFSET] = f80, SIZE
  9211. STFD [AOFFSET2] = f82, SIZE
  9212. ;;
  9213. STFD [AOFFSET] = f81, SIZE
  9214. STFD [AOFFSET2] = f83, SIZE
  9215. ;;
  9216. STFD [AOFFSET] = f112, SIZE
  9217. STFD [AOFFSET2] = f114, SIZE
  9218. ;;
  9219. STFD [AOFFSET] = f113, 5 * SIZE
  9220. STFD [AOFFSET2] = f115, 5 * SIZE
  9221. ;;
  9222. adds AOFFSET = - 16 * SIZE, AOFFSET
  9223. ;;
  9224. #endif
  // Store the tile to C.  Under LN the four C pointers are first moved
  // back by 8 * SIZE.  C1/C5 (4 * SIZE apart) together write eight
  // consecutive values, as do C2/C6; each pointer ends 8 * SIZE further on.
  9225. #ifdef LN
  9226. adds C1 = -8 * SIZE, C1
  9227. adds C2 = -8 * SIZE, C2
  9228. adds C5 = -8 * SIZE, C5
  9229. adds C6 = -8 * SIZE, C6
  9230. #endif
  9231. ;;
  9232. STFD [C1 ] = f64, SIZE
  9233. STFD [C5 ] = f66, SIZE
  9234. ;;
  9235. STFD [C1 ] = f65, SIZE
  9236. STFD [C5 ] = f67, SIZE
  9237. ;;
  9238. STFD [C1 ] = f96, SIZE
  9239. STFD [C5 ] = f98, SIZE
  9240. ;;
  9241. STFD [C1 ] = f97, 5 * SIZE
  9242. STFD [C5 ] = f99, 5 * SIZE
  9243. ;;
  9244. STFD [C2 ] = f80, SIZE
  9245. STFD [C6 ] = f82, SIZE
  9246. ;;
  9247. STFD [C2 ] = f81, SIZE
  9248. STFD [C6 ] = f83, SIZE
  9249. ;;
  9250. STFD [C2 ] = f112, SIZE
  9251. STFD [C6 ] = f114, SIZE
  9252. ;;
  9253. STFD [C2 ] = f113, 5 * SIZE
  9254. STFD [C6 ] = f115, 5 * SIZE
  9255. ;;
  // Clear the accumulators that .L052 does not reset itself.
  9256. mov f64 = f0
  9257. mov f65 = f0
  9258. mov f80 = f0
  9259. mov f81 = f0
  9260. mov f96 = f0
  9261. mov f97 = f0
  9262. mov f112 = f0
  9263. mov f113 = f0
  9264. ;;
  // LN: undo the post-increment of the stores so the C pointers finish
  // 8 * SIZE below where they started.
  9265. #ifdef LN
  9266. adds C1 = -8 * SIZE, C1
  9267. adds C2 = -8 * SIZE, C2
  9268. adds C5 = -8 * SIZE, C5
  9269. adds C6 = -8 * SIZE, C6
  9270. #endif
  9271. ;;
  // Loop bookkeeping: p6 = (I != 1) is latched before I is decremented and
  // consumed by the branch back to .L052 at the bottom.
  9272. cmp.ne p6, p0 = 1, I
  9273. ;;
  9274. adds I = -1, I
  9275. ;;
  9276. shladd r2 = K, ZBASE_SHIFT, r0
  9277. ;;
  9278. sub L = K, KK
  9279. ;;
  // RT: AORIG += 4 * (K << ZBASE_SHIFT).
  9280. #ifdef RT
  9281. shladd AORIG = r2, 2, AORIG
  9282. #endif
  9283. ;;
  // LT/RN: skip the remaining (K - KK) steps of the packed panels:
  // AOFFSET advances by 4x and BOFFSET by 2x the scaled count.
  9284. #if defined(LT) || defined(RN)
  9285. shladd L = L, ZBASE_SHIFT, r0
  9286. ;;
  9287. shladd AOFFSET = L, 2, AOFFSET
  9288. shladd BOFFSET = L, 1, BOFFSET
  9289. #endif
  9290. ;;
  9291. #ifdef LT
  9292. adds KK = 4, KK
  9293. #elif defined LN
  9294. adds KK = -4, KK
  9295. #else
  9296. nop __LINE__
  9297. #endif
  9298. ;;
  // Recompute the inner-loop length for the next tile from the updated KK.
  9299. #if defined(LT) || defined(RN)
  9300. mov L = KK
  9301. #else
  9302. sub L = K, KK
  9303. #endif
  9304. ;;
  9305. (p6) br.cond.dptk .L052
  9306. ;;
  // .L089: bookkeeping after the two-wide section has been processed.
  //   LN    : B += 2 * (K << ZBASE_SHIFT)   (KK8 is used as a temporary)
  //   LT/RN : B = BOFFSET
  //   RN    : KK += 2        RT : KK -= 2
  // AOFFSET is then reset to A.
  9307. .align 16
  9308. .L089:
  9309. #ifdef LN
  9310. shladd KK8 = K, ZBASE_SHIFT, r0
  9311. ;;
  9312. shladd B = KK8, 1, B
  9313. #endif
  9314. #if defined(LT) || defined(RN)
  9315. mov B = BOFFSET
  9316. #endif
  9317. #ifdef RN
  9318. adds KK = 2, KK
  9319. #endif
  9320. #ifdef RT
  9321. adds KK = -2, KK
  9322. #endif
  9323. ;;
  9324. { .mmi
  9325. mov AOFFSET = A
  9326. nop __LINE__
  9327. }
  9328. ;;
  // .L090: entry to the section guarded by bit 0 of N.  When that bit is
  // clear, control goes straight to .L999.
  9329. .align 16
  9330. .L090:
  9331. tbit.z p6, p0 = N, 0
  9332. (p6) br.cond.dpnt .L999
  9333. ;;
  // RT: step B back by K << ZBASE_SHIFT and C back by LDC before use.
  9334. #ifdef RT
  9335. { .mmi
  9336. shl r2 = K, ZBASE_SHIFT
  9337. }
  9338. ;;
  9339. { .mmi
  9340. sub B = B, r2
  9341. sub C = C, LDC
  9342. nop __LINE__
  9343. }
  9344. ;;
  9345. #endif
  // C1 is the working output pointer for this section.  KK starts at
  // M + OFFSET for LN and at OFFSET for LT; the other variants leave KK
  // untouched here.
  9346. mov C1 = C
  9347. #ifdef LN
  9348. add KK = M, OFFSET
  9349. #elif defined LT
  9350. mov KK = OFFSET
  9351. #else
  9352. nop __LINE__
  9353. #endif
  9354. ;;
  // LN/RT address A through AORIG; LT/RN walk AOFFSET directly.
  9355. #if defined(LN) || defined(RT)
  9356. mov AORIG = A
  9357. #else
  9358. mov AOFFSET = A
  9359. #endif
  9360. ;;
  // Inner-loop length: KK for LT/RN, K - KK otherwise.
  9361. #if defined(LT) || defined(RN)
  9362. mov L = KK
  9363. #else
  9364. sub L = K, KK
  9365. #endif
  9366. ;;
  // All variants except RT advance C by LDC (RT already stepped it back).
  9367. { .mib
  9368. #ifndef RT
  9369. add C = LDC, C
  9370. #else
  9371. nop __LINE__
  9372. #endif
  9373. }
  9374. ;;
  // .L110: setup for the single-element case selected by bit 0 of M.
  // When that bit is clear, control branches to .L100.
  //   * L is recomputed (KK for LT/RN, K - KK otherwise).
  //   * p7 is written by the tbit.z and then overwritten with (L != 0),
  //     which guards the initial operand loads.
  //   * r2 = K << ZBASE_SHIFT, r3 = KK << ZBASE_SHIFT.
  //   * LT/RN: BOFFSET = B.  Otherwise BOFFSET = B + r3 and
  //     AOFFSET = AORIG + r3, with AORIG first reduced by r2 under LN.
  //   * L becomes the ar.lc trip count ((L + 1) >> 1) - 1; p12 is set when
  //     bit 0 of L + 1 is clear; p3 starts true.
  //   * A count of -1 bypasses the loop via .L118.
  9375. .L110:
  9376. { .mib
  9377. #if defined(LT) || defined(RN)
  9378. mov L = KK
  9379. #else
  9380. sub L = K, KK
  9381. #endif
  9382. tbit.z p6, p7 = M, 0
  9383. (p6) br.cond.dptk .L100
  9384. }
  9385. ;;
  9386. { .mmi
  9387. cmp.ne p7, p0 = r0, L
  9388. adds BOFFSET = 0 * SIZE, B
  9389. shl r2 = K, ZBASE_SHIFT
  9390. }
  9391. { .mmi
  9392. shladd r3 = KK, ZBASE_SHIFT, r0
  9393. nop __LINE__
  9394. nop __LINE__
  9395. }
  9396. ;;
  9397. #if defined(LT) || defined(RN)
  9398. { .mfb
  9399. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  9400. mov f66 = f0
  9401. nop __LINE__
  9402. }
  9403. { .mmf
  9404. nop __LINE__
  9405. nop __LINE__
  9406. mov f67 = f0
  9407. }
  9408. ;;
  9409. #else
  9410. { .mfi
  9411. add BOFFSET = r3, B
  9412. mov f66 = f0
  9413. #ifdef LN
  9414. sub AORIG = AORIG, r2
  9415. #else
  9416. nop __LINE__
  9417. #endif
  9418. }
  9419. ;;
  9420. { .mfi
  9421. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  9422. mov f67 = f0
  9423. add AOFFSET = r3, AORIG
  9424. }
  9425. ;;
  9426. #endif
  9427. ;;
  9428. adds L = 1, L
  9429. ;;
  9430. { .mii
  9431. nop __LINE__
  9432. tbit.z p12, p0 = L, 0
  9433. shr L = L, 1
  9434. }
  9435. ;;
  9436. { .mmi
  9437. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  9438. cmp.eq p3, p0 = r0, r0
  9439. adds L = -1, L
  9440. }
  9441. ;;
  // Prefetch pointers run PREFETCHSIZE * SIZE ahead of the operand
  // pointers.
  9442. { .mmi
  9443. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  9444. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  9445. mov ar.lc = L
  9446. }
  9447. ;;
  9448. cmp.eq p6, p0 = -1, L
  9449. (p6) br.cond.dpnt .L118
  9450. ;;
  // .L112: inner loop for the single-element case, two steps per pass and
  // counted by ar.lc.
  //   * First step uses f32/f33 and f48/f49; the second, predicated on p3,
  //     uses f40/f41 and f56/f57 loaded during the first.
  //   * (p12) cmp.ne p3 = 0, L clears p3 on the pass where L == 0, dropping
  //     the second step of the final pass when p12 is set.
  //   * p4 = (L != 0) gates the reload of f32/f33 and f48/f49 for the next
  //     pass.
  //   * The four partial sums f64, f65, f80, f81 are folded into f64/f65 by
  //     FCALC_A / FCALC_B after the loop falls through (macros defined
  //     outside this section).  That fold is skipped when .L110 branches
  //     directly to .L118.
  9451. .align 16
  9452. .L112:
  9453. { .mfi
  9454. lfetch.nt1 [PREA], 4 * SIZE
  9455. FMA f64 = f32, f48, f64 // A1 * B1
  9456. cmp.ne p4, p5 = 0, L
  9457. }
  9458. { .mfi
  9459. lfetch.nt1 [PREB], 4 * SIZE
  9460. FMA f80 = f32, f49, f80 // A1 * B2
  9461. (p12) cmp.ne p3, p0 = 0, L
  9462. }
  9463. ;;
  9464. { .mmf
  9465. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  9466. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  9467. FMA f65 = f33, f48, f65 // A2 * B1
  9468. }
  9469. { .mmf
  9470. nop __LINE__
  9471. nop __LINE__
  9472. FMA f81 = f33, f49, f81 // A2 * B2
  9473. }
  9474. ;;
  9475. { .mfb
  9476. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  9477. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  9478. nop __LINE__
  9479. }
  9480. { .mfb
  9481. nop __LINE__
  9482. (p3) FMA f80 = f40, f57, f80 // A1 * B2
  9483. nop __LINE__
  9484. }
  9485. ;;
  9486. { .mfi
  9487. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  9488. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  9489. adds L = -1, L
  9490. }
  9491. { .mfb
  9492. (p3) FMA f81 = f41, f57, f81 // A2 * B2
  9493. br.cloop.sptk.few .L112
  9494. }
  9495. ;;
  // Loop exit: combine the partial sums.
  9496. { .mfb
  9497. nop __LINE__
  9498. FCALC_A f64 = f64, f81
  9499. nop __LINE__
  9500. }
  9501. { .mfb
  9502. nop __LINE__
  9503. FCALC_B f65 = f65, f80
  9504. nop __LINE__
  9505. }
  9506. ;;
  // .L118: solve/store step for the tile accumulated by .L112 (also reached
  // directly when the loop is skipped). Works on one value pair, f64/f65.
  // Sequence: position the pointers, subtract the accumulated products from the
  // stored pair, multiply by one coefficient pair, write the result to the
  // packed buffer and to C1, clear the accumulators, then update the
  // I / KK / L / pointer bookkeeping for the next tile.
  // NOTE(review): FSUB*, FMPY, FMA_C/FMA_D, LDFPD and STFD are macros defined
  // outside this chunk; the FMPY + FMA_C/FMA_D group looks like a complex
  // multiply by the loaded pair - confirm against the macro definitions.
  9507. .L118:
  9508. #if defined(LN) || defined(RT)
  // Both branches of this #ifdef emit the same instruction (r2 = KK - 1).
  9509. #ifdef LN
  9510. adds r2 = -1, KK
  9511. #else
  9512. adds r2 = -1, KK
  9513. #endif
  9514. ;;
  // r2 = (KK - 1) << ZBASE_SHIFT, used as the offset into both AORIG and B.
  9515. shladd r2 = r2, ZBASE_SHIFT, r0
  9516. ;;
  9517. add AOFFSET = r2, AORIG
  9518. add BOFFSET = r2, B
  9519. ;;
  9520. #endif
  // Load the stored pair (from BOFFSET for LN/LT, from AOFFSET otherwise) and
  // combine it with the accumulated f64/f65.
  9521. #if defined(LN) || defined(LT)
  9522. LDFPD f72, f73 = [BOFFSET]
  9523. ;;
  9524. FSUB f64 = f72, f64
  9525. FSUB_A f65 = f73, f65
  9526. ;;
  9527. #else
  9528. LDFPD f72, f73 = [AOFFSET]
  9529. ;;
  9530. FSUB f64 = f72, f64
  9531. FSUB f65 = f73, f65
  9532. ;;
  9533. #endif
  // Multiply f64/f65 by one coefficient pair: read from AOFFSET for LN/LT,
  // from BOFFSET for RN/RT. f32/f33 are used as temporaries.
  9534. #ifdef LN
  9535. LDFPD f120, f121 = [AOFFSET]
  9536. ;;
  9537. FMPY f32 = f120, f64
  9538. FMPY f33 = f121, f64
  9539. ;;
  9540. FMA_C f64 = f121, f65, f32
  9541. FMA_D f65 = f120, f65, f33
  9542. ;;
  9543. #endif
  9544. #ifdef LT
  9545. LDFPD f72, f73 = [AOFFSET]
  9546. ;;
  9547. FMPY f32 = f72, f64
  9548. FMPY f33 = f73, f64
  9549. ;;
  9550. FMA_C f64 = f73, f65, f32
  9551. FMA_D f65 = f72, f65, f33
  9552. ;;
  9553. #endif
  9554. #ifdef RN
  9555. LDFPD f72, f73 = [BOFFSET]
  9556. ;;
  9557. FMPY f32 = f72, f64
  9558. FMPY f33 = f73, f64
  9559. ;;
  9560. FMA_C f64 = f73, f65, f32
  9561. FMA_D f65 = f72, f65, f33
  9562. ;;
  9563. #endif
  9564. #ifdef RT
  9565. LDFPD f72, f73 = [BOFFSET]
  9566. ;;
  9567. FMPY f32 = f72, f64
  9568. FMPY f33 = f73, f64
  9569. ;;
  9570. FMA_C f64 = f73, f65, f32
  9571. FMA_D f65 = f72, f65, f33
  9572. ;;
  9573. #endif
  // Write the result back over the pair it was read from, then rewind the
  // pointer by the 2 * SIZE that the two post-incrementing stores advanced it.
  9574. #if defined(LN) || defined(LT)
  9575. STFD [BOFFSET] = f64, SIZE
  9576. ;;
  9577. STFD [BOFFSET] = f65, SIZE
  9578. ;;
  9579. adds BOFFSET = - 2 * SIZE, BOFFSET
  9580. ;;
  9581. #else
  9582. STFD [AOFFSET] = f64, SIZE
  9583. ;;
  9584. STFD [AOFFSET] = f65, SIZE
  9585. ;;
  9586. adds AOFFSET = - 2 * SIZE, AOFFSET
  9587. ;;
  9588. #endif
  // Store the same pair through C1. Under LN, C1 is stepped back by 2 * SIZE
  // before the stores and again after them, so it ends 2 * SIZE lower.
  9589. #ifdef LN
  9590. adds C1 = -2 * SIZE, C1
  9591. #endif
  9592. ;;
  9593. STFD [C1 ] = f64, SIZE
  9594. ;;
  9595. STFD [C1 ] = f65, SIZE
  9596. ;;
  // Clear the accumulators used by the multiply-accumulate loops.
  9597. mov f64 = f0
  9598. mov f65 = f0
  9599. mov f80 = f0
  9600. mov f81 = f0
  9601. ;;
  9602. #ifdef LN
  9603. adds C1 = -2 * SIZE, C1
  9604. #endif
  9605. ;;
  // Bookkeeping. No branch in this section reads the p6 set here; the tbit.z
  // at .L100 overwrites it.
  9606. cmp.ne p6, p0 = 1, I
  9607. ;;
  9608. adds I = -1, I
  9609. ;;
  // r2 = K << ZBASE_SHIFT (only consumed by the RT update of AORIG below).
  9610. shladd r2 = K, ZBASE_SHIFT, r0
  9611. ;;
  9612. sub L = K, KK
  9613. ;;
  9614. #ifdef RT
  9615. add AORIG = r2, AORIG
  9616. #endif
  9617. ;;
  // LT/RN: advance both pointers by (K - KK) << ZBASE_SHIFT.
  9618. #if defined(LT) || defined(RN)
  9619. shladd L = L, ZBASE_SHIFT, r0
  9620. ;;
  9621. add AOFFSET = L, AOFFSET
  9622. add BOFFSET = L, BOFFSET
  9623. #endif
  9624. ;;
  // KK moves by one for the left-side variants only.
  9625. #ifdef LT
  9626. adds KK = 1, KK
  9627. #elif defined LN
  9628. adds KK = -1, KK
  9629. #else
  9630. nop __LINE__
  9631. #endif
  9632. ;;
  // Loop count for the next tile: KK for LT/RN, K - KK otherwise.
  9633. #if defined(LT) || defined(RN)
  9634. mov L = KK
  9635. #else
  9636. sub L = K, KK
  9637. #endif
  9638. ;;
  // .L100: entry for the tile handled when bit 1 of M is set. If that bit is
  // clear (p6), control goes straight to .L091. Otherwise this section sets up
  // BOFFSET/AOFFSET, preloads the first operands and derives the loop count
  // for .L102.
  9639. .align 16
  9640. .L100:
  // L = KK (LT/RN) or K - KK (LN/RT); computed in the same bundle as the test.
  9641. { .mib
  9642. #if defined(LT) || defined(RN)
  9643. mov L = KK
  9644. #else
  9645. sub L = K, KK
  9646. #endif
  9647. tbit.z p6, p7 = M, 1
  9648. (p6) br.cond.dptk .L091
  9649. }
  9650. ;;
  // p7 = (L != 0) guards every preload below.
  // r2 = K << (1 + ZBASE_SHIFT), r3 = KK << ZBASE_SHIFT.
  9651. { .mmi
  9652. cmp.ne p7, p0 = r0, L
  9653. adds BOFFSET = 0 * SIZE, B
  9654. shl r2 = K, 1 + ZBASE_SHIFT
  9655. }
  9656. { .mmi
  9657. shladd r3 = KK, ZBASE_SHIFT, r0
  9658. nop __LINE__
  9659. nop __LINE__
  9660. }
  9661. ;;
  // LT/RN: BOFFSET stays at B and AOFFSET is left as it was on entry.
  9662. #if defined(LT) || defined(RN)
  9663. { .mfb
  9664. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  9665. mov f66 = f0
  9666. nop __LINE__
  9667. }
  9668. { .mmf
  9669. nop __LINE__
  9670. nop __LINE__
  9671. mov f67 = f0
  9672. }
  9673. ;;
  9674. #else
  // LN/RT: BOFFSET = B + r3; LN additionally moves AORIG back by r2;
  // AOFFSET = AORIG + (r3 << 1).
  9675. { .mfi
  9676. add BOFFSET = r3, B
  9677. mov f66 = f0
  9678. #ifdef LN
  9679. sub AORIG = AORIG, r2
  9680. #else
  9681. nop __LINE__
  9682. #endif
  9683. }
  9684. ;;
  9685. { .mfi
  9686. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  9687. mov f67 = f0
  9688. shladd AOFFSET = r3, 1, AORIG
  9689. }
  9690. ;;
  9691. #endif
  9692. ;;
  // Loop count: L = ((L + 1) >> 1) - 1, because .L102 handles two steps per
  // pass. p12 is set when bit 0 of (L + 1) is zero; .L102 uses it to drop the
  // second step on its final pass.
  9693. adds L = 1, L
  9694. ;;
  9695. { .mii
  9696. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  9697. tbit.z p12, p0 = L, 0
  9698. shr L = L, 1
  9699. }
  9700. ;;
  9701. { .mmi
  9702. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  9703. nop __LINE__
  9704. adds L = -1, L
  9705. }
  9706. ;;
  // p3 starts out true (r0 == r0); ar.lc receives the pass count.
  9707. { .mmi
  9708. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  9709. cmp.eq p3, p0 = r0, r0
  9710. mov ar.lc = L
  9711. }
  9712. ;;
  // L == -1 means there is nothing to accumulate: skip the loop entirely.
  9713. cmp.eq p6, p0 = -1, L
  9714. (p6) br.cond.dpnt .L108
  9715. ;;
  // .L102: multiply-accumulate loop for the two-pair tile, two steps per pass.
  //   step 1 (always):     f32/f33 and f34/f35 (from AOFFSET) times f48/f49
  //   step 2 (only if p3): f40/f41 and f42/f43 (from AOFFSET) times f56/f57
  // Partial sums: f64, f65, f80, f81 for the first pair and f96, f97, f112,
  // f113 for the second. p4 = (L != 0) guards the reloads for the next pass;
  // when p12 is set, p3 becomes (L != 0) so step 2 is skipped once L is 0.
  // PREB is recomputed from BOFFSET on every pass before it is prefetched.
  // NOTE(review): FMA / LDFPD are macros defined outside this chunk.
  9716. .align 16
  9717. .L102:
  9718. { .mfi
  9719. lfetch.nt1 [PREA], 8 * SIZE
  9720. FMA f64 = f32, f48, f64 // A1 * B1
  9721. cmp.ne p4, p5 = 0, L
  9722. }
  9723. { .mfi
  9724. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  9725. FMA f80 = f32, f49, f80 // A1 * B2
  9726. (p12) cmp.ne p3, p0 = 0, L
  9727. }
  9728. ;;
  9729. { .mfb
  9730. lfetch.nt1 [PREB], 4 * SIZE
  9731. FMA f65 = f33, f48, f65 // A2 * B1
  9732. nop __LINE__
  9733. }
  9734. { .mfb
  9735. nop __LINE__
  9736. FMA f81 = f33, f49, f81 // A2 * B2
  9737. nop __LINE__
  9738. }
  9739. ;;
  // Operand loads for step 2 are interleaved with the rest of step 1.
  9740. { .mfb
  9741. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  9742. FMA f96 = f34, f48, f96 // A3 * B1
  9743. nop __LINE__
  9744. }
  9745. { .mfb
  9746. nop __LINE__
  9747. FMA f112 = f34, f49, f112 // A3 * B2
  9748. nop __LINE__
  9749. }
  9750. ;;
  9751. { .mfb
  9752. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  9753. FMA f97 = f35, f48, f97 // A4 * B1
  9754. nop __LINE__
  9755. }
  9756. { .mfb
  9757. nop __LINE__
  9758. FMA f113 = f35, f49, f113 // A4 * B2
  9759. nop __LINE__
  9760. }
  9761. ;;
  // Step 2 (p3), with the p4 reloads for the next pass slotted alongside.
  9762. { .mfb
  9763. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  9764. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  9765. nop __LINE__
  9766. }
  9767. { .mfb
  9768. nop __LINE__
  9769. (p3) FMA f80 = f40, f57, f80 // A1 * B2
  9770. nop __LINE__
  9771. }
  9772. ;;
  9773. { .mfb
  9774. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  9775. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  9776. nop __LINE__
  9777. }
  9778. { .mfb
  9779. nop __LINE__
  9780. (p3) FMA f81 = f41, f57, f81 // A2 * B2
  9781. nop __LINE__
  9782. }
  9783. ;;
  9784. { .mfb
  9785. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  9786. (p3) FMA f96 = f42, f56, f96 // A3 * B1
  9787. nop __LINE__
  9788. }
  9789. { .mfb
  9790. nop __LINE__
  9791. (p3) FMA f112 = f42, f57, f112 // A3 * B2
  9792. nop __LINE__
  9793. }
  9794. ;;
  // L is decremented by hand for the compares; br.cloop counts on ar.lc.
  9795. { .mfi
  9796. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  9797. (p3) FMA f97 = f43, f56, f97 // A4 * B1
  9798. adds L = -1, L
  9799. }
  9800. { .mfb
  9801. nop __LINE__
  9802. (p3) FMA f113 = f43, f57, f113 // A4 * B2
  9803. br.cloop.sptk.few .L102
  9804. }
  9805. ;;
  // Fold the eight partial sums into f64/f65 and f96/f97.
  // NOTE(review): FCALC_A / FCALC_B are defined outside this chunk; presumably
  // they select add or subtract per conjugation variant - confirm.
  9806. { .mfb
  9807. nop __LINE__
  9808. FCALC_A f64 = f64, f81
  9809. nop __LINE__
  9810. }
  9811. { .mfb
  9812. nop __LINE__
  9813. FCALC_B f65 = f65, f80
  9814. nop __LINE__
  9815. }
  9816. { .mfb
  9817. nop __LINE__
  9818. FCALC_A f96 = f96, f113
  9819. nop __LINE__
  9820. }
  9821. { .mfb
  9822. nop __LINE__
  9823. FCALC_B f97 = f97, f112
  9824. nop __LINE__
  9825. }
  9826. ;;
  // .L108: solve/store step for the two-pair tile (f64/f65 and f96/f97).
  // Sequence: position the pointers, subtract the accumulated products from the
  // stored values, run the substitution for the active variant, write the
  // results to the packed buffer and to C1, clear the accumulators, then update
  // the I / KK / L / pointer bookkeeping.
  // NOTE(review): FSUB*, FMPY, FNMA, FMA_A..FMA_D, LDFPD and STFD are macros
  // defined outside this chunk; each FMPY + FMA_C/FMA_D group looks like a
  // complex multiply by the loaded pair - confirm against their definitions.
  9827. .L108:
  9828. #if defined(LN) || defined(RT)
  // r2 = KK - 2 for LN, KK - 1 for RT.
  9829. #ifdef LN
  9830. adds r2 = -2, KK
  9831. #else
  9832. adds r2 = -1, KK
  9833. #endif
  9834. ;;
  // Scale r2 by ZBASE_SHIFT; AOFFSET = AORIG + 2 * r2, BOFFSET = B + r2.
  9835. shladd r2 = r2, ZBASE_SHIFT, r0
  9836. ;;
  9837. shladd AOFFSET = r2, 1, AORIG
  9838. add BOFFSET = r2, B
  9839. ;;
  9840. #endif
  // Load the four stored values (BOFFSET for LN/LT, AOFFSET otherwise), restore
  // the pointer, and combine them with the accumulated sums.
  9841. #if defined(LN) || defined(LT)
  9842. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  9843. ;;
  9844. LDFPD f88, f89 = [BOFFSET]
  9845. adds BOFFSET = -2 * SIZE, BOFFSET
  9846. ;;
  9847. FSUB f64 = f72, f64
  9848. FSUB_A f65 = f73, f65
  9849. FSUB f96 = f88, f96
  9850. FSUB_A f97 = f89, f97
  9851. ;;
  9852. #else
  9853. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  9854. ;;
  9855. LDFPD f88, f89 = [AOFFSET]
  9856. adds AOFFSET = -2 * SIZE, AOFFSET
  9857. ;;
  9858. FSUB f64 = f72, f64
  9859. FSUB f65 = f73, f65
  9860. FSUB f96 = f88, f96
  9861. FSUB f97 = f89, f97
  9862. ;;
  9863. #endif
  // LN: coefficient pairs are read from AOFFSET + 6, + 4 and + 0 (in SIZE
  // units), leaving AOFFSET back at its starting value. The second pair
  // (f96/f97) is finished first, then eliminated from the first (f64/f65).
  9864. #ifdef LN
  9865. adds AOFFSET = 6 * SIZE, AOFFSET
  9866. ;;
  9867. LDFPD f104, f105 = [AOFFSET]
  9868. adds AOFFSET = - 2 * SIZE, AOFFSET
  9869. ;;
  9870. LDFPD f106, f107 = [AOFFSET]
  9871. adds AOFFSET = - 4 * SIZE, AOFFSET
  9872. ;;
  9873. LDFPD f120, f121 = [AOFFSET]
  9874. ;;
  9875. FMPY f32 = f104, f96
  9876. FMPY f33 = f105, f96
  9877. ;;
  9878. FMA_C f96 = f105, f97, f32
  9879. FMA_D f97 = f104, f97, f33
  9880. ;;
  9881. FNMA f64 = f106, f96, f64
  9882. FMA_A f65 = f107, f96, f65
  9883. ;;
  9884. FMA_B f64 = f107, f97, f64
  9885. FNMA f65 = f106, f97, f65
  9886. ;;
  9887. FMPY f32 = f120, f64
  9888. FMPY f33 = f121, f64
  9889. ;;
  9890. FMA_C f64 = f121, f65, f32
  9891. FMA_D f65 = f120, f65, f33
  9892. ;;
  9893. #endif
  // LT: coefficient pairs are read from AOFFSET + 0, + 2 and + 6 (in SIZE
  // units), then AOFFSET is rewound. The first pair (f64/f65) is finished
  // first, then eliminated from the second (f96/f97).
  9894. #ifdef LT
  9895. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  9896. ;;
  9897. LDFPD f74, f75 = [AOFFSET]
  9898. adds AOFFSET = 4 * SIZE, AOFFSET
  9899. ;;
  9900. LDFPD f90, f91 = [AOFFSET]
  9901. adds AOFFSET = - 6 * SIZE, AOFFSET
  9902. ;;
  9903. FMPY f32 = f72, f64
  9904. FMPY f33 = f73, f64
  9905. ;;
  9906. FMA_C f64 = f73, f65, f32
  9907. FMA_D f65 = f72, f65, f33
  9908. ;;
  9909. FNMA f96 = f74, f64, f96
  9910. FMA_A f97 = f75, f64, f97
  9911. ;;
  9912. FMA_B f96 = f75, f65, f96
  9913. FNMA f97 = f74, f65, f97
  9914. ;;
  9915. FMPY f32 = f90, f96
  9916. FMPY f33 = f91, f96
  9917. ;;
  9918. FMA_C f96 = f91, f97, f32
  9919. FMA_D f97 = f90, f97, f33
  9920. ;;
  9921. #endif
  // RN / RT: a single pair from BOFFSET multiplies both result pairs;
  // f32/f33 and f36/f37 are temporaries.
  9922. #ifdef RN
  9923. LDFPD f72, f73 = [BOFFSET]
  9924. ;;
  9925. FMPY f32 = f72, f64
  9926. FMPY f33 = f73, f64
  9927. FMPY f36 = f72, f96
  9928. FMPY f37 = f73, f96
  9929. ;;
  9930. FMA_C f64 = f73, f65, f32
  9931. FMA_D f65 = f72, f65, f33
  9932. FMA_C f96 = f73, f97, f36
  9933. FMA_D f97 = f72, f97, f37
  9934. ;;
  9935. #endif
  9936. #ifdef RT
  9937. LDFPD f72, f73 = [BOFFSET]
  9938. ;;
  9939. FMPY f32 = f72, f64
  9940. FMPY f33 = f73, f64
  9941. FMPY f36 = f72, f96
  9942. FMPY f37 = f73, f96
  9943. ;;
  9944. FMA_C f64 = f73, f65, f32
  9945. FMA_D f65 = f72, f65, f33
  9946. FMA_C f96 = f73, f97, f36
  9947. FMA_D f97 = f72, f97, f37
  9948. ;;
  9949. #endif
  // Write the four results back over the values they were read from, then
  // rewind the pointer by the 4 * SIZE the stores advanced it.
  9950. #if defined(LN) || defined(LT)
  9951. STFD [BOFFSET] = f64, SIZE
  9952. ;;
  9953. STFD [BOFFSET] = f65, SIZE
  9954. ;;
  9955. STFD [BOFFSET] = f96, SIZE
  9956. ;;
  9957. STFD [BOFFSET] = f97, SIZE
  9958. ;;
  9959. adds BOFFSET = - 4 * SIZE, BOFFSET
  9960. ;;
  9961. #else
  // AOFFSET2 is computed here, but every store in this branch goes through
  // AOFFSET.
  9962. adds AOFFSET2 = 4 * SIZE, AOFFSET
  9963. ;;
  9964. STFD [AOFFSET] = f64, SIZE
  9965. ;;
  9966. STFD [AOFFSET] = f65, SIZE
  9967. ;;
  9968. STFD [AOFFSET] = f96, SIZE
  9969. ;;
  9970. STFD [AOFFSET] = f97, SIZE
  9971. ;;
  9972. adds AOFFSET = - 4 * SIZE, AOFFSET
  9973. ;;
  9974. #endif
  // Store the results through C1. Under LN, C1 and C5 are stepped back by
  // 4 * SIZE before and after the stores; nothing is stored through C5 in this
  // section.
  9975. #ifdef LN
  9976. adds C1 = -4 * SIZE, C1
  9977. adds C5 = -4 * SIZE, C5
  9978. #endif
  9979. ;;
  9980. STFD [C1 ] = f64, SIZE
  9981. ;;
  9982. STFD [C1 ] = f65, SIZE
  9983. ;;
  9984. STFD [C1 ] = f96, SIZE
  9985. ;;
  9986. STFD [C1 ] = f97, SIZE
  9987. ;;
  // Clear every accumulator used by the multiply-accumulate loops.
  9988. mov f64 = f0
  9989. mov f65 = f0
  9990. mov f80 = f0
  9991. mov f81 = f0
  9992. mov f96 = f0
  9993. mov f97 = f0
  9994. mov f112 = f0
  9995. mov f113 = f0
  9996. ;;
  9997. #ifdef LN
  9998. adds C1 = -4 * SIZE, C1
  9999. adds C5 = -4 * SIZE, C5
  10000. #endif
  10001. ;;
  // Bookkeeping. No branch in this section reads the p6 set here; the cmp.eq
  // at .L091 overwrites it.
  10002. cmp.ne p6, p0 = 1, I
  10003. ;;
  10004. adds I = -1, I
  10005. ;;
  // r2 = K << ZBASE_SHIFT (only consumed by the RT update of AORIG below).
  10006. shladd r2 = K, ZBASE_SHIFT, r0
  10007. ;;
  10008. sub L = K, KK
  10009. ;;
  10010. #ifdef RT
  10011. shladd AORIG = r2, 1, AORIG
  10012. #endif
  10013. ;;
  // LT/RN: with L = (K - KK) << ZBASE_SHIFT, AOFFSET advances by 2 * L and
  // BOFFSET by L.
  10014. #if defined(LT) || defined(RN)
  10015. shladd L = L, ZBASE_SHIFT, r0
  10016. ;;
  10017. shladd AOFFSET = L, 1, AOFFSET
  10018. add BOFFSET = L, BOFFSET
  10019. #endif
  10020. ;;
  // KK moves by two for the left-side variants only.
  10021. #ifdef LT
  10022. adds KK = 2, KK
  10023. #elif defined LN
  10024. adds KK = -2, KK
  10025. #else
  10026. nop __LINE__
  10027. #endif
  10028. ;;
  // Loop count for the next tile: KK for LT/RN, K - KK otherwise.
  10029. #if defined(LT) || defined(RN)
  10030. mov L = KK
  10031. #else
  10032. sub L = K, KK
  10033. #endif
  // .L091: I = M >> 2 is the number of passes through the .L092 tile loop.
  // When it is zero (p6), skip directly to .L119.
  10034. .align 16
  10035. .L091:
  10036. shr I = M, 2
  10037. ;;
  10038. cmp.eq p6, p7 = 0, I
  10039. (p6) br.cond.dpnt .L119
  10040. ;;
  // .L092: top of the tile loop that runs I times (the back-branch is at the
  // end of the .L098 section). Sets up BOFFSET/AOFFSET, the C1/C5 output
  // pointers and prefetch addresses, preloads the first operands and derives
  // the loop count for .L093. L is expected to hold KK (LT/RN) or K - KK
  // (LN/RT) on entry; every path into this label sets it that way.
  10041. .align 16
  10042. .L092:
  // p7 = (L != 0) guards every preload below.
  // r2 = K << (2 + ZBASE_SHIFT), r3 = KK << ZBASE_SHIFT.
  10043. { .mmi
  10044. cmp.ne p7, p0 = r0, L
  10045. adds BOFFSET = 0 * SIZE, B
  10046. shl r2 = K, 2 + ZBASE_SHIFT
  10047. }
  10048. { .mmi
  10049. shladd r3 = KK, ZBASE_SHIFT, r0
  10050. nop __LINE__
  10051. nop __LINE__
  10052. }
  10053. ;;
  // LT/RN: BOFFSET stays at B and AOFFSET is left as it was on entry.
  10054. #if defined(LT) || defined(RN)
  10055. { .mfb
  10056. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  10057. mov f66 = f0
  10058. nop __LINE__
  10059. }
  10060. { .mmf
  10061. nop __LINE__
  10062. nop __LINE__
  10063. mov f67 = f0
  10064. }
  10065. ;;
  10066. #else
  // LN/RT: BOFFSET = B + r3; LN additionally moves AORIG back by r2;
  // AOFFSET = AORIG + (r3 << 2).
  10067. { .mfi
  10068. add BOFFSET = r3, B
  10069. mov f66 = f0
  10070. #ifdef LN
  10071. sub AORIG = AORIG, r2
  10072. #else
  10073. nop __LINE__
  10074. #endif
  10075. }
  10076. ;;
  10077. { .mfi
  10078. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  10079. mov f67 = f0
  10080. shladd AOFFSET = r3, 2, AORIG
  10081. }
  10082. ;;
  10083. #endif
  10084. ;;
  // Loop count: L = ((L + 1) >> 1) - 1, because .L093 handles two steps per
  // pass. p12 is set when bit 0 of (L + 1) is zero; .L093 uses it to drop the
  // second step on its final pass. Four pairs (f32..f39) are preloaded from
  // AOFFSET along the way.
  10085. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  10086. adds L = 1, L
  10087. ;;
  10088. { .mfi
  10089. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  10090. tbit.z p12, p0 = L, 0
  10091. }
  10092. { .mfi
  10093. adds PREC = CPREFETCHSIZE * SIZE, C1
  10094. shr L = L, 1
  10095. }
  10096. ;;
  10097. { .mfi
  10098. adds PREA = (PREFETCHSIZE + 0) * SIZE, AOFFSET
  10099. adds L = -1, L
  10100. }
  // NOTE(review): CPREFETCH is a macro defined outside this chunk; presumably a
  // prefetch of the output line at PREC - confirm.
  10101. { .mmf
  10102. (p7) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  10103. CPREFETCH [PREC]
  10104. }
  10105. ;;
  10106. { .mfi
  10107. (p7) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  10108. mov ar.lc = L
  10109. }
  // C5 = C1 + 4 * SIZE is the second output pointer; p3 starts out true.
  10110. { .mmi
  10111. adds C5 = 4 * SIZE, C1
  10112. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  10113. cmp.eq p3, p0 = r0, r0
  10114. }
  10115. ;;
  // L == -1 means there is nothing to accumulate: skip the loop entirely.
  10116. cmp.eq p6, p0 = -1, L
  10117. (p6) br.cond.dpnt .L098
  10118. ;;
  // .L093: multiply-accumulate loop for the four-pair tile, two steps per pass.
  //   step 1 (always):     f32..f39 (from AOFFSET) times f48/f49 (from BOFFSET)
  //   step 2 (only if p3): f40..f47 (from AOFFSET) times f56/f57 (from BOFFSET)
  // Results accumulate straight into f64/f65, f80/f81, f96/f97 and f112/f113
  // through FMA, FMA_A and FMA_B, so no FCALC fold follows this loop.
  // p4 = (L != 0) guards the reloads for the next pass; when p12 is set, p3
  // becomes (L != 0) so step 2 is skipped once L is 0.
  // NOTE(review): FMA, FMA_A, FMA_B and LDFPD are macros defined outside this
  // chunk; FMA_A / FMA_B presumably differ from FMA only in sign - confirm.
  10119. .align 16
  10120. .L093:
  10121. /* 1 */
  10122. { .mfi
  10123. lfetch.nt1 [PREA], 16 * SIZE
  10124. FMA f64 = f32, f48, f64 // A1 * B1
  10125. cmp.ne p4, p5 = 0, L
  10126. }
  10127. { .mfi
  10128. nop __LINE__
  10129. FMA_B f65 = f32, f49, f65 // A1 * B2
  10130. (p12) cmp.ne p3, p0 = 0, L
  10131. }
  10132. ;;
  10133. { .mfi
  10134. lfetch.nt1 [PREB], 4 * SIZE
  10135. FMA f80 = f34, f48, f80 // A3 * B1
  10136. nop __LINE__
  10137. }
  10138. { .mfi
  10139. nop __LINE__
  10140. FMA_B f81 = f34, f49, f81 // A3 * B2
  10141. nop __LINE__
  10142. }
  10143. ;;
  // Operand loads for step 2 (p3) are interleaved with the rest of step 1.
  10144. { .mfi
  10145. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  10146. FMA f96 = f36, f48, f96 // A5 * B1
  10147. nop __LINE__
  10148. }
  10149. { .mfi
  10150. nop __LINE__
  10151. FMA_B f97 = f36, f49, f97 // A5 * B2
  10152. nop __LINE__
  10153. }
  10154. ;;
  10155. { .mfb
  10156. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  10157. FMA f112 = f38, f48, f112 // A7 * B1
  10158. nop __LINE__
  10159. }
  10160. { .mfb
  10161. nop __LINE__
  10162. FMA_B f113 = f38, f49, f113 // A7 * B2
  10163. nop __LINE__
  10164. }
  10165. ;;
  10166. { .mfb
  10167. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  10168. FMA f65 = f33, f48, f65 // A2 * B1
  10169. nop __LINE__
  10170. }
  10171. { .mfb
  10172. nop __LINE__
  10173. FMA_A f64 = f33, f49, f64 // A2 * B2
  10174. nop __LINE__
  10175. }
  10176. ;;
  10177. { .mfb
  10178. (p3) LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  10179. FMA f81 = f35, f48, f81 // A4 * B1
  10180. nop __LINE__
  10181. }
  10182. { .mfb
  10183. nop __LINE__
  10184. FMA_A f80 = f35, f49, f80 // A4 * B2
  10185. nop __LINE__
  10186. }
  10187. ;;
  10188. { .mfb
  10189. (p3) LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  10190. FMA f97 = f37, f48, f97 // A6 * B1
  10191. nop __LINE__
  10192. }
  10193. { .mfb
  10194. nop __LINE__
  10195. FMA_A f96 = f37, f49, f96 // A6 * B2
  10196. nop __LINE__
  10197. }
  10198. ;;
  // From here the p4 reloads for the next pass overlap the end of step 1 and
  // the start of step 2.
  10199. { .mfb
  10200. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  10201. FMA f113 = f39, f48, f113 // A8 * B1
  10202. nop __LINE__
  10203. }
  10204. { .mfb
  10205. nop __LINE__
  10206. FMA_A f112 = f39, f49, f112 // A8 * B2
  10207. nop __LINE__
  10208. }
  10209. ;;
  10210. { .mfb
  10211. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  10212. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  10213. nop __LINE__
  10214. }
  10215. { .mfb
  10216. nop __LINE__
  10217. (p3) FMA_B f65 = f40, f57, f65 // A1 * B2
  10218. nop __LINE__
  10219. }
  10220. ;;
  10221. { .mfb
  10222. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  10223. (p3) FMA f80 = f42, f56, f80 // A3 * B1
  10224. nop __LINE__
  10225. }
  10226. { .mfb
  10227. nop __LINE__
  10228. (p3) FMA_B f81 = f42, f57, f81 // A3 * B2
  10229. nop __LINE__
  10230. }
  10231. ;;
  10232. { .mfb
  10233. (p4) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  10234. (p3) FMA f96 = f44, f56, f96 // A5 * B1
  10235. nop __LINE__
  10236. }
  10237. { .mfb
  10238. nop __LINE__
  10239. (p3) FMA_B f97 = f44, f57, f97 // A5 * B2
  10240. nop __LINE__
  10241. }
  10242. ;;
  10243. { .mfb
  10244. (p4) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  10245. (p3) FMA f112 = f46, f56, f112 // A7 * B1
  10246. nop __LINE__
  10247. }
  10248. { .mfb
  10249. nop __LINE__
  10250. (p3) FMA_B f113 = f46, f57, f113 // A7 * B2
  10251. nop __LINE__
  10252. }
  10253. ;;
  10254. { .mfb
  10255. nop __LINE__
  10256. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  10257. nop __LINE__
  10258. }
  10259. { .mfb
  10260. nop __LINE__
  10261. (p3) FMA_A f64 = f41, f57, f64 // A2 * B2
  10262. nop __LINE__
  10263. }
  10264. ;;
  10265. { .mfb
  10266. nop __LINE__
  10267. (p3) FMA f81 = f43, f56, f81 // A4 * B1
  10268. nop __LINE__
  10269. }
  10270. { .mfb
  10271. nop __LINE__
  10272. (p3) FMA_A f80 = f43, f57, f80 // A4 * B2
  10273. nop __LINE__
  10274. }
  10275. ;;
  10276. { .mfb
  10277. nop __LINE__
  10278. (p3) FMA f97 = f45, f56, f97 // A6 * B1
  10279. nop __LINE__
  10280. }
  10281. { .mfb
  10282. nop __LINE__
  10283. (p3) FMA_A f96 = f45, f57, f96 // A6 * B2
  10284. nop __LINE__
  10285. }
  10286. ;;
  // L is decremented by hand for the compares; br.cloop counts on ar.lc.
  10287. { .mfi
  10288. nop __LINE__
  10289. (p3) FMA f113 = f47, f56, f113 // A8 * B1
  10290. adds L = -1, L
  10291. }
  10292. { .mfb
  10293. nop __LINE__
  10294. (p3) FMA_A f112 = f47, f57, f112 // A8 * B2
  10295. br.cloop.sptk.few .L093
  10296. }
  10297. ;;
  // .L098: solve/store step for the four-pair tile (f64/f65, f80/f81, f96/f97,
  // f112/f113), followed by the back-branch to .L092 while tiles remain.
  // Sequence: position the pointers, subtract the accumulated products from the
  // stored values, run the substitution for the active variant, write the
  // results to the packed buffer and to C1/C5, clear the accumulators, then
  // update the I / KK / L / pointer bookkeeping.
  // NOTE(review): FSUB*, FMPY, FNMA, FMA_A..FMA_D, LDFPD and STFD are macros
  // defined outside this chunk; each FMPY + FMA_C/FMA_D group looks like a
  // complex multiply by the loaded pair, and each FNMA/FMA_A + FMA_B/FNMA group
  // like a complex multiply-subtract - confirm against their definitions.
  10298. .L098:
  10299. #if defined(LN) || defined(RT)
  // r2 = KK - 4 for LN, KK - 1 for RT.
  10300. #ifdef LN
  10301. adds r2 = -4, KK
  10302. #else
  10303. adds r2 = -1, KK
  10304. #endif
  10305. ;;
  // Scale r2 by ZBASE_SHIFT; AOFFSET = AORIG + 4 * r2, BOFFSET = B + r2.
  10306. shladd r2 = r2, ZBASE_SHIFT, r0
  10307. ;;
  10308. shladd AOFFSET = r2, 2, AORIG
  10309. add BOFFSET = r2, B
  10310. ;;
  10311. #endif
  // Load the eight stored values (BOFFSET for LN/LT, AOFFSET otherwise),
  // restore the pointer, and combine them with the accumulated sums.
  10312. #if defined(LN) || defined(LT)
  10313. LDFPD f72, f73 = [BOFFSET], 2 * SIZE
  10314. ;;
  10315. LDFPD f74, f75 = [BOFFSET], 2 * SIZE
  10316. ;;
  10317. LDFPD f88, f89 = [BOFFSET], 2 * SIZE
  10318. ;;
  10319. LDFPD f90, f91 = [BOFFSET]
  10320. adds BOFFSET = -6 * SIZE, BOFFSET
  10321. ;;
  10322. FSUB f64 = f72, f64
  10323. FSUB_A f65 = f73, f65
  10324. FSUB f80 = f74, f80
  10325. FSUB_A f81 = f75, f81
  10326. FSUB f96 = f88, f96
  10327. FSUB_A f97 = f89, f97
  10328. FSUB f112 = f90, f112
  10329. FSUB_A f113 = f91, f113
  10330. ;;
  10331. #else
  10332. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  10333. ;;
  10334. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  10335. ;;
  10336. LDFPD f88, f89 = [AOFFSET], 2 * SIZE
  10337. ;;
  10338. LDFPD f90, f91 = [AOFFSET]
  10339. adds AOFFSET = -6 * SIZE, AOFFSET
  10340. ;;
  10341. FSUB f64 = f72, f64
  10342. FSUB f65 = f73, f65
  10343. FSUB f80 = f74, f80
  10344. FSUB f81 = f75, f81
  10345. FSUB f96 = f88, f96
  10346. FSUB f97 = f89, f97
  10347. FSUB f112 = f90, f112
  10348. FSUB f113 = f91, f113
  10349. ;;
  10350. #endif
  // LN: ten coefficient pairs are read walking AOFFSET downwards, at offsets
  // 30, 28, 26, 24, 20, 18, 16, 10, 8 and 0 (in SIZE units); AOFFSET ends back
  // at its starting value. Results are finished in the order f112/f113,
  // f96/f97, f80/f81, f64/f65, each being eliminated from those still pending.
  10351. #ifdef LN
  10352. adds AOFFSET = 30 * SIZE, AOFFSET
  10353. ;;
  10354. LDFPD f72, f73 = [AOFFSET]
  10355. adds AOFFSET = - 2 * SIZE, AOFFSET
  10356. ;;
  10357. LDFPD f74, f75 = [AOFFSET]
  10358. adds AOFFSET = - 2 * SIZE, AOFFSET
  10359. ;;
  10360. LDFPD f76, f77 = [AOFFSET]
  10361. adds AOFFSET = - 2 * SIZE, AOFFSET
  10362. ;;
  10363. LDFPD f78, f79 = [AOFFSET]
  10364. adds AOFFSET = - 4 * SIZE, AOFFSET
  10365. ;;
  10366. LDFPD f88, f89 = [AOFFSET]
  10367. adds AOFFSET = - 2 * SIZE, AOFFSET
  10368. ;;
  10369. LDFPD f90, f91 = [AOFFSET]
  10370. adds AOFFSET = - 2 * SIZE, AOFFSET
  10371. ;;
  10372. LDFPD f92, f93 = [AOFFSET]
  10373. adds AOFFSET = - 6 * SIZE, AOFFSET
  10374. ;;
  10375. LDFPD f104, f105 = [AOFFSET]
  10376. adds AOFFSET = - 2 * SIZE, AOFFSET
  10377. ;;
  10378. LDFPD f106, f107 = [AOFFSET]
  10379. adds AOFFSET = - 8 * SIZE, AOFFSET
  10380. ;;
  10381. LDFPD f120, f121 = [AOFFSET]
  10382. ;;
  10383. FMPY f32 = f72, f112
  10384. FMPY f33 = f73, f112
  10385. ;;
  10386. FMA_C f112 = f73, f113, f32
  10387. FMA_D f113 = f72, f113, f33
  10388. ;;
  10389. FNMA f96 = f74, f112, f96
  10390. FMA_A f97 = f75, f112, f97
  10391. FNMA f80 = f76, f112, f80
  10392. FMA_A f81 = f77, f112, f81
  10393. FNMA f64 = f78, f112, f64
  10394. FMA_A f65 = f79, f112, f65
  10395. ;;
  10396. FMA_B f96 = f75, f113, f96
  10397. FNMA f97 = f74, f113, f97
  10398. FMA_B f80 = f77, f113, f80
  10399. FNMA f81 = f76, f113, f81
  10400. FMA_B f64 = f79, f113, f64
  10401. FNMA f65 = f78, f113, f65
  10402. ;;
  10403. FMPY f32 = f88, f96
  10404. FMPY f33 = f89, f96
  10405. ;;
  10406. FMA_C f96 = f89, f97, f32
  10407. FMA_D f97 = f88, f97, f33
  10408. ;;
  10409. FNMA f80 = f90, f96, f80
  10410. FMA_A f81 = f91, f96, f81
  10411. FNMA f64 = f92, f96, f64
  10412. FMA_A f65 = f93, f96, f65
  10413. ;;
  10414. FMA_B f80 = f91, f97, f80
  10415. FNMA f81 = f90, f97, f81
  10416. FMA_B f64 = f93, f97, f64
  10417. FNMA f65 = f92, f97, f65
  10418. ;;
  10419. FMPY f32 = f104, f80
  10420. FMPY f33 = f105, f80
  10421. ;;
  10422. FMA_C f80 = f105, f81, f32
  10423. FMA_D f81 = f104, f81, f33
  10424. ;;
  10425. FNMA f64 = f106, f80, f64
  10426. FMA_A f65 = f107, f80, f65
  10427. ;;
  10428. FMA_B f64 = f107, f81, f64
  10429. FNMA f65 = f106, f81, f65
  10430. ;;
  10431. FMPY f32 = f120, f64
  10432. FMPY f33 = f121, f64
  10433. ;;
  10434. FMA_C f64 = f121, f65, f32
  10435. FMA_D f65 = f120, f65, f33
  10436. ;;
  10437. #endif
  // LT: ten coefficient pairs are read walking AOFFSET upwards, at offsets
  // 0, 2, 4, 6, 10, 12, 14, 20, 22 and 30 (in SIZE units), then AOFFSET is
  // rewound by 30 * SIZE. Results are finished in the order f64/f65, f80/f81,
  // f96/f97, f112/f113.
  10438. #ifdef LT
  10439. LDFPD f72, f73 = [AOFFSET], 2 * SIZE
  10440. ;;
  10441. LDFPD f74, f75 = [AOFFSET], 2 * SIZE
  10442. ;;
  10443. LDFPD f76, f77 = [AOFFSET], 2 * SIZE
  10444. ;;
  10445. LDFPD f78, f79 = [AOFFSET]
  10446. adds AOFFSET = 4 * SIZE, AOFFSET
  10447. ;;
  10448. LDFPD f90, f91 = [AOFFSET], 2 * SIZE
  10449. ;;
  10450. LDFPD f92, f93 = [AOFFSET], 2 * SIZE
  10451. ;;
  10452. LDFPD f94, f95 = [AOFFSET]
  10453. adds AOFFSET = 6 * SIZE, AOFFSET
  10454. ;;
  10455. LDFPD f108, f109 = [AOFFSET], 2 * SIZE
  10456. ;;
  10457. LDFPD f110, f111 = [AOFFSET]
  10458. adds AOFFSET = 8 * SIZE, AOFFSET
  10459. ;;
  10460. LDFPD f126, f127 = [AOFFSET]
  10461. adds AOFFSET = - 30 * SIZE, AOFFSET
  10462. ;;
  10463. FMPY f32 = f72, f64
  10464. FMPY f33 = f73, f64
  10465. ;;
  10466. FMA_C f64 = f73, f65, f32
  10467. FMA_D f65 = f72, f65, f33
  10468. ;;
  10469. FNMA f80 = f74, f64, f80
  10470. FMA_A f81 = f75, f64, f81
  10471. FNMA f96 = f76, f64, f96
  10472. FMA_A f97 = f77, f64, f97
  10473. FNMA f112 = f78, f64, f112
  10474. FMA_A f113 = f79, f64, f113
  10475. ;;
  10476. FMA_B f80 = f75, f65, f80
  10477. FNMA f81 = f74, f65, f81
  10478. FMA_B f96 = f77, f65, f96
  10479. FNMA f97 = f76, f65, f97
  10480. FMA_B f112 = f79, f65, f112
  10481. FNMA f113 = f78, f65, f113
  10482. ;;
  10483. FMPY f32 = f90, f80
  10484. FMPY f33 = f91, f80
  10485. ;;
  10486. FMA_C f80 = f91, f81, f32
  10487. FMA_D f81 = f90, f81, f33
  10488. ;;
  10489. FNMA f96 = f92, f80, f96
  10490. FMA_A f97 = f93, f80, f97
  10491. FNMA f112 = f94, f80, f112
  10492. FMA_A f113 = f95, f80, f113
  10493. ;;
  10494. FMA_B f96 = f93, f81, f96
  10495. FNMA f97 = f92, f81, f97
  10496. FMA_B f112 = f95, f81, f112
  10497. FNMA f113 = f94, f81, f113
  10498. ;;
  10499. FMPY f32 = f108, f96
  10500. FMPY f33 = f109, f96
  10501. ;;
  10502. FMA_C f96 = f109, f97, f32
  10503. FMA_D f97 = f108, f97, f33
  10504. ;;
  10505. FNMA f112 = f110, f96, f112
  10506. FMA_A f113 = f111, f96, f113
  10507. ;;
  10508. FMA_B f112 = f111, f97, f112
  10509. FNMA f113 = f110, f97, f113
  10510. ;;
  10511. FMPY f32 = f126, f112
  10512. FMPY f33 = f127, f112
  10513. ;;
  10514. FMA_C f112 = f127, f113, f32
  10515. FMA_D f113 = f126, f113, f33
  10516. ;;
  10517. #endif
  // RN / RT: a single pair from BOFFSET multiplies all four result pairs;
  // f32..f39 are temporaries.
  10518. #ifdef RN
  10519. LDFPD f72, f73 = [BOFFSET]
  10520. ;;
  10521. FMPY f32 = f72, f64
  10522. FMPY f33 = f73, f64
  10523. FMPY f34 = f72, f80
  10524. FMPY f35 = f73, f80
  10525. FMPY f36 = f72, f96
  10526. FMPY f37 = f73, f96
  10527. FMPY f38 = f72, f112
  10528. FMPY f39 = f73, f112
  10529. ;;
  10530. FMA_C f64 = f73, f65, f32
  10531. FMA_D f65 = f72, f65, f33
  10532. FMA_C f80 = f73, f81, f34
  10533. FMA_D f81 = f72, f81, f35
  10534. FMA_C f96 = f73, f97, f36
  10535. FMA_D f97 = f72, f97, f37
  10536. FMA_C f112 = f73, f113, f38
  10537. FMA_D f113 = f72, f113, f39
  10538. ;;
  10539. #endif
  10540. #ifdef RT
  10541. LDFPD f72, f73 = [BOFFSET]
  10542. ;;
  10543. FMPY f32 = f72, f64
  10544. FMPY f33 = f73, f64
  10545. FMPY f34 = f72, f80
  10546. FMPY f35 = f73, f80
  10547. FMPY f36 = f72, f96
  10548. FMPY f37 = f73, f96
  10549. FMPY f38 = f72, f112
  10550. FMPY f39 = f73, f112
  10551. ;;
  10552. FMA_C f64 = f73, f65, f32
  10553. FMA_D f65 = f72, f65, f33
  10554. FMA_C f80 = f73, f81, f34
  10555. FMA_D f81 = f72, f81, f35
  10556. FMA_C f96 = f73, f97, f36
  10557. FMA_D f97 = f72, f97, f37
  10558. FMA_C f112 = f73, f113, f38
  10559. FMA_D f113 = f72, f113, f39
  10560. ;;
  10561. #endif
  // Write the eight results back through two pointers 4 * SIZE apart. The last
  // store of each stream post-increments by 5 * SIZE, so the first pointer
  // finishes 8 * SIZE past its start and is then rewound by 8 * SIZE.
  10562. #if defined(LN) || defined(LT)
  10563. adds BOFFSET2 = 4 * SIZE, BOFFSET
  10564. ;;
  10565. STFD [BOFFSET] = f64, SIZE
  10566. STFD [BOFFSET2] = f96, SIZE
  10567. ;;
  10568. STFD [BOFFSET] = f65, SIZE
  10569. STFD [BOFFSET2] = f97, SIZE
  10570. ;;
  10571. STFD [BOFFSET] = f80, SIZE
  10572. STFD [BOFFSET2] = f112, SIZE
  10573. ;;
  10574. STFD [BOFFSET] = f81, 5 * SIZE
  10575. STFD [BOFFSET2] = f113, 5 * SIZE
  10576. ;;
  10577. adds BOFFSET = - 8 * SIZE, BOFFSET
  10578. ;;
  10579. #else
  10580. adds AOFFSET2 = 4 * SIZE, AOFFSET
  10581. ;;
  10582. STFD [AOFFSET] = f64, SIZE
  10583. STFD [AOFFSET2] = f96, SIZE
  10584. ;;
  10585. STFD [AOFFSET] = f65, SIZE
  10586. STFD [AOFFSET2] = f97, SIZE
  10587. ;;
  10588. STFD [AOFFSET] = f80, SIZE
  10589. STFD [AOFFSET2] = f112, SIZE
  10590. ;;
  10591. STFD [AOFFSET] = f81, 5 * SIZE
  10592. STFD [AOFFSET2] = f113, 5 * SIZE
  10593. ;;
  10594. adds AOFFSET = - 8 * SIZE, AOFFSET
  10595. ;;
  10596. #endif
  // Store the same results through C1 and C5 (C5 = C1 + 4 * SIZE, set in
  // .L092). Both advance by 8 * SIZE in total. Under LN they are stepped back
  // by 8 * SIZE before the stores and again after them.
  10597. #ifdef LN
  10598. adds C1 = -8 * SIZE, C1
  10599. adds C5 = -8 * SIZE, C5
  10600. #endif
  10601. ;;
  10602. STFD [C1 ] = f64, SIZE
  10603. STFD [C5 ] = f96, SIZE
  10604. ;;
  10605. STFD [C1 ] = f65, SIZE
  10606. STFD [C5 ] = f97, SIZE
  10607. ;;
  10608. STFD [C1 ] = f80, SIZE
  10609. STFD [C5 ] = f112, SIZE
  10610. ;;
  10611. STFD [C1 ] = f81, 5 * SIZE
  10612. STFD [C5 ] = f113, 5 * SIZE
  10613. ;;
  // Clear every accumulator before the next tile.
  10614. mov f64 = f0
  10615. mov f65 = f0
  10616. mov f80 = f0
  10617. mov f81 = f0
  10618. mov f96 = f0
  10619. mov f97 = f0
  10620. mov f112 = f0
  10621. mov f113 = f0
  10622. ;;
  10623. #ifdef LN
  10624. adds C1 = -8 * SIZE, C1
  10625. adds C5 = -8 * SIZE, C5
  10626. #endif
  10627. ;;
  // p6 = (I != 1): another tile remains after this one. It is consumed by the
  // back-branch at the end of this section.
  10628. cmp.ne p6, p0 = 1, I
  10629. ;;
  10630. adds I = -1, I
  10631. ;;
  // r2 = K << ZBASE_SHIFT (only consumed by the RT update of AORIG below).
  10632. shladd r2 = K, ZBASE_SHIFT, r0
  10633. ;;
  10634. sub L = K, KK
  10635. ;;
  10636. #ifdef RT
  10637. shladd AORIG = r2, 2, AORIG
  10638. #endif
  10639. ;;
  // LT/RN: with L = (K - KK) << ZBASE_SHIFT, AOFFSET advances by 4 * L and
  // BOFFSET by L.
  10640. #if defined(LT) || defined(RN)
  10641. shladd L = L, ZBASE_SHIFT, r0
  10642. ;;
  10643. shladd AOFFSET = L, 2, AOFFSET
  10644. add BOFFSET = L, BOFFSET
  10645. #endif
  10646. ;;
  // KK moves by four for the left-side variants only.
  10647. #ifdef LT
  10648. adds KK = 4, KK
  10649. #elif defined LN
  10650. adds KK = -4, KK
  10651. #else
  10652. nop __LINE__
  10653. #endif
  10654. ;;
  // Loop count for the next tile: KK for LT/RN, K - KK otherwise.
  10655. #if defined(LT) || defined(RN)
  10656. mov L = KK
  10657. #else
  10658. sub L = K, KK
  10659. #endif
  10660. ;;
  10661. (p6) br.cond.dptk .L092
  10662. ;;
  // .L119: end-of-column bookkeeping once all tiles have been handled.
  //   LN:    B advances by K << ZBASE_SHIFT (KK8 is the scratch register).
  //   LT/RN: B takes over the current BOFFSET.
  //   RN:    KK += 1;  RT: KK -= 1.
  // AOFFSET is then reset to A before falling through to .L999.
  10663. .align 16
  10664. .L119:
  10665. #ifdef LN
  10666. shladd KK8 = K, ZBASE_SHIFT, r0
  10667. ;;
  10668. add B = KK8, B
  10669. #endif
  10670. #if defined(LT) || defined(RN)
  10671. mov B = BOFFSET
  10672. #endif
  10673. #ifdef RN
  10674. adds KK = 1, KK
  10675. #endif
  10676. #ifdef RT
  10677. adds KK = -1, KK
  10678. #endif
  10679. ;;
  10680. { .mmi
  10681. mov AOFFSET = A
  10682. nop __LINE__
  10683. }
  10684. ;;
  // .L999: common exit. Restores the loop-count register (ar.lc) from ARLC and
  // all predicate registers from PR (mask -1); when built with TRMMKERNEL it
  // also restores ar.pfs from ARPFS. Returns through b0.
  // NOTE(review): ARLC, PR and ARPFS are register aliases defined outside this
  // chunk; presumably the prologue saved the originals there - confirm.
  10685. .align 16
  10686. .L999:
  10687. { .mii
  10688. nop __LINE__
  10689. mov ar.lc = ARLC
  10690. mov pr = PR, -1
  10691. }
  10692. { .mib
  10693. nop __LINE__
  10694. #ifdef TRMMKERNEL
  10695. mov ar.pfs = ARPFS
  10696. #else
  10697. nop __LINE__
  10698. #endif
  10699. br.ret.sptk.many b0
  10700. }
  // EPILOGUE is a macro defined outside this chunk that closes the routine.
  10701. EPILOGUE