You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

trsm_kernel_RT.S 279 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
6622166231662416625166261662716628166291663016631166321663316634166351663616637166381663916640166411664216643166441664516646166471664816649166501665116652166531665416655166561665716658166591666016661166621666316664166651666616667166681666916670166711667216673166741667516676166771667816679166801668116682166831668416685166861668716688
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #define ASSEMBLER /* set before common.h is pulled in; presumably selects its assembler-only definitions -- confirm in common.h */
  39. #include "common.h" /* not defined in this file: PROLOGUE, PROFCODE, SIZE, BASE_SHIFT, LDFD/LDFPD/STFD, FMA/FNMA/FMPY/FSUB -- expected to come from here */
  40. #ifdef DOUBLE /* prefetch distance, counted in elements (every use multiplies it by SIZE) */
  41. #define PREFETCHSIZE (16 * 8) /* DOUBLE build: 128 elements */
  42. #else
  43. #define PREFETCHSIZE (32 * 4) /* non-DOUBLE build: also 128 elements */
  44. #endif
  45. #ifndef LN /* element offset from C1 at which the C tile is prefetched */
  46. #define CPREFETCHSIZE 8 /* every variant except LN: prefetch 8 elements past C1 */
  47. #else
  48. #define CPREFETCHSIZE -8 /* LN: prefetch 8 elements before C1 (LN steps C1 backwards by 8 * SIZE) */
  49. #endif
  50. #define CPREFETCH lfetch.excl.nt1 /* instruction issued on [PREC] to prefetch the C tile */
  51. #define M r32 /* input register (alloc declares 8 inputs, r32-r39); M >> 3 gives the number of 8-wide blocks */
  52. #define N r33 /* input register; bit 0 is tested at .L130 to pick the single-column path */
  53. #define K r34 /* input register; scaled by BASE_SHIFT to form panel strides */
  54. #define A r36 /* input register: base pointer of the A panel */
  55. #define B r37 /* input register: base pointer of the B panel */
  56. #define C r38 /* input register: base pointer of the C matrix */
  57. #define LDC r39 /* input register; shifted left by BASE_SHIFT in the prologue, so it is a byte stride afterwards */
  58. #define I r15 /* block counter, initialised as M >> 3 and decremented per 8-wide block */
  59. #define J r16 /* presumably the column-block counter paired with I -- TODO confirm against the N loops */
  60. #define AOFFSET r17 /* running (post-incremented) load/store pointer into A */
  61. #define BOFFSET r18 /* running (post-incremented) load/store pointer into B */
  62. #define TEMP r19 /* presumably a scratch register -- TODO confirm */
  63. #define L r20 /* inner-loop trip count: KK or K - KK, halved and biased by -1 before being written to ar.lc */
  64. #define C1 r21 /* C1..C8: pointers into C; C1 is set from C at the start of a column pass */
  65. #define C2 r22
  66. #define C3 r23
  67. #define C4 r24
  68. #define C5 r25
  69. #define C6 r26
  70. #define C7 r27
  71. #define C8 r28
  72. #define C9 loc0 /* C9..C16 live in stacked local registers (alloc declares 16 locals); C9 is set to C1 + 4 * SIZE */
  73. #define C10 loc1
  74. #define C11 loc2
  75. #define C12 loc3
  76. #define C13 loc4
  77. #define C14 loc5
  78. #define C15 loc6
  79. #define C16 loc7
  80. #define PREA r8 /* prefetch address ahead of AOFFSET; note r8 is first used raw as an FP spill pointer in the prologue */
  81. #define PREB r9 /* prefetch address ahead of BOFFSET; note r9 is first used raw as an FP spill pointer in the prologue */
  82. #define PREC r10 /* prefetch address for C: C1 + CPREFETCHSIZE * SIZE */
  83. #define SP r12 /* stack pointer; the prologue lowers it by 6 * 16 bytes for the f16-f21 spill area */
  84. #define ARLC r29 /* holds the caller's ar.lc, copied in the prologue */
  85. #define PR r30 /* holds the caller's predicate registers, copied in the prologue */
  86. #define ARPFS r31 /* receives ar.pfs from the alloc instruction */
  87. #define ALPHA f8 /* floating-point register f8; presumably carries the alpha argument -- TODO confirm */
  88. #define AORIG loc8 /* saved A base for the LN/RT variants; AOFFSET is recomputed from it */
  89. #define KK loc9 /* running offset counter, seeded from OFFSET in a variant-specific way */
  90. #define KK8 loc10 /* presumably a scaled copy of KK -- TODO confirm */
  91. #define OFFSET loc11 /* loaded with ld8 from [entry SP + 16] at the start of the body */
  92. #define AOFFSET2 loc12 /* second store pointer: AOFFSET + 4 * SIZE */
  93. #define BOFFSET2 loc13 /* second store pointer: BOFFSET + 4 * SIZE */
  94. PROLOGUE
  95. .prologue
  96. PROFCODE
  97. { .mmi
  98. .save ar.pfs, ARPFS
  99. alloc ARPFS = ar.pfs, 8, 16, 0, 0
  100. adds r14 = 16, SP
  101. mov ARLC = ar.lc
  102. }
  103. { .mmi
  104. adds r8 = -6 * 16, SP
  105. adds r9 = -5 * 16, SP
  106. adds SP = -6 * 16, SP
  107. }
  108. ;;
  109. { .mmi
  110. stf.spill [r8] = f16, 32
  111. stf.spill [r9] = f17, 32
  112. mov PR = pr
  113. }
  114. ;;
  115. { .mmi
  116. stf.spill [r8] = f18, 32
  117. stf.spill [r9] = f19, 32
  118. nop __LINE__
  119. }
  120. ;;
  121. { .mmi
  122. stf.spill [r8] = f20
  123. stf.spill [r9] = f21
  124. shladd LDC = LDC, BASE_SHIFT, r0
  125. }
  126. ;;
  127. .body
  128. { .mmi
  129. ld8 OFFSET = [r14]
  130. mov AOFFSET = A
  131. }
  132. ;;
  133. #ifdef LN
  134. { .mmi
  135. setf.sig f32 = M
  136. setf.sig f33 = K
  137. shladd C = M, BASE_SHIFT, C
  138. }
  139. ;;
  140. {.mmf
  141. nop __LINE__
  142. nop __LINE__
  143. xmpy.l f32 = f32, f33
  144. }
  145. ;;
  146. { .mmi
  147. getf.sig r2 = f32
  148. ;;
  149. nop __LINE__
  150. shladd A = r2, BASE_SHIFT, A
  151. }
  152. ;;
  153. #endif
  154. #ifdef RN
  155. sub KK = r0, OFFSET
  156. #endif
  157. #ifdef RT
  158. { .mmi
  159. setf.sig f32 = N
  160. setf.sig f33 = K
  161. nop __LINE__
  162. }
  163. ;;
  164. { .mmi
  165. setf.sig f34 = LDC
  166. nop __LINE__
  167. nop __LINE__
  168. }
  169. ;;
  170. { .mmf
  171. nop __LINE__
  172. nop __LINE__
  173. xmpy.l f33 = f32, f33
  174. }
  175. { .mmf
  176. nop __LINE__
  177. sub KK = N, OFFSET
  178. xmpy.l f34 = f32, f34
  179. }
  180. ;;
  181. { .mmi
  182. getf.sig r2 = f33
  183. getf.sig r3 = f34
  184. }
  185. ;;
  186. shladd B = r2, BASE_SHIFT, B
  187. add C = r3, C
  188. #endif
  189. ;;
  190. .L130:
  191. tbit.z p6, p0 = N, 0
  192. (p6) br.cond.dpnt .L090
  193. ;;
  194. #ifdef RT
  195. { .mmi
  196. nop __LINE__
  197. shl r2 = K, BASE_SHIFT
  198. }
  199. ;;
  200. { .mmi
  201. sub B = B, r2
  202. sub C = C, LDC
  203. nop __LINE__
  204. }
  205. #endif
  206. ;;
  207. mov f64 = f0
  208. mov f65 = f0
  209. mov f66 = f0
  210. mov f67 = f0
  211. mov f68 = f0
  212. mov f69 = f0
  213. mov f70 = f0
  214. mov f71 = f0
  215. ;;
  216. { .mfi
  217. shr I = M, 3
  218. }
  219. { .mfi
  220. mov C1 = C // coffset1 = c + 0 * ldc
  221. #ifdef LN
  222. add KK = M, OFFSET
  223. #elif defined LT
  224. mov KK = OFFSET
  225. #else
  226. nop __LINE__
  227. #endif
  228. }
  229. ;;
  230. { .mmf
  231. cmp.eq p6, p7 = 0, I
  232. #if defined(LN) || defined(RT)
  233. mov AORIG = A
  234. #else
  235. mov AOFFSET = A
  236. #endif
  237. }
  238. ;;
  239. { .mfi
  240. #ifndef RT
  241. add C = C, LDC // coffset += 8 * ldc
  242. #else
  243. nop __LINE__
  244. #endif
  245. #if defined(LT) || defined(RN)
  246. mov L = KK
  247. #else
  248. sub L = K, KK
  249. #endif
  250. }{ .mfb
  251. (p6) br.cond.dpnt .L140
  252. }
  253. ;;
  254. .align 16
  255. .L132:
  256. { .mmi
  257. cmp.ne p7, p0 = r0, L
  258. adds BOFFSET = 0 * SIZE, B
  259. shl r2 = K, 3 + BASE_SHIFT
  260. }
  261. ;;
  262. #if defined(LT) || defined(RN)
  263. { .mmi
  264. (p7) LDFD f48 = [BOFFSET], 1 * SIZE
  265. nop __LINE__
  266. nop __LINE__
  267. }
  268. ;;
  269. #else
  270. { .mfi
  271. shladd BOFFSET = KK, BASE_SHIFT, B
  272. #ifdef LN
  273. sub AORIG = AORIG, r2
  274. #else
  275. nop __LINE__
  276. #endif
  277. }
  278. ;;
  279. { .mfi
  280. (p7) LDFD f48 = [BOFFSET], 1 * SIZE
  281. shladd AOFFSET = r3, 3, AORIG
  282. }
  283. ;;
  284. #endif
  285. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  286. ;;
  287. { .mmf
  288. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  289. }
  290. ;;
  291. { .mmf
  292. (p7) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  293. }
  294. { .mfi
  295. cmp.eq p3, p0 = r0, r0
  296. }
  297. ;;
  298. { .mmf
  299. (p7) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  300. }
  301. { .mfi
  302. adds PREC = CPREFETCHSIZE * SIZE, C1
  303. }
  304. ;;
  305. { .mmf
  306. CPREFETCH [PREC]
  307. }
  308. { .mfi
  309. adds L = 1, L
  310. }
  311. ;;
  312. { .mfi
  313. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  314. }
  315. ;;
  316. { .mfi
  317. adds PREB = (PREFETCHSIZE - 8) * SIZE, BOFFSET
  318. }
  319. ;;
  320. { .mfi
  321. tbit.z p12, p0 = L, 0
  322. }
  323. { .mfi
  324. shr L = L, 1
  325. }
  326. ;;
  327. { .mfi
  328. adds L = -1, L
  329. }
  330. ;;
  331. { .mfi
  332. mov ar.lc = L
  333. }
  334. ;;
  335. { .mfb
  336. cmp.eq p6, p0 = -1, L
  337. (p6) br.cond.dpnt .L138
  338. }
  339. ;;
  340. .align 16
  341. .L133:
  342. { .mfi
  343. lfetch.nt1 [PREA], 16 * SIZE
  344. FMA f64 = f32, f48, f64 // A1 * B1
  345. cmp.ne p4, p5 = 0, L
  346. }
  347. { .mfi
  348. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  349. FMA f65 = f33, f48, f65 // A2 * B1
  350. (p12) cmp.ne p3, p0 = 0, L
  351. }
  352. ;;
  353. { .mfi
  354. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  355. FMA f66 = f34, f48, f66 // A3 * B1
  356. adds C9 = 4 * SIZE, C1
  357. }
  358. { .mmf
  359. (p3) LDFD f56 = [BOFFSET], 1 * SIZE
  360. nop __LINE__
  361. FMA f67 = f35, f48, f67 // A4 * B1
  362. }
  363. ;;
  364. { .mfb
  365. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  366. FMA f68 = f36, f48, f68 // A5 * B1
  367. nop __LINE__
  368. }
  369. { .mfb
  370. nop __LINE__
  371. FMA f69 = f37, f48, f69 // A6 * B1
  372. nop __LINE__
  373. }
  374. ;;
  375. { .mfb
  376. (p3) LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  377. FMA f70 = f38, f48, f70 // A7 * B1
  378. nop __LINE__
  379. }
  380. { .mfb
  381. nop __LINE__
  382. FMA f71 = f39, f48, f71 // A8 * B1
  383. nop __LINE__
  384. }
  385. ;;
  386. { .mfb
  387. (p3) LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  388. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  389. nop __LINE__
  390. }
  391. { .mfb
  392. nop __LINE__
  393. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  394. nop __LINE__
  395. }
  396. ;;
  397. { .mfb
  398. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  399. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  400. nop __LINE__
  401. }
  402. { .mmf
  403. (p4) LDFD f48 = [BOFFSET], 1 * SIZE
  404. nop __LINE__
  405. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  406. }
  407. ;;
  408. { .mfb
  409. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  410. (p3) FMA f68 = f44, f56, f68 // A5 * B1
  411. nop __LINE__
  412. }
  413. { .mfb
  414. nop __LINE__
  415. (p3) FMA f69 = f45, f56, f69 // A6 * B1
  416. nop __LINE__
  417. }
  418. ;;
  419. { .mfi
  420. (p4) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  421. (p3) FMA f70 = f46, f56, f70 // A7 * B1
  422. adds L = -1, L
  423. }
  424. { .mfb
  425. nop __LINE__
  426. (p3) FMA f71 = f47, f56, f71 // A8 * B1
  427. nop __LINE__
  428. }
  429. ;;
  430. { .mfb
  431. (p4) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  432. nop __LINE__
  433. br.cloop.sptk.few .L133
  434. }
  435. ;;
  436. .L138:
  437. #if defined(LN) || defined(RT)
  438. #ifdef LN
  439. adds r2 = -8, KK
  440. #else
  441. adds r2 = -1, KK
  442. #endif
  443. ;;
  444. shladd r2 = r2, BASE_SHIFT, r0
  445. ;;
  446. shladd AOFFSET = r2, 3, AORIG
  447. add BOFFSET = r2, B
  448. ;;
  449. #endif
  450. adds AOFFSET2 = 4 * SIZE, AOFFSET
  451. adds BOFFSET2 = 4 * SIZE, BOFFSET
  452. ;;
  453. #if defined(LN) || defined(LT)
  454. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  455. ;;
  456. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  457. ;;
  458. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  459. ;;
  460. LDFPD f38, f39 = [BOFFSET]
  461. adds BOFFSET = -6 * SIZE, BOFFSET
  462. ;;
  463. FSUB f64 = f32, f64
  464. FSUB f65 = f33, f65
  465. FSUB f66 = f34, f66
  466. FSUB f67 = f35, f67
  467. FSUB f68 = f36, f68
  468. FSUB f69 = f37, f69
  469. FSUB f70 = f38, f70
  470. FSUB f71 = f39, f71
  471. ;;
  472. #else
  473. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  474. ;;
  475. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  476. ;;
  477. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  478. ;;
  479. LDFPD f38, f39 = [AOFFSET]
  480. adds AOFFSET = -6 * SIZE, AOFFSET
  481. ;;
  482. FSUB f64 = f32, f64
  483. FSUB f65 = f33, f65
  484. FSUB f66 = f34, f66
  485. FSUB f67 = f35, f67
  486. FSUB f68 = f36, f68
  487. FSUB f69 = f37, f69
  488. FSUB f70 = f38, f70
  489. FSUB f71 = f39, f71
  490. ;;
  491. #endif
  492. #ifdef LN
  493. adds AOFFSET = 62 * SIZE, AOFFSET
  494. ;;
  495. LDFPD f33, f32 = [AOFFSET]
  496. adds AOFFSET = - 2 * SIZE, AOFFSET
  497. ;;
  498. LDFPD f35, f34 = [AOFFSET]
  499. adds AOFFSET = - 2 * SIZE, AOFFSET
  500. ;;
  501. LDFPD f37, f36 = [AOFFSET]
  502. adds AOFFSET = - 2 * SIZE, AOFFSET
  503. ;;
  504. LDFPD f39, f38 = [AOFFSET]
  505. adds AOFFSET = - 2 * SIZE, AOFFSET
  506. ;;
  507. LDFD f40 = [AOFFSET], -2 * SIZE
  508. ;;
  509. LDFPD f42, f41 = [AOFFSET]
  510. adds AOFFSET = - 2 * SIZE, AOFFSET
  511. ;;
  512. LDFPD f44, f43 = [AOFFSET]
  513. adds AOFFSET = - 2 * SIZE, AOFFSET
  514. ;;
  515. LDFPD f46, f45 = [AOFFSET]
  516. adds AOFFSET = - 4 * SIZE, AOFFSET
  517. ;;
  518. LDFPD f48, f47 = [AOFFSET]
  519. adds AOFFSET = - 2 * SIZE, AOFFSET
  520. ;;
  521. LDFPD f50, f49 = [AOFFSET]
  522. adds AOFFSET = - 2 * SIZE, AOFFSET
  523. ;;
  524. LDFPD f52, f51 = [AOFFSET]
  525. adds AOFFSET = - 4 * SIZE, AOFFSET
  526. ;;
  527. LDFD f53 = [AOFFSET], -2 * SIZE
  528. ;;
  529. LDFPD f55, f54 = [AOFFSET]
  530. adds AOFFSET = - 2 * SIZE, AOFFSET
  531. ;;
  532. LDFPD f57, f56 = [AOFFSET]
  533. adds AOFFSET = - 6 * SIZE, AOFFSET
  534. ;;
  535. LDFPD f59, f58 = [AOFFSET]
  536. adds AOFFSET = - 2 * SIZE, AOFFSET
  537. ;;
  538. LDFPD f61, f60 = [AOFFSET]
  539. adds AOFFSET = - 6 * SIZE, AOFFSET
  540. ;;
  541. LDFD f16 = [AOFFSET], -2 * SIZE
  542. ;;
  543. LDFPD f18, f17 = [AOFFSET]
  544. adds AOFFSET = - 8 * SIZE, AOFFSET
  545. ;;
  546. LDFPD f20, f19 = [AOFFSET]
  547. adds AOFFSET = - 8 * SIZE, AOFFSET
  548. ;;
  549. LDFD f21 = [AOFFSET]
  550. ;;
  551. FMPY f71 = f71, f32
  552. ;;
  553. FNMA f70 = f71, f33, f70
  554. ;;
  555. FNMA f69 = f71, f34, f69
  556. ;;
  557. FNMA f68 = f71, f35, f68
  558. ;;
  559. FNMA f67 = f71, f36, f67
  560. ;;
  561. FNMA f66 = f71, f37, f66
  562. ;;
  563. FNMA f65 = f71, f38, f65
  564. ;;
  565. FNMA f64 = f71, f39, f64
  566. ;;
  567. FMPY f70 = f70, f40
  568. ;;
  569. FNMA f69 = f70, f41, f69
  570. ;;
  571. FNMA f68 = f70, f42, f68
  572. ;;
  573. FNMA f67 = f70, f43, f67
  574. ;;
  575. FNMA f66 = f70, f44, f66
  576. ;;
  577. FNMA f65 = f70, f45, f65
  578. ;;
  579. FNMA f64 = f70, f46, f64
  580. ;;
  581. FMPY f69 = f69, f47
  582. ;;
  583. FNMA f68 = f69, f48, f68
  584. ;;
  585. FNMA f67 = f69, f49, f67
  586. ;;
  587. FNMA f66 = f69, f50, f66
  588. ;;
  589. FNMA f65 = f69, f51, f65
  590. ;;
  591. FNMA f64 = f69, f52, f64
  592. ;;
  593. FMPY f68 = f68, f53
  594. ;;
  595. FNMA f67 = f68, f54, f67
  596. ;;
  597. FNMA f66 = f68, f55, f66
  598. ;;
  599. FNMA f65 = f68, f56, f65
  600. ;;
  601. FNMA f64 = f68, f57, f64
  602. ;;
  603. FMPY f67 = f67, f58
  604. ;;
  605. FNMA f66 = f67, f59, f66
  606. ;;
  607. FNMA f65 = f67, f60, f65
  608. ;;
  609. FNMA f64 = f67, f61, f64
  610. ;;
  611. FMPY f66 = f66, f16
  612. ;;
  613. FNMA f65 = f66, f17, f65
  614. ;;
  615. FNMA f64 = f66, f18, f64
  616. ;;
  617. FMPY f65 = f65, f19
  618. ;;
  619. FNMA f64 = f65, f20, f64
  620. ;;
  621. FMPY f64 = f64, f21
  622. ;;
  623. STFD [BOFFSET] = f64, SIZE
  624. STFD [BOFFSET2] = f68, SIZE
  625. ;;
  626. STFD [BOFFSET] = f65, SIZE
  627. STFD [BOFFSET2] = f69, SIZE
  628. ;;
  629. STFD [BOFFSET] = f66, SIZE
  630. STFD [BOFFSET2] = f70, SIZE
  631. ;;
  632. STFD [BOFFSET] = f67, - 3 * SIZE
  633. STFD [BOFFSET2] = f71, - 3 * SIZE
  634. ;;
  635. adds C1 = -8 * SIZE, C1
  636. ;;
  637. #endif
  638. #ifdef LT
  639. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  640. ;;
  641. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  642. ;;
  643. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  644. ;;
  645. LDFPD f38, f39 = [AOFFSET]
  646. adds AOFFSET = 3 * SIZE, AOFFSET
  647. ;;
  648. LDFD f40 = [AOFFSET], 1 * SIZE
  649. ;;
  650. LDFPD f41, f42 = [AOFFSET], 2 * SIZE
  651. ;;
  652. LDFPD f43, f44 = [AOFFSET], 2 * SIZE
  653. ;;
  654. LDFPD f45, f46 = [AOFFSET]
  655. adds AOFFSET = 4 * SIZE, AOFFSET
  656. ;;
  657. LDFPD f47, f48 = [AOFFSET], 2 * SIZE
  658. ;;
  659. LDFPD f49, f50 = [AOFFSET], 2 * SIZE
  660. ;;
  661. LDFPD f51, f52 = [AOFFSET]
  662. adds AOFFSET = 5 * SIZE, AOFFSET
  663. ;;
  664. LDFD f53 = [AOFFSET], 1 * SIZE
  665. ;;
  666. LDFPD f54, f55 = [AOFFSET], 2 * SIZE
  667. ;;
  668. LDFPD f56, f57 = [AOFFSET]
  669. adds AOFFSET = 6 * SIZE, AOFFSET
  670. ;;
  671. LDFPD f58, f59 = [AOFFSET], 2 * SIZE
  672. ;;
  673. LDFPD f60, f61 = [AOFFSET]
  674. adds AOFFSET = 7 * SIZE, AOFFSET
  675. ;;
  676. LDFD f16 = [AOFFSET], 1 * SIZE
  677. ;;
  678. LDFPD f17, f18 = [AOFFSET]
  679. adds AOFFSET = 8 * SIZE, AOFFSET
  680. ;;
  681. LDFPD f19, f20 = [AOFFSET]
  682. adds AOFFSET = 9 * SIZE, AOFFSET
  683. ;;
  684. LDFD f21 = [AOFFSET]
  685. adds AOFFSET = -63 * SIZE, AOFFSET
  686. ;;
  687. FMPY f64 = f64, f32
  688. ;;
  689. FNMA f65 = f64, f33, f65
  690. ;;
  691. FNMA f66 = f64, f34, f66
  692. ;;
  693. FNMA f67 = f64, f35, f67
  694. ;;
  695. FNMA f68 = f64, f36, f68
  696. ;;
  697. FNMA f69 = f64, f37, f69
  698. ;;
  699. FNMA f70 = f64, f38, f70
  700. ;;
  701. FNMA f71 = f64, f39, f71
  702. ;;
  703. FMPY f65 = f65, f40
  704. ;;
  705. FNMA f66 = f65, f41, f66
  706. ;;
  707. FNMA f67 = f65, f42, f67
  708. ;;
  709. FNMA f68 = f65, f43, f68
  710. ;;
  711. FNMA f69 = f65, f44, f69
  712. ;;
  713. FNMA f70 = f65, f45, f70
  714. ;;
  715. FNMA f71 = f65, f46, f71
  716. ;;
  717. FMPY f66 = f66, f47
  718. ;;
  719. FNMA f67 = f66, f48, f67
  720. ;;
  721. FNMA f68 = f66, f49, f68
  722. ;;
  723. FNMA f69 = f66, f50, f69
  724. ;;
  725. FNMA f70 = f66, f51, f70
  726. ;;
  727. FNMA f71 = f66, f52, f71
  728. ;;
  729. FMPY f67 = f67, f53
  730. ;;
  731. FNMA f68 = f67, f54, f68
  732. ;;
  733. FNMA f69 = f67, f55, f69
  734. ;;
  735. FNMA f70 = f67, f56, f70
  736. ;;
  737. FNMA f71 = f67, f57, f71
  738. ;;
  739. FMPY f68 = f68, f58
  740. ;;
  741. FNMA f69 = f68, f59, f69
  742. ;;
  743. FNMA f70 = f68, f60, f70
  744. ;;
  745. FNMA f71 = f68, f61, f71
  746. ;;
  747. FMPY f69 = f69, f16
  748. ;;
  749. FNMA f70 = f69, f17, f70
  750. ;;
  751. FNMA f71 = f69, f18, f71
  752. ;;
  753. FMPY f70 = f70, f19
  754. ;;
  755. FNMA f71 = f70, f20, f71
  756. ;;
  757. FMPY f71 = f71, f21
  758. ;;
  759. STFD [BOFFSET] = f64, SIZE
  760. STFD [BOFFSET2] = f68, SIZE
  761. ;;
  762. STFD [BOFFSET] = f65, SIZE
  763. STFD [BOFFSET2] = f69, SIZE
  764. ;;
  765. STFD [BOFFSET] = f66, SIZE
  766. STFD [BOFFSET2] = f70, SIZE
  767. ;;
  768. STFD [BOFFSET] = f67, -3 * SIZE
  769. STFD [BOFFSET2] = f71, -3 * SIZE
  770. ;;
  771. adds C9 = 4 * SIZE, C1
  772. ;;
  773. #endif
  774. #ifdef RN
  775. LDFD f32 = [BOFFSET]
  776. ;;
  777. FMPY f64 = f64, f32
  778. FMPY f68 = f68, f32
  779. FMPY f65 = f65, f32
  780. FMPY f69 = f69, f32
  781. FMPY f66 = f66, f32
  782. FMPY f70 = f70, f32
  783. FMPY f67 = f67, f32
  784. FMPY f71 = f71, f32
  785. ;;
  786. STFD [AOFFSET] = f64, SIZE
  787. STFD [AOFFSET2] = f68, SIZE
  788. ;;
  789. STFD [AOFFSET] = f65, SIZE
  790. STFD [AOFFSET2] = f69, SIZE
  791. ;;
  792. STFD [AOFFSET] = f66, SIZE
  793. STFD [AOFFSET2] = f70, SIZE
  794. ;;
  795. STFD [AOFFSET] = f67, -3 * SIZE
  796. STFD [AOFFSET2] = f71, -3 * SIZE
  797. ;;
  798. #endif
  799. #ifdef RT
  800. LDFD f32 = [BOFFSET]
  801. ;;
  802. FMPY f64 = f64, f32
  803. FMPY f68 = f68, f32
  804. FMPY f65 = f65, f32
  805. FMPY f69 = f69, f32
  806. FMPY f66 = f66, f32
  807. FMPY f70 = f70, f32
  808. FMPY f67 = f67, f32
  809. FMPY f71 = f71, f32
  810. ;;
  811. STFD [AOFFSET] = f64, SIZE
  812. STFD [AOFFSET2] = f68, SIZE
  813. ;;
  814. STFD [AOFFSET] = f65, SIZE
  815. STFD [AOFFSET2] = f69, SIZE
  816. ;;
  817. STFD [AOFFSET] = f66, SIZE
  818. STFD [AOFFSET2] = f70, SIZE
  819. ;;
  820. STFD [AOFFSET] = f67, -3 * SIZE
  821. STFD [AOFFSET2] = f71, -3 * SIZE
  822. ;;
  823. #endif
  824. adds C9 = 4 * SIZE, C1
  825. ;;
  826. { .mmf
  827. STFD [C1 ] = f64, SIZE
  828. STFD [C9 ] = f68, SIZE
  829. mov f64 = f0
  830. }
  831. ;;
  832. { .mmi
  833. STFD [C1 ] = f65, SIZE
  834. STFD [C9 ] = f69, SIZE
  835. }
  836. ;;
  837. { .mmi
  838. STFD [C1 ] = f66, SIZE
  839. STFD [C9 ] = f70, SIZE
  840. }
  841. ;;
  842. { .mmi
  843. #ifndef LN
  844. STFD [C1 ] = f67, 5 * SIZE
  845. #else
  846. STFD [C1 ] = f67, - 3 * SIZE
  847. #endif
  848. STFD [C9 ] = f71
  849. }
  850. ;;
  851. { .mmf
  852. cmp.ne p6, p0 = 1, I
  853. }
  854. ;;
  855. adds I = -1, I
  856. ;;
  857. { .mmi
  858. shladd r2 = K, BASE_SHIFT, r0
  859. }
  860. ;;
  861. { .mmi
  862. sub L = K, KK
  863. }
  864. ;;
  865. { .mmi
  866. #ifdef RT
  867. shladd AORIG = r2, 3, AORIG
  868. #else
  869. nop __LINE__
  870. #endif
  871. }
  872. ;;
  873. { .mmi
  874. #if defined(LT) || defined(RN)
  875. shladd L = L, BASE_SHIFT, r0
  876. #else
  877. nop __LINE__
  878. #endif
  879. }
  880. ;;
  881. ;;
  882. { .mmi
  883. #if defined(LT) || defined(RN)
  884. shladd AOFFSET = L, 3, AOFFSET
  885. #else
  886. nop __LINE__
  887. #endif
  888. }
  889. ;;
  890. { .mmi
  891. #if defined(LT) || defined(RN)
  892. add BOFFSET = L, BOFFSET
  893. #else
  894. nop __LINE__
  895. #endif
  896. }
  897. ;;
  898. { .mmi
  899. #ifdef LT
  900. adds KK = 8, KK
  901. #elif defined LN
  902. adds KK = -8, KK
  903. #else
  904. nop __LINE__
  905. #endif
  906. }
  907. ;;
  908. { .mmi
  909. #if defined(LT) || defined(RN)
  910. mov L = KK
  911. #else
  912. sub L = K, KK
  913. #endif
  914. }
  915. ;;
  916. mov f64 = f0
  917. mov f65 = f0
  918. mov f66 = f0
  919. mov f67 = f0
  920. mov f68 = f0
  921. mov f69 = f0
  922. mov f70 = f0
  923. mov f71 = f0
  924. (p6) br.cond.dptk .L132
  925. .align 8
  926. .L140:
  927. tbit.z p6, p7 = M, 2
  928. (p6) br.cond.dptk .L150
  929. ;;
  930. { .mib
  931. #if defined(LT) || defined(RN)
  932. mov L = KK
  933. #else
  934. sub L = K, KK
  935. #endif
  936. }
  937. ;;
  938. { .mmi
  939. cmp.ne p7, p0 = r0, L
  940. adds BOFFSET = 0 * SIZE, B
  941. shl r2 = K, 2 + BASE_SHIFT
  942. }
  943. ;;
  944. #if defined(LT) || defined(RN)
  945. { .mmf
  946. (p7) LDFD f48 = [BOFFSET], 1 * SIZE
  947. mov f65 = f0
  948. }
  949. ;;
  950. #else
  951. { .mfi
  952. shladd BOFFSET = KK, BASE_SHIFT, B
  953. #ifdef LN
  954. sub AORIG = AORIG, r2
  955. #else
  956. nop __LINE__
  957. #endif
  958. }
  959. ;;
  960. { .mfi
  961. (p7) LDFD f48 = [BOFFSET], 1 * SIZE
  962. shladd AOFFSET = r3, 2, AORIG
  963. }
  964. ;;
  965. #endif
  966. { .mfi
  967. adds L = 1, L
  968. }
  969. { .mfi
  970. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  971. cmp.eq p3, p0 = r0, r0
  972. }
  973. ;;
  974. { .mfi
  975. tbit.z p12, p0 = L, 0
  976. }
  977. { .mfi
  978. shr L = L, 1
  979. }
  980. ;;
  981. { .mfi
  982. adds L = -1, L
  983. }
  984. ;;
  985. { .mfi
  986. cmp.eq p6, p0 = -1, L
  987. }
  988. ;;
  989. { .mmf
  990. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  991. }
  992. { .mfi
  993. mov ar.lc = L
  994. }
  995. ;;
  996. { .mmf
  997. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  998. }
  999. { .mfb
  1000. (p6) br.cond.dpnt .L148
  1001. }
  1002. ;;
  // .L142: accumulate loop for the 4-row x 1-column tile, two k-steps per pass.
  // f64..f67 += {f32..f35} * f48 always, and += {f40..f43} * f56 when p3 is set.
  //   p4 = (L != 0): another pass follows, so the first-step operands are reloaded.
  //   p5 = (L == 0): this is the final pass.
  //   p3: starts true in the setup code; when p12 is set it is re-evaluated as
  //       (L != 0), so the final pass then performs only the first k-step.
  // The pass count was placed in ar.lc by the setup code; L is decremented in
  // step with it so the predicates above can detect the final pass.
  1003. .L142:
  1004. { .mfi
  1005. lfetch.nt1 [PREA], 8 * SIZE
  1006. FMA f64 = f32, f48, f64 // A1 * B1
  1007. cmp.ne p4, p5 = 0, L
  1008. }
  1009. { .mfi
  1010. nop __LINE__
  1011. FMA f65 = f33, f48, f65 // A2 * B1
  1012. (p12) cmp.ne p3, p0 = 0, L
  1013. }
  1014. ;;
  1015. { .mfi
  1016. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  1017. FMA f66 = f34, f48, f66 // A3 * B1
  // C9 (and C10 below) are only written here; nothing in this section reads them.
  1018. (p5) adds C9 = 2 * SIZE, C1
  1019. }
  1020. { .mmf
  1021. nop __LINE__
  1022. (p3) LDFD f56 = [BOFFSET], 1 * SIZE
  1023. FMA f67 = f35, f48, f67 // A4 * B1
  1024. }
  1025. ;;
  // Second k-step (operands f40..f43 and f56), predicated on p3.
  1026. { .mfi
  1027. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  1028. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  1029. (p5) adds C10 = 2 * SIZE, C2
  1030. }
  1031. { .mfb
  1032. nop __LINE__
  1033. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  1034. nop __LINE__
  1035. }
  1036. ;;
  // Reload the first-step operands for the next pass (skipped on the final pass).
  1037. { .mfb
  1038. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1039. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  1040. nop __LINE__
  1041. }
  1042. { .mmf
  1043. (p4) LDFD f48 = [BOFFSET], 1 * SIZE
  1044. nop __LINE__
  1045. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  1046. }
  1047. ;;
  1048. { .mfi
  1049. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  1050. nop __LINE__
  1051. adds L = -1, L
  1052. }
  1053. { .mfb
  1054. nop __LINE__
  1055. nop.f 0
  // Counted loop branch: repeats while ar.lc is non-zero.
  1056. br.cloop.sptk.few .L142
  1057. }
  1058. ;;
  // .L148: triangular solve and write-back for the 4-row x 1-column tile.
  // On entry f64..f67 hold the accumulated products from .L142.
  // Steps: (1) position AOFFSET/BOFFSET (LN/RT only), (2) subtract the
  // accumulators from the stored right-hand side, (3) apply the variant-specific
  // solve, (4) store the result to the packed buffer and to C1, (5) clear the
  // accumulators and advance pointers / KK for the next tile.
  // Assumes FSUB d = a, b yields a - b and FNMA d = a, b, c yields c - a * b,
  // as the Itanium fsub/fnma do — the macros are defined outside this section.
  1059. .L148:
  1060. #if defined(LN) || defined(RT)
  // r2 = (KK - 4) for LN or (KK - 1) for RT, converted to a byte offset.
  1061. #ifdef LN
  1062. adds r2 = -4, KK
  1063. #else
  1064. adds r2 = -1, KK
  1065. #endif
  1066. ;;
  1067. shladd r2 = r2, BASE_SHIFT, r0
  1068. ;;
  // AOFFSET = AORIG + 4 * r2 (four rows per k-step), BOFFSET = B + r2.
  1069. shladd AOFFSET = r2, 2, AORIG
  1070. add BOFFSET = r2, B
  1071. ;;
  1072. #endif
  // AOFFSET2/BOFFSET2 are computed here but not read again in this section.
  1073. adds AOFFSET2 = 4 * SIZE, AOFFSET
  1074. adds BOFFSET2 = 4 * SIZE, BOFFSET
  1075. ;;
  1076. #if defined(LN) || defined(LT)
  // Right-hand side comes from the B buffer; BOFFSET is restored afterwards.
  1077. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  1078. ;;
  1079. LDFPD f34, f35 = [BOFFSET]
  1080. adds BOFFSET = -2 * SIZE, BOFFSET
  1081. ;;
  1082. FSUB f64 = f32, f64
  1083. FSUB f65 = f33, f65
  1084. FSUB f66 = f34, f66
  1085. FSUB f67 = f35, f67
  1086. ;;
  1087. #else
  // Right-hand side comes from the A buffer; AOFFSET is restored afterwards.
  1088. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1089. ;;
  1090. LDFPD f34, f35 = [AOFFSET]
  1091. adds AOFFSET = -2 * SIZE, AOFFSET
  1092. ;;
  1093. FSUB f64 = f32, f64
  1094. FSUB f65 = f33, f65
  1095. FSUB f66 = f34, f66
  1096. FSUB f67 = f35, f67
  1097. ;;
  1098. #endif
  1099. #ifdef LN
  // LN: walk the 16-element block at AOFFSET backwards, reading elements
  // 14/15, 12/13, 10, 8/9, 4/5 and 0 (assuming LDFPD fills its two destinations
  // from consecutive elements). AOFFSET ends back at the start of the block.
  1100. adds AOFFSET = 14 * SIZE, AOFFSET
  1101. ;;
  1102. LDFPD f33, f32 = [AOFFSET]
  1103. adds AOFFSET = - 2 * SIZE, AOFFSET
  1104. ;;
  1105. LDFPD f35, f34 = [AOFFSET]
  1106. adds AOFFSET = - 2 * SIZE, AOFFSET
  1107. ;;
  1108. LDFD f36 = [AOFFSET], - 2 * SIZE
  1109. ;;
  1110. LDFPD f38, f37 = [AOFFSET]
  1111. adds AOFFSET = - 4 * SIZE, AOFFSET
  1112. ;;
  1113. LDFPD f40, f39 = [AOFFSET]
  1114. adds AOFFSET = - 4 * SIZE, AOFFSET
  1115. ;;
  1116. LDFD f41 = [AOFFSET]
  1117. ;;
  // Back substitution from row 4 up to row 1. Diagonal entries (f32, f36, f39,
  // f41) are multiplied, so they are presumably stored as reciprocals — TODO
  // confirm against the packing routine.
  1118. FMPY f67 = f67, f32
  1119. ;;
  1120. FNMA f66 = f67, f33, f66
  1121. ;;
  1122. FNMA f65 = f67, f34, f65
  1123. ;;
  1124. FNMA f64 = f67, f35, f64
  1125. ;;
  1126. FMPY f66 = f66, f36
  1127. ;;
  1128. FNMA f65 = f66, f37, f65
  1129. ;;
  1130. FNMA f64 = f66, f38, f64
  1131. ;;
  1132. FMPY f65 = f65, f39
  1133. ;;
  1134. FNMA f64 = f65, f40, f64
  1135. ;;
  1136. FMPY f64 = f64, f41
  1137. ;;
  // Write the solved values back to the B buffer; the final -3 * SIZE step
  // leaves BOFFSET where it started.
  1138. STFD [BOFFSET] = f64, SIZE
  1139. ;;
  1140. STFD [BOFFSET] = f65, SIZE
  1141. ;;
  1142. STFD [BOFFSET] = f66, SIZE
  1143. ;;
  1144. STFD [BOFFSET] = f67, -3 * SIZE
  1145. ;;
  // LN steps C1 back by four elements before the C stores below.
  1146. adds C1 = -4 * SIZE, C1
  1147. ;;
  1148. #endif
  1149. #ifdef LT
  // LT: walk the 16-element block forwards, reading elements 0..3, 5, 6/7,
  // 10/11 and 15; the last load rewinds AOFFSET by 15 to the block start.
  1150. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1151. ;;
  1152. LDFPD f34, f35 = [AOFFSET]
  1153. adds AOFFSET = 3 * SIZE, AOFFSET
  1154. ;;
  1155. LDFD f36 = [AOFFSET], 1 * SIZE
  1156. ;;
  1157. LDFPD f37, f38 = [AOFFSET]
  1158. adds AOFFSET = 4 * SIZE, AOFFSET
  1159. ;;
  1160. LDFPD f39, f40 = [AOFFSET]
  1161. adds AOFFSET = 5 * SIZE, AOFFSET
  1162. ;;
  1163. LDFD f41 = [AOFFSET], -15 * SIZE
  1164. ;;
  // Forward substitution from row 1 down to row 4.
  1165. FMPY f64 = f64, f32
  1166. ;;
  1167. FNMA f65 = f64, f33, f65
  1168. ;;
  1169. FNMA f66 = f64, f34, f66
  1170. ;;
  1171. FNMA f67 = f64, f35, f67
  1172. ;;
  1173. FMPY f65 = f65, f36
  1174. ;;
  1175. FNMA f66 = f65, f37, f66
  1176. ;;
  1177. FNMA f67 = f65, f38, f67
  1178. ;;
  1179. FMPY f66 = f66, f39
  1180. ;;
  1181. FNMA f67 = f66, f40, f67
  1182. ;;
  1183. FMPY f67 = f67, f41
  1184. ;;
  1185. STFD [BOFFSET] = f64, SIZE
  1186. ;;
  1187. STFD [BOFFSET] = f65, SIZE
  1188. ;;
  1189. STFD [BOFFSET] = f66, SIZE
  1190. ;;
  1191. STFD [BOFFSET] = f67, -3 * SIZE
  1192. ;;
  1193. #endif
  1194. #ifdef RN
  // RN: a single B element scales all four rows; results go back to the A buffer.
  1195. LDFD f32 = [BOFFSET]
  1196. ;;
  1197. FMPY f64 = f64, f32
  1198. FMPY f65 = f65, f32
  1199. FMPY f66 = f66, f32
  1200. FMPY f67 = f67, f32
  1201. ;;
  1202. STFD [AOFFSET] = f64, SIZE
  1203. ;;
  1204. STFD [AOFFSET] = f65, SIZE
  1205. ;;
  1206. STFD [AOFFSET] = f66, SIZE
  1207. ;;
  1208. STFD [AOFFSET] = f67, -3 * SIZE
  1209. ;;
  1210. #endif
  1211. #ifdef RT
  // RT: same scaling and write-back as the RN case above.
  1212. LDFD f32 = [BOFFSET]
  1213. ;;
  1214. FMPY f64 = f64, f32
  1215. FMPY f65 = f65, f32
  1216. FMPY f66 = f66, f32
  1217. FMPY f67 = f67, f32
  1218. ;;
  1219. STFD [AOFFSET] = f64, SIZE
  1220. ;;
  1221. STFD [AOFFSET] = f65, SIZE
  1222. ;;
  1223. STFD [AOFFSET] = f66, SIZE
  1224. ;;
  1225. STFD [AOFFSET] = f67, - 3 * SIZE
  1226. ;;
  1227. #endif
  // Store the four results to C1, clearing each accumulator once it is stored.
  1228. { .mmf
  1229. STFD [C1 ] = f64, SIZE
  1230. mov f64 = f0
  1231. }
  1232. ;;
  1233. { .mmi
  1234. STFD [C1 ] = f65, SIZE
  1235. }
  1236. ;;
  1237. { .mmi
  1238. STFD [C1 ] = f66, SIZE
  1239. }
  1240. ;;
  1241. { .mmi
  // Non-LN leaves C1 four elements further on; LN rewinds it to the tile start.
  1242. #ifndef LN
  1243. STFD [C1 ] = f67, SIZE
  1244. #else
  1245. STFD [C1 ] = f67, - 3 * SIZE
  1246. #endif
  1247. }
  1248. ;;
  1249. { .mmf
  1250. mov f72 = f0
  1251. }
  1252. ;;
  1253. mov f65 = f0
  1254. mov f73 = f0
  1255. mov f66 = f0
  1256. mov f74 = f0
  1257. mov f67 = f0
  1258. mov f75 = f0
  1259. ;;
  // r2 = K scaled to bytes; L = K - KK, using KK before it is stepped below.
  1260. shladd r2 = K, BASE_SHIFT, r0
  1261. ;;
  1262. { .mmi
  1263. sub L = K, KK
  1264. }
  1265. ;;
  1266. { .mmi
  // RT: AORIG += 4 * r2.
  1267. #ifdef RT
  1268. shladd AORIG = r2, 2, AORIG
  1269. #else
  1270. nop __LINE__
  1271. #endif
  1272. }
  1273. ;;
  1274. { .mmi
  // LT/RN: scale L to bytes, then advance AOFFSET by 4 * L and BOFFSET by L.
  1275. #if defined(LT) || defined(RN)
  1276. shladd L = L, BASE_SHIFT, r0
  1277. #else
  1278. nop __LINE__
  1279. #endif
  1280. }
  1281. ;;
  1282. { .mmi
  1283. #if defined(LT) || defined(RN)
  1284. shladd AOFFSET = L, 2, AOFFSET
  1285. #else
  1286. nop __LINE__
  1287. #endif
  1288. }
  1289. ;;
  1290. { .mmi
  1291. #if defined(LT) || defined(RN)
  1292. add BOFFSET = L, BOFFSET
  1293. #else
  1294. nop __LINE__
  1295. #endif
  1296. }
  1297. ;;
  1298. { .mmi
  // KK moves by the tile height: +4 for LT, -4 for LN.
  1299. #ifdef LT
  1300. adds KK = 4, KK
  1301. #elif defined LN
  1302. adds KK = -4, KK
  1303. #else
  1304. nop __LINE__
  1305. #endif
  1306. }
  1307. ;;
  1308. { .mmi
  // Reload L for the next section: KK for LT/RN, K - KK otherwise.
  1309. #if defined(LT) || defined(RN)
  1310. mov L = KK
  1311. #else
  1312. sub L = K, KK
  1313. #endif
  1314. }
  1315. ;;
  // .L150: set-up for the 2-row x 1-column tile. Skipped (branch to .L160)
  // when bit 1 of M is clear. Establishes L, p7, p3, p12, ar.lc and preloads
  // the first operands for the .L152 loop.
  1316. .align 8
  1317. .L150:
  1318. tbit.z p6, p7 = M, 1
  1319. (p6) br.cond.dptk .L160
  1320. ;;
  1321. { .mib
  // L = number of k-steps to accumulate: KK for LT/RN, K - KK otherwise.
  1322. #if defined(LT) || defined(RN)
  1323. mov L = KK
  1324. #else
  1325. sub L = K, KK
  1326. #endif
  1327. }
  1328. ;;
  1329. { .mmi
  // p7 = (L != 0) guards the preloads; r2 = K * 2 elements in bytes.
  1330. cmp.ne p7, p0 = r0, L
  1331. adds BOFFSET = 0 * SIZE, B
  1332. shl r2 = K, 1 + BASE_SHIFT
  1333. }
  1334. ;;
  1335. #if defined(LT) || defined(RN)
  1336. { .mmf
  1337. (p7) LDFD f48 = [BOFFSET], 1 * SIZE
  1338. }
  1339. ;;
  1340. #else
  1341. { .mfi
  // LN/RT: BOFFSET = B + KK elements; LN additionally steps AORIG back by r2.
  1342. shladd BOFFSET = KK, BASE_SHIFT, B
  1343. #ifdef LN
  1344. sub AORIG = AORIG, r2
  1345. #else
  1346. nop __LINE__
  1347. #endif
  1348. }
  1349. ;;
  1350. { .mfi
  1351. (p7) LDFD f48 = [BOFFSET], 1 * SIZE
  // AOFFSET = AORIG + 2 * r3. r3 is not written in this section.
  // NOTE(review): presumably r3 still holds KK << BASE_SHIFT from earlier
  // set-up; confirm it is not stale in builds where KK changes between tiles.
  1352. shladd AOFFSET = r3, 1, AORIG
  1353. }
  1354. ;;
  1355. #endif
  1356. { .mfi
  // Loop runs (L + 1) >> 1 passes of two k-steps each.
  1357. adds L = 1, L
  1358. }
  1359. { .mfi
  // PREA is computed here, but the .L152 loop issues no prefetch through it.
  1360. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  // p3 = true (r0 == r0): second k-step enabled by default.
  1361. cmp.eq p3, p0 = r0, r0
  1362. }
  1363. ;;
  1364. { .mfi
  // p12 = (bit 0 of L + 1 is clear), i.e. the original count was odd.
  1365. tbit.z p12, p0 = L, 0
  1366. }
  1367. { .mfi
  1368. shr L = L, 1
  1369. }
  1370. ;;
  1371. { .mmf
  1372. adds L = -1, L
  1373. }
  1374. ;;
  1375. { .mmf
  // p6 = (pass count was zero): nothing to accumulate, go straight to .L158.
  1376. cmp.eq p6, p0 = -1, L
  1377. }
  1378. ;;
  1379. { .mib
  1380. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1381. mov ar.lc = L
  1382. (p6) br.cond.dpnt .L158
  1383. }
  1384. ;;
  // .L152: accumulate loop for the 2-row x 1-column tile, two k-steps per pass.
  // f64/f65 += {f32,f33} * f48 always, and += {f40,f41} * f56 when p3 is set.
  // p4 = (L != 0) gates the reload for the next pass; when p12 is set, p3 is
  // re-evaluated as (L != 0) so the final pass performs a single k-step.
  1385. .L152:
  1386. { .mfi
  1387. cmp.ne p4, p5 = 0, L
  1388. FMA f64 = f32, f48, f64 // A1 * B1
  1389. (p12) cmp.ne p3, p0 = 0, L
  1390. }
  1391. ;;
  1392. { .mmf
  1393. (p3) LDFD f56 = [BOFFSET], 1 * SIZE
  1394. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  1395. FMA f65 = f33, f48, f65 // A2 * B1
  1396. }
  1397. ;;
  1398. { .mfi
  1399. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1400. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  1401. adds L = -1, L
  1402. }
  1403. ;;
  1404. { .mfb
  1405. (p4) LDFD f48 = [BOFFSET], 1 * SIZE
  1406. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  // Counted loop branch: repeats while ar.lc is non-zero.
  1407. br.cloop.sptk.few .L152
  1408. }
  1409. ;;
  // .L158: triangular solve and write-back for the 2-row x 1-column tile.
  // On entry f64/f65 hold the accumulated products from .L152. Same sequence
  // as the 4-row case: position pointers (LN/RT), subtract from the stored
  // right-hand side, solve, store to the packed buffer and C1, then advance.
  // Assumes FSUB d = a, b yields a - b and FNMA d = a, b, c yields c - a * b —
  // the macros are defined outside this section.
  1410. .L158:
  1411. #if defined(LN) || defined(RT)
  // r2 = (KK - 2) for LN or (KK - 1) for RT, converted to a byte offset.
  1412. #ifdef LN
  1413. adds r2 = -2, KK
  1414. #else
  1415. adds r2 = -1, KK
  1416. #endif
  1417. ;;
  1418. shladd r2 = r2, BASE_SHIFT, r0
  1419. ;;
  // AOFFSET = AORIG + 2 * r2 (two rows per k-step), BOFFSET = B + r2.
  1420. shladd AOFFSET = r2, 1, AORIG
  1421. add BOFFSET = r2, B
  1422. ;;
  1423. #endif
  // AOFFSET2/BOFFSET2 are computed here but not read again in this section.
  1424. adds AOFFSET2 = 4 * SIZE, AOFFSET
  1425. adds BOFFSET2 = 4 * SIZE, BOFFSET
  1426. ;;
  1427. #if defined(LN) || defined(LT)
  1428. LDFPD f32, f33 = [BOFFSET]
  1429. ;;
  1430. FSUB f64 = f32, f64
  1431. FSUB f65 = f33, f65
  1432. ;;
  1433. #else
  1434. LDFPD f32, f33 = [AOFFSET]
  1435. ;;
  1436. FSUB f64 = f32, f64
  1437. FSUB f65 = f33, f65
  1438. ;;
  1439. #endif
  1440. #ifdef LN
  // LN: read elements 2/3 and then 0 of the 4-element block at AOFFSET, then
  // back-substitute row 2 before row 1. Diagonal entries are multiplied, so
  // they are presumably stored as reciprocals — TODO confirm.
  1441. adds AOFFSET = 2 * SIZE, AOFFSET
  1442. ;;
  1443. LDFPD f33, f32 = [AOFFSET]
  1444. adds AOFFSET = - 2 * SIZE, AOFFSET
  1445. ;;
  1446. LDFD f34 = [AOFFSET]
  1447. ;;
  1448. FMPY f65 = f65, f32
  1449. ;;
  1450. FNMA f64 = f65, f33, f64
  1451. ;;
  1452. FMPY f64 = f64, f34
  1453. ;;
  1454. STFD [BOFFSET] = f64, SIZE
  1455. ;;
  1456. STFD [BOFFSET] = f65, - SIZE
  1457. ;;
  // LN steps C1 back by two elements before the C stores below.
  1458. adds C1 = -2 * SIZE, C1
  1459. ;;
  1460. #endif
  1461. #ifdef LT
  // LT: read elements 0/1 and then 3 (AOFFSET is restored by the -3 step),
  // then forward-substitute row 1 before row 2.
  1462. LDFPD f32, f33 = [AOFFSET]
  1463. adds AOFFSET = 3 * SIZE, AOFFSET
  1464. ;;
  1465. LDFD f34 = [AOFFSET], - 3 * SIZE
  1466. ;;
  1467. FMPY f64 = f64, f32
  1468. ;;
  1469. FNMA f65 = f64, f33, f65
  1470. ;;
  1471. FMPY f65 = f65, f34
  1472. ;;
  1473. STFD [BOFFSET] = f64, SIZE
  1474. ;;
  1475. STFD [BOFFSET] = f65, -SIZE
  1476. ;;
  1477. #endif
  1478. #ifdef RN
  // RN: one B element scales both rows; results go back to the A buffer.
  1479. LDFD f32 = [BOFFSET]
  1480. ;;
  1481. FMPY f64 = f64, f32
  1482. FMPY f65 = f65, f32
  1483. ;;
  1484. STFD [AOFFSET] = f64, SIZE
  1485. ;;
  1486. STFD [AOFFSET] = f65, - SIZE
  1487. ;;
  1488. #endif
  1489. #ifdef RT
  // RT: same scaling and write-back as the RN case above.
  1490. LDFD f32 = [BOFFSET]
  1491. ;;
  1492. FMPY f64 = f64, f32
  1493. FMPY f65 = f65, f32
  1494. ;;
  1495. STFD [AOFFSET] = f64, SIZE
  1496. ;;
  1497. STFD [AOFFSET] = f65, - SIZE
  1498. ;;
  1499. #endif
  // Store both results to C1; LN rewinds C1 to the tile start afterwards.
  1500. STFD [C1 ] = f64, SIZE
  1501. ;;
  1502. #ifndef LN
  1503. STFD [C1 ] = f65, SIZE
  1504. #else
  1505. STFD [C1 ] = f65, -SIZE
  1506. #endif
  1507. ;;
  // Clear the accumulators for the next tile.
  1508. mov f64 = f0
  1509. mov f65 = f0
  1510. ;;
  // r2 = K scaled to bytes; L = K - KK, using KK before it is stepped below.
  1511. shladd r2 = K, BASE_SHIFT, r0
  1512. ;;
  1513. sub L = K, KK
  1514. ;;
  // RT: AORIG += 2 * r2.
  1515. #ifdef RT
  1516. shladd AORIG = r2, 1, AORIG
  1517. #else
  1518. nop __LINE__
  1519. #endif
  1520. ;;
  1521. { .mmi
  // LT/RN: scale L to bytes, then advance AOFFSET by 2 * L and BOFFSET by L.
  1522. #if defined(LT) || defined(RN)
  1523. shladd L = L, BASE_SHIFT, r0
  1524. #else
  1525. nop __LINE__
  1526. #endif
  1527. }
  1528. ;;
  1529. { .mmi
  1530. #if defined(LT) || defined(RN)
  1531. shladd AOFFSET = L, 1, AOFFSET
  1532. #else
  1533. nop __LINE__
  1534. #endif
  1535. }
  1536. ;;
  1537. { .mmi
  1538. #if defined(LT) || defined(RN)
  1539. add BOFFSET = L, BOFFSET
  1540. #else
  1541. nop __LINE__
  1542. #endif
  1543. }
  1544. ;;
  1545. { .mmi
  // KK moves by the tile height: +2 for LT, -2 for LN.
  1546. #ifdef LT
  1547. adds KK = 2, KK
  1548. #elif defined LN
  1549. adds KK = -2, KK
  1550. #else
  1551. nop __LINE__
  1552. #endif
  1553. }
  1554. ;;
  1555. { .mmi
  // Reload L for the next section: KK for LT/RN, K - KK otherwise.
  1556. #if defined(LT) || defined(RN)
  1557. mov L = KK
  1558. #else
  1559. sub L = K, KK
  1560. #endif
  1561. }
  1562. ;;
  // .L160: set-up for the 1-row x 1-column tile. Skipped (branch to .L169)
  // when bit 0 of M is clear. Establishes L, p7, p3, p12, ar.lc and preloads
  // the first operands for the .L162 loop.
  1563. .align 8
  1564. .L160:
  1565. { .mib
  // L = number of k-steps to accumulate: KK for LT/RN, K - KK otherwise.
  1566. #if defined(LT) || defined(RN)
  1567. mov L = KK
  1568. #else
  1569. sub L = K, KK
  1570. #endif
  1571. tbit.z p6, p7 = M, 0
  1572. (p6) br.cond.dptk .L169
  1573. }
  1574. ;;
  1575. { .mmi
  // p7 = (L != 0) guards the preloads; r2 = K elements in bytes.
  1576. cmp.ne p7, p0 = r0, L
  1577. adds BOFFSET = 0 * SIZE, B
  1578. shl r2 = K, 0 + BASE_SHIFT
  1579. }
  1580. ;;
  1581. #if defined(LT) || defined(RN)
  1582. { .mmi
  1583. (p7) LDFD f48 = [BOFFSET], 1 * SIZE
  1584. nop __LINE__
  1585. adds L = 1, L
  1586. }
  1587. ;;
  1588. #else
  1589. { .mmi
  // LN/RT: BOFFSET = B + KK elements; LN additionally steps AORIG back by r2.
  1590. shladd BOFFSET = KK, BASE_SHIFT, B
  1591. nop __LINE__
  1592. #ifdef LN
  1593. sub AORIG = AORIG, r2
  1594. #else
  1595. nop __LINE__
  1596. #endif
  1597. }
  1598. ;;
  1599. { .mmi
  1600. (p7) LDFD f48 = [BOFFSET], 1 * SIZE
  1601. adds L = 1, L
  // AOFFSET = AORIG + r3. r3 is not written in this section.
  // NOTE(review): presumably r3 still holds KK << BASE_SHIFT from earlier
  // set-up; confirm it is not stale in builds where KK changes between tiles.
  1602. add AOFFSET = r3, AORIG
  1603. }
  1604. ;;
  1605. #endif
  1606. ;;
  1607. { .mii
  // p12 = (bit 0 of L + 1 is clear), i.e. the original count was odd;
  // L becomes the number of two-step passes.
  1608. tbit.z p12, p0 = L, 0
  1609. shr L = L, 1
  1610. }
  1611. ;;
  1612. { .mmi
  // p6 = (pass count == 0). The compare sits in the same bundle as the
  // decrement of L. NOTE(review): this relies on the compare seeing L before
  // the decrement; the sibling set-up code decrements first and tests for -1.
  1613. cmp.eq p6, p0 = 0, L
  1614. adds L = -1, L
  // p3 = true (r0 == r0): second k-step enabled by default.
  1615. cmp.eq p3, p0 = r0, r0
  1616. }
  1617. ;;
  1618. { .mib
  1619. (p7) LDFD f32 = [AOFFSET], 1 * SIZE
  1620. mov ar.lc = L
  1621. (p6) br.cond.dpnt .L168
  1622. }
  1623. ;;
  // .L162: accumulate loop for the 1-row x 1-column tile, two k-steps per pass.
  // f64 += f32 * f48 always, and += f40 * f56 when p3 is set.
  // p4 = (L != 0) gates the reload for the next pass; when p12 is set, p3 is
  // re-evaluated as (L != 0) so the final pass performs a single k-step.
  1624. .align 8
  1625. .L162:
  1626. { .mmf
  1627. cmp.ne p4, p5 = 0, L
  1628. (p12) cmp.ne p3, p0 = 0, L
  1629. FMA f64 = f32, f48, f64 // A1 * B1
  1630. }
  1631. ;;
  1632. { .mmi
  1633. (p3) LDFD f56 = [BOFFSET], 1 * SIZE
  1634. (p3) LDFD f40 = [AOFFSET], 1 * SIZE
  1635. nop __LINE__
  1636. }
  1637. ;;
  1638. { .mmi
  1639. (p4) LDFD f32 = [AOFFSET], 1 * SIZE
  1640. nop __LINE__
  1641. adds L = -1, L
  1642. }
  1643. { .mfb
  1644. (p4) LDFD f48 = [BOFFSET], 1 * SIZE
  1645. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  // Counted loop branch: repeats while ar.lc is non-zero.
  1646. br.cloop.sptk.few .L162
  1647. }
  1648. ;;
  // .L168: solve and write-back for the 1-row x 1-column tile.
  // On entry f64 holds the accumulated product from .L162.
  //   1. LN/RT: point AOFFSET/BOFFSET at element KK - 1 of the A/B buffers.
  //   2. Load the right-hand side (f32) and the triangular-matrix entry (f33);
  //      which buffer supplies which depends on the variant.
  //   3. f64 = (f32 - f64) * f33 (assuming FSUB d = a, b yields a - b). The
  //      entry is multiplied, so it is presumably stored as a reciprocal —
  //      TODO confirm against the packing routine.
  //   4. Store f64 to the packed buffer and to C1, then clear f64.
  //   5. LT/RN: advance AOFFSET/BOFFSET by the K - KK elements left in this
  //      panel, then reload L for the following section.
  // Step 5 has to use the L = K - KK value computed before KK is stepped, as
  // the 4-row and 2-row tiles do, so the reload of L is placed after the
  // pointer advance rather than before it.
  1649. .align 8
  1650. .L168:
  1651. #if defined(LN) || defined(RT)
  // Both variants use KK - 1 here (tile height and width are both 1).
  1652. #ifdef LN
  1653. adds r2 = -1, KK
  1654. #else
  1655. adds r2 = -1, KK
  1656. #endif
  1657. ;;
  1658. shladd r2 = r2, BASE_SHIFT, r0
  1659. ;;
  1660. add AOFFSET = r2, AORIG
  1661. add BOFFSET = r2, B
  1662. ;;
  1663. #endif
  1664. #if defined(LN) || defined(LT)
  1665. { .mmi
  // LN/LT: right-hand side from B, matrix entry from A.
  1666. LDFD f32 = [BOFFSET]
  1667. LDFD f33 = [AOFFSET]
  1668. #ifdef LN
  1669. adds C1 = -1 * SIZE, C1
  1670. #else
  1671. nop __LINE__
  1672. #endif
  1673. }
  1674. ;;
  1675. #else
  1676. { .mmi
  // RN/RT: right-hand side from A, matrix entry from B.
  1677. LDFD f32 = [AOFFSET]
  1678. LDFD f33 = [BOFFSET]
  1679. nop __LINE__
  1680. }
  1681. ;;
  1682. #endif
  1683. { .mmf
  // L = K - KK with the pre-update KK; consumed by the pointer advance below.
  1684. sub L = K, KK
  1685. #ifdef RT
  1686. shladd AORIG = K, BASE_SHIFT, AORIG
  1687. #else
  1688. nop __LINE__
  1689. #endif
  1690. FSUB f64 = f32, f64
  1691. }
  1692. ;;
  // KK moves by the tile height: +1 for LT, -1 for LN.
  1693. #ifdef LT
  1694. adds KK = 1, KK
  1695. #elif defined LN
  1696. adds KK = -1, KK
  1697. #else
  1698. nop __LINE__
  1699. #endif
  1700. ;;
  1707. FMPY f64 = f64, f33
  1708. ;;
  1709. #if defined(LN) || defined(LT)
  1710. { .mmf
  1711. STFD [BOFFSET] = f64
  1712. #ifndef LN
  1713. STFD [C1 ] = f64, SIZE
  1714. #else
  1715. STFD [C1 ] = f64
  1716. #endif
  1717. mov f64 = f0
  1718. }
  1719. ;;
  1720. #else
  1721. { .mmf
  1722. STFD [AOFFSET] = f64
  1723. STFD [C1 ] = f64, SIZE
  1724. mov f64 = f0
  1725. }
  1726. ;;
  1727. #endif
  // LT/RN: step both buffer pointers past the remaining K - KK elements.
  // L must still hold K - KK here; it is reloaded only afterwards.
  1728. #if defined(LT) || defined(RN)
  1729. shladd AOFFSET = L, BASE_SHIFT, AOFFSET
  1730. #else
  1731. nop __LINE__
  1732. #endif
  1733. #if defined(LT) || defined(RN)
  1734. shladd BOFFSET = L, BASE_SHIFT, BOFFSET
  1735. #else
  1736. nop __LINE__
  1737. #endif
  1738. ;;
  // Reload L for the next section: KK for LT/RN, K - KK otherwise.
  1701. #if defined(LT) || defined(RN)
  1702. mov L = KK
  1703. #else
  1704. sub L = K, KK
  1705. #endif
  1706. ;;
  // .L169: end of the single-column panel. Moves B on to the next panel,
  // steps KK for the right-side variants, and resets AOFFSET to A.
  1739. .align 8
  1740. .L169:
  1741. { .mii
  // LN: B += K elements. LT/RN: B takes the position BOFFSET has reached.
  1742. #ifdef LN
  1743. shladd B = K, BASE_SHIFT, B
  1744. #elif defined(LT) || defined(RN)
  1745. mov B = BOFFSET
  1746. #else
  1747. nop __LINE__
  1748. #endif
  // KK moves by the panel width: +1 for RN, -1 for RT.
  1749. #ifdef RN
  1750. adds KK = 1, KK
  1751. #elif defined RT
  1752. adds KK = -1, KK
  1753. #else
  1754. nop __LINE__
  1755. #endif
  1756. mov AOFFSET = A
  1757. }
  1758. ;;
  // .L090: entry to the two-column panel. Skipped (branch to .L050) when bit 1
  // of N is clear. Sets up B/C for RT, clears the accumulators, derives the
  // 8-row block count I, the C column pointers C1/C2, KK and L, then either
  // falls into the 8x2 tile code or branches to .L100 when I is zero.
  1759. .align 16
  1760. .L090:
  1761. tbit.z p6, p0 = N, 1
  1762. (p6) br.cond.dpnt .L050
  1763. ;;
  1764. #ifdef RT
  // RT: step B back by 2 * K elements and C back by 2 * LDC.
  1765. { .mmi
  1766. shladd r3 = LDC, 1, r0
  1767. nop __LINE__
  1768. shl r2 = K, 1 + BASE_SHIFT
  1769. }
  1770. ;;
  1771. { .mmi
  1772. sub B = B, r2
  1773. sub C = C, r3
  1774. nop __LINE__
  1775. }
  1776. #endif
  1777. ;;
  // Clear the first four accumulators of each of the two columns.
  1778. mov f64 = f0
  1779. mov f65 = f0
  1780. mov f66 = f0
  1781. mov f67 = f0
  1782. mov f72 = f0
  1783. mov f73 = f0
  1784. mov f74 = f0
  1785. mov f75 = f0
  1786. ;;
  1787. { .mfi
  // I = M / 8: number of full 8-row tiles.
  1788. shr I = M, 3
  1789. }
  1790. { .mfi
  1791. mov C1 = C // coffset1 = c + 0 * ldc
  // KK starts at M + OFFSET for LN and at OFFSET for LT.
  1792. #ifdef LN
  1793. add KK = M, OFFSET
  1794. #elif defined LT
  1795. mov KK = OFFSET
  1796. #else
  1797. nop __LINE__
  1798. #endif
  1799. }
  1800. ;;
  1801. { .mmf
  // p6 = (I == 0): no full 8-row tile in this panel.
  1802. cmp.eq p6, p7 = 0, I
  1803. #if defined(LN) || defined(RT)
  1804. mov AORIG = A
  1805. #else
  1806. mov AOFFSET = A
  1807. #endif
  1808. }
  1809. { .mmf
  1810. add C2 = LDC, C // coffset2 = c + 1 * ldc
  1811. }
  1812. ;;
  1813. { .mfi
  1814. #ifndef RT
  1815. shladd C = LDC, 1, C // coffset += 2 * ldc
  1816. #else
  1817. nop __LINE__
  1818. #endif
  1819. mov f81 = f0
  // L = number of k-steps to accumulate: KK for LT/RN, K - KK otherwise.
  1820. #if defined(LT) || defined(RN)
  1821. mov L = KK
  1822. #else
  1823. sub L = K, KK
  1824. #endif
  1825. }{ .mfb
  1826. (p6) br.cond.dpnt .L100
  1827. }
  1828. ;;
  // .L092: set-up for one 8-row x 2-column tile. Positions AOFFSET/BOFFSET,
  // preloads the first A (f32..f39) and B (f48, f49) operands when L != 0,
  // starts the C and A/B prefetch streams, clears the upper accumulators, and
  // programs ar.lc with the pass count for the .L093 loop.
  1829. .align 16
  1830. .L092:
  1831. { .mmi
  // p7 = (L != 0) guards the preloads; r2 = K * 8 elements in bytes.
  1832. cmp.ne p7, p0 = r0, L
  1833. adds BOFFSET = 0 * SIZE, B
  1834. shl r2 = K, 3 + BASE_SHIFT
  1835. }
  1836. { .mmi
  // r3 = KK elements in bytes.
  1837. shladd r3 = KK, BASE_SHIFT, r0
  1838. nop __LINE__
  1839. nop __LINE__
  1840. }
  1841. ;;
  1842. #if defined(LT) || defined(RN)
  1843. { .mmi
  1844. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  1845. nop __LINE__
  1846. nop __LINE__
  1847. }
  1848. ;;
  1849. #else
  1850. { .mfi
  // LN/RT: BOFFSET = B + 2 * r3; LN additionally steps AORIG back by r2.
  1851. shladd BOFFSET = r3, 1, B
  1852. #ifdef LN
  1853. sub AORIG = AORIG, r2
  1854. #else
  1855. nop __LINE__
  1856. #endif
  1857. }
  1858. ;;
  1859. { .mfi
  1860. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  // AOFFSET = AORIG + 8 * r3 (eight rows per k-step).
  1861. shladd AOFFSET = r3, 3, AORIG
  1862. }
  1863. ;;
  1864. #endif
  // Preload the eight A operands of the first k-step.
  1865. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  1866. ;;
  1867. { .mmf
  1868. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  1869. }
  1870. ;;
  1871. { .mmf
  1872. (p7) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  1873. }
  1874. { .mfi
  // p3 = true (r0 == r0): second k-step enabled by default.
  1875. cmp.eq p3, p0 = r0, r0
  1876. }
  1877. ;;
  1878. { .mmf
  1879. (p7) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  1880. }
  1881. { .mfi
  // Prefetch the C tile: first through PREC, then again after stepping by LDC.
  1882. adds PREC = CPREFETCHSIZE * SIZE, C1
  1883. }
  1884. ;;
  1885. { .mmf
  1886. CPREFETCH [PREC], LDC
  1887. }
  1888. { .mfi
  // Loop runs (L + 1) >> 1 passes of two k-steps each.
  1889. adds L = 1, L
  1890. }
  1891. ;;
  1892. { .mmf
  1893. CPREFETCH [PREC]
  1894. }
  1895. { .mfi
  1896. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  1897. }
  1898. ;;
  1899. { .mfi
  1900. adds PREB = (PREFETCHSIZE - 8) * SIZE, BOFFSET
  1901. }
  1902. ;;
  1903. { .mfi
  // p12 = (bit 0 of L + 1 is clear), i.e. the original count was odd.
  1904. tbit.z p12, p0 = L, 0
  1905. }
  1906. { .mfi
  1907. shr L = L, 1
  1908. }
  1909. ;;
  1910. { .mfi
  1911. adds L = -1, L
  1912. }
  1913. ;;
  1914. { .mfi
  1915. mov ar.lc = L
  1916. }
  1917. ;;
  // Clear the accumulators for rows 5..8 of both columns.
  1918. mov f68 = f0
  1919. mov f69 = f0
  1920. mov f70 = f0
  1921. mov f71 = f0
  1922. mov f76 = f0
  1923. mov f77 = f0
  1924. mov f78 = f0
  1925. mov f79 = f0
  1926. ;;
  1927. { .mfb
  // p6 = (pass count was zero): nothing to accumulate, go straight to .L098.
  1928. cmp.eq p6, p0 = -1, L
  1929. (p6) br.cond.dpnt .L098
  1930. }
  1931. ;;
  // .L093: accumulate loop for the 8-row x 2-column tile, two k-steps per pass.
  // First k-step:  f64..f71 += {f32..f39} * f48 and f72..f79 += {f32..f39} * f49.
  // Second k-step: the same sums with {f40..f47} and f56/f57, predicated on p3.
  //   p4 = (L != 0): another pass follows, so the first-step operands are reloaded.
  //   p3: when p12 is set it is re-evaluated as (L != 0), so the final pass
  //       performs only the first k-step.
  // Loads for the following step are interleaved with the current step's FMAs.
  1932. .align 8
  1933. .L093:
  1934. /* 1 */
  1935. { .mfi
  1936. lfetch.nt1 [PREA], 16 * SIZE
  1937. FMA f64 = f32, f48, f64 // A1 * B1
  1938. cmp.ne p4, p5 = 0, L
  1939. }
  1940. { .mfi
  1941. nop __LINE__
  1942. FMA f72 = f32, f49, f72 // A1 * B2
  1943. (p12) cmp.ne p3, p0 = 0, L
  1944. }
  1945. ;;
  1946. { .mfi
  1947. lfetch.nt1 [PREB], 4 * SIZE
  1948. FMA f65 = f33, f48, f65 // A2 * B1
  // C9..C12 are recomputed on every pass; nothing inside the loop reads them.
  1949. adds C9 = 4 * SIZE, C1
  1950. }
  1951. { .mfi
  1952. nop __LINE__
  1953. FMA f73 = f33, f49, f73 // A2 * B2
  1954. adds C10 = 4 * SIZE, C2
  1955. }
  1956. ;;
  1957. { .mfi
  1958. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  1959. FMA f66 = f34, f48, f66 // A3 * B1
  // NOTE(review): C3/C4 are not assigned anywhere in the visible two-column
  // set-up; C11/C12 look like leftovers from a wider panel — confirm.
  1960. adds C11 = 4 * SIZE, C3
  1961. }
  1962. { .mfi
  1963. nop __LINE__
  1964. FMA f74 = f34, f49, f74 // A3 * B2
  1965. adds C12 = 4 * SIZE, C4
  1966. }
  1967. ;;
  1968. { .mfb
  1969. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  1970. FMA f67 = f35, f48, f67 // A4 * B1
  1971. nop __LINE__
  1972. }
  1973. { .mfb
  1974. nop __LINE__
  1975. FMA f75 = f35, f49, f75 // A4 * B2
  1976. nop __LINE__
  1977. }
  1978. ;;
  1979. { .mfb
  1980. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  1981. FMA f68 = f36, f48, f68 // A5 * B1
  1982. nop __LINE__
  1983. }
  1984. { .mfb
  1985. nop __LINE__
  1986. FMA f76 = f36, f49, f76 // A5 * B2
  1987. nop __LINE__
  1988. }
  1989. ;;
  1990. { .mfb
  1991. (p3) LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  1992. FMA f69 = f37, f48, f69 // A6 * B1
  1993. nop __LINE__
  1994. }
  1995. { .mfb
  1996. nop __LINE__
  1997. FMA f77 = f37, f49, f77 // A6 * B2
  1998. nop __LINE__
  1999. }
  2000. ;;
  2001. { .mfb
  2002. (p3) LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  2003. FMA f70 = f38, f48, f70 // A7 * B1
  2004. nop __LINE__
  2005. }
  2006. { .mfb
  2007. nop __LINE__
  2008. FMA f78 = f38, f49, f78 // A7 * B2
  2009. nop __LINE__
  2010. }
  2011. ;;
  // From here the first-step operands are reloaded (under p4) for the next pass
  // while the remaining first-step FMAs and the second k-step execute.
  2012. { .mfb
  2013. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  2014. FMA f71 = f39, f48, f71 // A8 * B1
  2015. nop __LINE__
  2016. }
  2017. { .mfb
  2018. nop __LINE__
  2019. FMA f79 = f39, f49, f79 // A8 * B2
  2020. nop __LINE__
  2021. }
  2022. ;;
  2023. { .mfb
  2024. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  2025. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  2026. nop __LINE__
  2027. }
  2028. { .mfb
  2029. nop __LINE__
  2030. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  2031. nop __LINE__
  2032. }
  2033. ;;
  2034. { .mfb
  2035. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  2036. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  2037. nop __LINE__
  2038. }
  2039. { .mfb
  2040. nop __LINE__
  2041. (p3) FMA f73 = f41, f57, f73 // A2 * B2
  2042. nop __LINE__
  2043. }
  2044. ;;
  2045. { .mfb
  2046. (p4) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  2047. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  2048. nop __LINE__
  2049. }
  2050. { .mfb
  2051. nop __LINE__
  2052. (p3) FMA f74 = f42, f57, f74 // A3 * B2
  2053. nop __LINE__
  2054. }
  2055. ;;
  2056. { .mfb
  2057. (p4) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  2058. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  2059. nop __LINE__
  2060. }
  2061. { .mfb
  2062. nop __LINE__
  2063. (p3) FMA f75 = f43, f57, f75 // A4 * B2
  2064. nop __LINE__
  2065. }
  2066. ;;
  2067. { .mfb
  2068. nop __LINE__
  2069. (p3) FMA f68 = f44, f56, f68 // A5 * B1
  2070. nop __LINE__
  2071. }
  2072. { .mfb
  2073. nop __LINE__
  2074. (p3) FMA f76 = f44, f57, f76 // A5 * B2
  2075. nop __LINE__
  2076. }
  2077. ;;
  2078. { .mfb
  2079. nop __LINE__
  2080. (p3) FMA f69 = f45, f56, f69 // A6 * B1
  2081. nop __LINE__
  2082. }
  2083. { .mfb
  2084. nop __LINE__
  2085. (p3) FMA f77 = f45, f57, f77 // A6 * B2
  2086. nop __LINE__
  2087. }
  2088. ;;
  2089. { .mfb
  2090. nop __LINE__
  2091. (p3) FMA f70 = f46, f56, f70 // A7 * B1
  2092. nop __LINE__
  2093. }
  2094. { .mfb
  2095. nop __LINE__
  2096. (p3) FMA f78 = f46, f57, f78 // A7 * B2
  2097. nop __LINE__
  2098. }
  2099. ;;
  2100. { .mfi
  2101. nop __LINE__
  2102. (p3) FMA f71 = f47, f56, f71 // A8 * B1
  2103. adds L = -1, L
  2104. }
  2105. { .mfb
  2106. nop __LINE__
  2107. (p3) FMA f79 = f47, f57, f79 // A8 * B2
  // Counted loop branch: repeats while ar.lc is non-zero.
  2108. br.cloop.sptk.few .L093
  2109. }
  2110. ;;
  2111. .align 8
  2112. .L098:
  2113. #if defined(LN) || defined(RT)
  2114. #ifdef LN
  2115. adds r2 = -8, KK
  2116. #else
  2117. adds r2 = -2, KK
  2118. #endif
  2119. ;;
  2120. shladd r2 = r2, BASE_SHIFT, r0
  2121. ;;
  2122. shladd AOFFSET = r2, 3, AORIG
  2123. shladd BOFFSET = r2, 1, B
  2124. ;;
  2125. #endif
  2126. adds AOFFSET2 = 4 * SIZE, AOFFSET
  2127. adds BOFFSET2 = 4 * SIZE, BOFFSET
  2128. ;;
  2129. #if defined(LN) || defined(LT)
  2130. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  2131. ;;
  2132. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  2133. ;;
  2134. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  2135. ;;
  2136. LDFPD f38, f39 = [BOFFSET], 2 * SIZE
  2137. ;;
  2138. LDFPD f40, f41 = [BOFFSET], 2 * SIZE
  2139. ;;
  2140. LDFPD f42, f43 = [BOFFSET], 2 * SIZE
  2141. ;;
  2142. LDFPD f44, f45 = [BOFFSET], 2 * SIZE
  2143. ;;
  2144. LDFPD f46, f47 = [BOFFSET]
  2145. adds BOFFSET = -14 * SIZE, BOFFSET
  2146. ;;
  2147. FSUB f64 = f32, f64
  2148. FSUB f72 = f33, f72
  2149. FSUB f65 = f34, f65
  2150. FSUB f73 = f35, f73
  2151. FSUB f66 = f36, f66
  2152. FSUB f74 = f37, f74
  2153. FSUB f67 = f38, f67
  2154. FSUB f75 = f39, f75
  2155. FSUB f68 = f40, f68
  2156. FSUB f76 = f41, f76
  2157. FSUB f69 = f42, f69
  2158. FSUB f77 = f43, f77
  2159. FSUB f70 = f44, f70
  2160. FSUB f78 = f45, f78
  2161. FSUB f71 = f46, f71
  2162. FSUB f79 = f47, f79
  2163. ;;
  2164. #else
  2165. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  2166. ;;
  2167. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  2168. ;;
  2169. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  2170. ;;
  2171. LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  2172. ;;
  2173. LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  2174. ;;
  2175. LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  2176. ;;
  2177. LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  2178. ;;
  2179. LDFPD f46, f47 = [AOFFSET]
  2180. adds AOFFSET = -14 * SIZE, AOFFSET
  2181. ;;
  2182. FSUB f64 = f32, f64
  2183. FSUB f65 = f33, f65
  2184. FSUB f66 = f34, f66
  2185. FSUB f67 = f35, f67
  2186. FSUB f68 = f36, f68
  2187. FSUB f69 = f37, f69
  2188. FSUB f70 = f38, f70
  2189. FSUB f71 = f39, f71
  2190. ;;
  2191. FSUB f72 = f40, f72
  2192. FSUB f73 = f41, f73
  2193. FSUB f74 = f42, f74
  2194. FSUB f75 = f43, f75
  2195. FSUB f76 = f44, f76
  2196. FSUB f77 = f45, f77
  2197. FSUB f78 = f46, f78
  2198. FSUB f79 = f47, f79
  2199. ;;
  2200. #endif
  2201. #ifdef LN
  2202. adds AOFFSET = 62 * SIZE, AOFFSET
  2203. ;;
  2204. LDFPD f33, f32 = [AOFFSET]
  2205. adds AOFFSET = - 2 * SIZE, AOFFSET
  2206. ;;
  2207. LDFPD f35, f34 = [AOFFSET]
  2208. adds AOFFSET = - 2 * SIZE, AOFFSET
  2209. ;;
  2210. LDFPD f37, f36 = [AOFFSET]
  2211. adds AOFFSET = - 2 * SIZE, AOFFSET
  2212. ;;
  2213. LDFPD f39, f38 = [AOFFSET]
  2214. adds AOFFSET = - 2 * SIZE, AOFFSET
  2215. ;;
  2216. LDFD f40 = [AOFFSET], -2 * SIZE
  2217. ;;
  2218. LDFPD f42, f41 = [AOFFSET]
  2219. adds AOFFSET = - 2 * SIZE, AOFFSET
  2220. ;;
  2221. LDFPD f44, f43 = [AOFFSET]
  2222. adds AOFFSET = - 2 * SIZE, AOFFSET
  2223. ;;
  2224. LDFPD f46, f45 = [AOFFSET]
  2225. adds AOFFSET = - 4 * SIZE, AOFFSET
  2226. ;;
  2227. LDFPD f48, f47 = [AOFFSET]
  2228. adds AOFFSET = - 2 * SIZE, AOFFSET
  2229. ;;
  2230. LDFPD f50, f49 = [AOFFSET]
  2231. adds AOFFSET = - 2 * SIZE, AOFFSET
  2232. ;;
  2233. LDFPD f52, f51 = [AOFFSET]
  2234. adds AOFFSET = - 4 * SIZE, AOFFSET
  2235. ;;
  2236. LDFD f53 = [AOFFSET], -2 * SIZE
  2237. ;;
  2238. LDFPD f55, f54 = [AOFFSET]
  2239. adds AOFFSET = - 2 * SIZE, AOFFSET
  2240. ;;
  2241. LDFPD f57, f56 = [AOFFSET]
  2242. adds AOFFSET = - 6 * SIZE, AOFFSET
  2243. ;;
  2244. LDFPD f59, f58 = [AOFFSET]
  2245. adds AOFFSET = - 2 * SIZE, AOFFSET
  2246. ;;
  2247. LDFPD f61, f60 = [AOFFSET]
  2248. adds AOFFSET = - 6 * SIZE, AOFFSET
  2249. ;;
  2250. LDFD f16 = [AOFFSET], -2 * SIZE
  2251. ;;
  2252. LDFPD f18, f17 = [AOFFSET]
  2253. adds AOFFSET = - 8 * SIZE, AOFFSET
  2254. ;;
  2255. LDFPD f20, f19 = [AOFFSET]
  2256. adds AOFFSET = - 8 * SIZE, AOFFSET
  2257. ;;
  2258. LDFD f21 = [AOFFSET]
  2259. ;;
  2260. FMPY f71 = f71, f32
  2261. FMPY f79 = f79, f32
  2262. ;;
  2263. FNMA f70 = f71, f33, f70
  2264. FNMA f78 = f79, f33, f78
  2265. ;;
  2266. FNMA f69 = f71, f34, f69
  2267. FNMA f77 = f79, f34, f77
  2268. ;;
  2269. FNMA f68 = f71, f35, f68
  2270. FNMA f76 = f79, f35, f76
  2271. ;;
  2272. FNMA f67 = f71, f36, f67
  2273. FNMA f75 = f79, f36, f75
  2274. ;;
  2275. FNMA f66 = f71, f37, f66
  2276. FNMA f74 = f79, f37, f74
  2277. ;;
  2278. FNMA f65 = f71, f38, f65
  2279. FNMA f73 = f79, f38, f73
  2280. ;;
  2281. FNMA f64 = f71, f39, f64
  2282. FNMA f72 = f79, f39, f72
  2283. ;;
  2284. FMPY f70 = f70, f40
  2285. FMPY f78 = f78, f40
  2286. ;;
  2287. FNMA f69 = f70, f41, f69
  2288. FNMA f77 = f78, f41, f77
  2289. ;;
  2290. FNMA f68 = f70, f42, f68
  2291. FNMA f76 = f78, f42, f76
  2292. ;;
  2293. FNMA f67 = f70, f43, f67
  2294. FNMA f75 = f78, f43, f75
  2295. ;;
  2296. FNMA f66 = f70, f44, f66
  2297. FNMA f74 = f78, f44, f74
  2298. ;;
  2299. FNMA f65 = f70, f45, f65
  2300. FNMA f73 = f78, f45, f73
  2301. ;;
  2302. FNMA f64 = f70, f46, f64
  2303. FNMA f72 = f78, f46, f72
  2304. ;;
  2305. FMPY f69 = f69, f47
  2306. FMPY f77 = f77, f47
  2307. ;;
  2308. FNMA f68 = f69, f48, f68
  2309. FNMA f76 = f77, f48, f76
  2310. ;;
  2311. FNMA f67 = f69, f49, f67
  2312. FNMA f75 = f77, f49, f75
  2313. ;;
  2314. FNMA f66 = f69, f50, f66
  2315. FNMA f74 = f77, f50, f74
  2316. ;;
  2317. FNMA f65 = f69, f51, f65
  2318. FNMA f73 = f77, f51, f73
  2319. ;;
  2320. FNMA f64 = f69, f52, f64
  2321. FNMA f72 = f77, f52, f72
  2322. ;;
  2323. FMPY f68 = f68, f53
  2324. FMPY f76 = f76, f53
  2325. ;;
  2326. FNMA f67 = f68, f54, f67
  2327. FNMA f75 = f76, f54, f75
  2328. ;;
  2329. FNMA f66 = f68, f55, f66
  2330. FNMA f74 = f76, f55, f74
  2331. ;;
  2332. FNMA f65 = f68, f56, f65
  2333. FNMA f73 = f76, f56, f73
  2334. ;;
  2335. FNMA f64 = f68, f57, f64
  2336. FNMA f72 = f76, f57, f72
  2337. ;;
  2338. FMPY f67 = f67, f58
  2339. FMPY f75 = f75, f58
  2340. ;;
  2341. FNMA f66 = f67, f59, f66
  2342. FNMA f74 = f75, f59, f74
  2343. ;;
  2344. FNMA f65 = f67, f60, f65
  2345. FNMA f73 = f75, f60, f73
  2346. ;;
  2347. FNMA f64 = f67, f61, f64
  2348. FNMA f72 = f75, f61, f72
  2349. ;;
  2350. FMPY f66 = f66, f16
  2351. FMPY f74 = f74, f16
  2352. ;;
  2353. FNMA f65 = f66, f17, f65
  2354. FNMA f73 = f74, f17, f73
  2355. ;;
  2356. FNMA f64 = f66, f18, f64
  2357. FNMA f72 = f74, f18, f72
  2358. ;;
  2359. FMPY f65 = f65, f19
  2360. FMPY f73 = f73, f19
  2361. ;;
  2362. FNMA f64 = f65, f20, f64
  2363. FNMA f72 = f73, f20, f72
  2364. ;;
  2365. FMPY f64 = f64, f21
  2366. FMPY f72 = f72, f21
  2367. ;;
  2368. adds BOFFSET = 8 * SIZE, BOFFSET
  2369. adds BOFFSET2 = 8 * SIZE, BOFFSET2
  2370. ;;
  2371. STFD [BOFFSET] = f68, SIZE
  2372. STFD [BOFFSET2] = f70, SIZE
  2373. ;;
  2374. STFD [BOFFSET] = f76, SIZE
  2375. STFD [BOFFSET2] = f78, SIZE
  2376. ;;
  2377. STFD [BOFFSET] = f69, SIZE
  2378. STFD [BOFFSET2] = f71, SIZE
  2379. ;;
  2380. STFD [BOFFSET] = f77, - 11 * SIZE
  2381. STFD [BOFFSET2] = f79, - 11 * SIZE
  2382. ;;
  2383. STFD [BOFFSET] = f64, SIZE
  2384. STFD [BOFFSET2] = f66, SIZE
  2385. ;;
  2386. STFD [BOFFSET] = f72, SIZE
  2387. STFD [BOFFSET2] = f74, SIZE
  2388. ;;
  2389. STFD [BOFFSET] = f65, SIZE
  2390. STFD [BOFFSET2] = f67, SIZE
  2391. ;;
  2392. STFD [BOFFSET] = f73, - 3 * SIZE
  2393. STFD [BOFFSET2] = f75, - 3 * SIZE
  2394. ;;
  2395. adds C1 = -8 * SIZE, C1
  2396. adds C2 = -8 * SIZE, C2
  2397. ;;
  2398. #endif
  2399. #ifdef LT
  2400. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  2401. ;;
  2402. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  2403. ;;
  2404. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  2405. ;;
  2406. LDFPD f38, f39 = [AOFFSET]
  2407. adds AOFFSET = 3 * SIZE, AOFFSET
  2408. ;;
  2409. LDFD f40 = [AOFFSET], 1 * SIZE
  2410. ;;
  2411. LDFPD f41, f42 = [AOFFSET], 2 * SIZE
  2412. ;;
  2413. LDFPD f43, f44 = [AOFFSET], 2 * SIZE
  2414. ;;
  2415. LDFPD f45, f46 = [AOFFSET]
  2416. adds AOFFSET = 4 * SIZE, AOFFSET
  2417. ;;
  2418. LDFPD f47, f48 = [AOFFSET], 2 * SIZE
  2419. ;;
  2420. LDFPD f49, f50 = [AOFFSET], 2 * SIZE
  2421. ;;
  2422. LDFPD f51, f52 = [AOFFSET]
  2423. adds AOFFSET = 5 * SIZE, AOFFSET
  2424. ;;
  2425. LDFD f53 = [AOFFSET], 1 * SIZE
  2426. ;;
  2427. LDFPD f54, f55 = [AOFFSET], 2 * SIZE
  2428. ;;
  2429. LDFPD f56, f57 = [AOFFSET]
  2430. adds AOFFSET = 6 * SIZE, AOFFSET
  2431. ;;
  2432. LDFPD f58, f59 = [AOFFSET], 2 * SIZE
  2433. ;;
  2434. LDFPD f60, f61 = [AOFFSET]
  2435. adds AOFFSET = 7 * SIZE, AOFFSET
  2436. ;;
  2437. LDFD f16 = [AOFFSET], 1 * SIZE
  2438. ;;
  2439. LDFPD f17, f18 = [AOFFSET]
  2440. adds AOFFSET = 8 * SIZE, AOFFSET
  2441. ;;
  2442. LDFPD f19, f20 = [AOFFSET]
  2443. adds AOFFSET = 9 * SIZE, AOFFSET
  2444. ;;
  2445. LDFD f21 = [AOFFSET]
  2446. adds AOFFSET = -63 * SIZE, AOFFSET
  2447. ;;
  2448. FMPY f64 = f64, f32
  2449. FMPY f72 = f72, f32
  2450. ;;
  2451. FNMA f65 = f64, f33, f65
  2452. FNMA f73 = f72, f33, f73
  2453. ;;
  2454. FNMA f66 = f64, f34, f66
  2455. FNMA f74 = f72, f34, f74
  2456. ;;
  2457. FNMA f67 = f64, f35, f67
  2458. FNMA f75 = f72, f35, f75
  2459. ;;
  2460. FNMA f68 = f64, f36, f68
  2461. FNMA f76 = f72, f36, f76
  2462. ;;
  2463. FNMA f69 = f64, f37, f69
  2464. FNMA f77 = f72, f37, f77
  2465. ;;
  2466. FNMA f70 = f64, f38, f70
  2467. FNMA f78 = f72, f38, f78
  2468. ;;
  2469. FNMA f71 = f64, f39, f71
  2470. FNMA f79 = f72, f39, f79
  2471. ;;
  2472. FMPY f65 = f65, f40
  2473. FMPY f73 = f73, f40
  2474. ;;
  2475. FNMA f66 = f65, f41, f66
  2476. FNMA f74 = f73, f41, f74
  2477. ;;
  2478. FNMA f67 = f65, f42, f67
  2479. FNMA f75 = f73, f42, f75
  2480. ;;
  2481. FNMA f68 = f65, f43, f68
  2482. FNMA f76 = f73, f43, f76
  2483. ;;
  2484. FNMA f69 = f65, f44, f69
  2485. FNMA f77 = f73, f44, f77
  2486. ;;
  2487. FNMA f70 = f65, f45, f70
  2488. FNMA f78 = f73, f45, f78
  2489. ;;
  2490. FNMA f71 = f65, f46, f71
  2491. FNMA f79 = f73, f46, f79
  2492. ;;
  2493. FMPY f66 = f66, f47
  2494. FMPY f74 = f74, f47
  2495. ;;
  2496. FNMA f67 = f66, f48, f67
  2497. FNMA f75 = f74, f48, f75
  2498. ;;
  2499. FNMA f68 = f66, f49, f68
  2500. FNMA f76 = f74, f49, f76
  2501. ;;
  2502. FNMA f69 = f66, f50, f69
  2503. FNMA f77 = f74, f50, f77
  2504. ;;
  2505. FNMA f70 = f66, f51, f70
  2506. FNMA f78 = f74, f51, f78
  2507. ;;
  2508. FNMA f71 = f66, f52, f71
  2509. FNMA f79 = f74, f52, f79
  2510. ;;
  2511. FMPY f67 = f67, f53
  2512. FMPY f75 = f75, f53
  2513. ;;
  2514. FNMA f68 = f67, f54, f68
  2515. FNMA f76 = f75, f54, f76
  2516. ;;
  2517. FNMA f69 = f67, f55, f69
  2518. FNMA f77 = f75, f55, f77
  2519. ;;
  2520. FNMA f70 = f67, f56, f70
  2521. FNMA f78 = f75, f56, f78
  2522. ;;
  2523. FNMA f71 = f67, f57, f71
  2524. FNMA f79 = f75, f57, f79
  2525. ;;
  2526. FMPY f68 = f68, f58
  2527. FMPY f76 = f76, f58
  2528. ;;
  2529. FNMA f69 = f68, f59, f69
  2530. FNMA f77 = f76, f59, f77
  2531. ;;
  2532. FNMA f70 = f68, f60, f70
  2533. FNMA f78 = f76, f60, f78
  2534. ;;
  2535. FNMA f71 = f68, f61, f71
  2536. FNMA f79 = f76, f61, f79
  2537. ;;
  2538. FMPY f69 = f69, f16
  2539. FMPY f77 = f77, f16
  2540. ;;
  2541. FNMA f70 = f69, f17, f70
  2542. FNMA f78 = f77, f17, f78
  2543. ;;
  2544. FNMA f71 = f69, f18, f71
  2545. FNMA f79 = f77, f18, f79
  2546. ;;
  2547. FMPY f70 = f70, f19
  2548. FMPY f78 = f78, f19
  2549. ;;
  2550. FNMA f71 = f70, f20, f71
  2551. FNMA f79 = f78, f20, f79
  2552. ;;
  2553. FMPY f71 = f71, f21
  2554. FMPY f79 = f79, f21
  2555. ;;
  2556. STFD [BOFFSET] = f64, SIZE
  2557. STFD [BOFFSET2] = f66, SIZE
  2558. ;;
  2559. STFD [BOFFSET] = f72, SIZE
  2560. STFD [BOFFSET2] = f74, SIZE
  2561. ;;
  2562. STFD [BOFFSET] = f65, SIZE
  2563. STFD [BOFFSET2] = f67, SIZE
  2564. ;;
  2565. STFD [BOFFSET] = f73, 5 * SIZE
  2566. STFD [BOFFSET2] = f75, 5 * SIZE
  2567. ;;
  2568. STFD [BOFFSET] = f68, SIZE
  2569. STFD [BOFFSET2] = f70, SIZE
  2570. ;;
  2571. STFD [BOFFSET] = f76, SIZE
  2572. STFD [BOFFSET2] = f78, SIZE
  2573. ;;
  2574. STFD [BOFFSET] = f69, SIZE
  2575. STFD [BOFFSET2] = f71, SIZE
  2576. ;;
  2577. STFD [BOFFSET] = f77, -11 * SIZE
  2578. STFD [BOFFSET2] = f79, -11 * SIZE
  2579. ;;
  2580. adds C9 = 4 * SIZE, C1
  2581. ;;
  2582. #endif
  2583. #ifdef RN
  2584. LDFPD f32, f33 = [BOFFSET]
  2585. adds BOFFSET = 3 * SIZE, BOFFSET
  2586. ;;
  2587. LDFD f34 = [BOFFSET], -3 * SIZE
  2588. ;;
  2589. FMPY f64 = f64, f32
  2590. FMPY f68 = f68, f32
  2591. FMPY f65 = f65, f32
  2592. FMPY f69 = f69, f32
  2593. FMPY f66 = f66, f32
  2594. FMPY f70 = f70, f32
  2595. FMPY f67 = f67, f32
  2596. FMPY f71 = f71, f32
  2597. ;;
  2598. FNMA f72 = f64, f33, f72
  2599. FNMA f76 = f68, f33, f76
  2600. FNMA f73 = f65, f33, f73
  2601. FNMA f77 = f69, f33, f77
  2602. FNMA f74 = f66, f33, f74
  2603. FNMA f78 = f70, f33, f78
  2604. FNMA f75 = f67, f33, f75
  2605. FNMA f79 = f71, f33, f79
  2606. ;;
  2607. FMPY f72 = f72, f34
  2608. FMPY f76 = f76, f34
  2609. FMPY f73 = f73, f34
  2610. FMPY f77 = f77, f34
  2611. FMPY f74 = f74, f34
  2612. FMPY f78 = f78, f34
  2613. FMPY f75 = f75, f34
  2614. FMPY f79 = f79, f34
  2615. ;;
  2616. STFD [AOFFSET] = f64, SIZE
  2617. STFD [AOFFSET2] = f68, SIZE
  2618. ;;
  2619. STFD [AOFFSET] = f65, SIZE
  2620. STFD [AOFFSET2] = f69, SIZE
  2621. ;;
  2622. STFD [AOFFSET] = f66, SIZE
  2623. STFD [AOFFSET2] = f70, SIZE
  2624. ;;
  2625. STFD [AOFFSET] = f67, 5 * SIZE
  2626. STFD [AOFFSET2] = f71, 5 * SIZE
  2627. ;;
  2628. STFD [AOFFSET] = f72, SIZE
  2629. STFD [AOFFSET2] = f76, SIZE
  2630. ;;
  2631. STFD [AOFFSET] = f73, SIZE
  2632. STFD [AOFFSET2] = f77, SIZE
  2633. ;;
  2634. STFD [AOFFSET] = f74, SIZE
  2635. STFD [AOFFSET2] = f78, SIZE
  2636. ;;
  2637. STFD [AOFFSET] = f75, -11 * SIZE
  2638. STFD [AOFFSET2] = f79, -11 * SIZE
  2639. ;;
  2640. #endif
  2641. #ifdef RT
  2642. adds BOFFSET = 2 * SIZE, BOFFSET
  2643. ;;
  2644. LDFPD f33, f32 = [BOFFSET]
  2645. adds BOFFSET = - 2 * SIZE, BOFFSET
  2646. ;;
  2647. LDFD f34 = [BOFFSET]
  2648. ;;
  2649. FMPY f72 = f72, f32
  2650. FMPY f76 = f76, f32
  2651. FMPY f73 = f73, f32
  2652. FMPY f77 = f77, f32
  2653. FMPY f74 = f74, f32
  2654. FMPY f78 = f78, f32
  2655. FMPY f75 = f75, f32
  2656. FMPY f79 = f79, f32
  2657. ;;
  2658. FNMA f64 = f72, f33, f64
  2659. FNMA f68 = f76, f33, f68
  2660. FNMA f65 = f73, f33, f65
  2661. FNMA f69 = f77, f33, f69
  2662. FNMA f66 = f74, f33, f66
  2663. FNMA f70 = f78, f33, f70
  2664. FNMA f67 = f75, f33, f67
  2665. FNMA f71 = f79, f33, f71
  2666. ;;
  2667. FMPY f64 = f64, f34
  2668. FMPY f68 = f68, f34
  2669. FMPY f65 = f65, f34
  2670. FMPY f69 = f69, f34
  2671. FMPY f66 = f66, f34
  2672. FMPY f70 = f70, f34
  2673. FMPY f67 = f67, f34
  2674. FMPY f71 = f71, f34
  2675. ;;
  2676. adds AOFFSET = 8 * SIZE, AOFFSET
  2677. adds AOFFSET2 = 8 * SIZE, AOFFSET2
  2678. ;;
  2679. STFD [AOFFSET] = f72, SIZE
  2680. STFD [AOFFSET2] = f76, SIZE
  2681. ;;
  2682. STFD [AOFFSET] = f73, SIZE
  2683. STFD [AOFFSET2] = f77, SIZE
  2684. ;;
  2685. STFD [AOFFSET] = f74, SIZE
  2686. STFD [AOFFSET2] = f78, SIZE
  2687. ;;
  2688. STFD [AOFFSET] = f75, - 11 * SIZE
  2689. STFD [AOFFSET2] = f79, - 11 * SIZE
  2690. ;;
  2691. STFD [AOFFSET] = f64, SIZE
  2692. STFD [AOFFSET2] = f68, SIZE
  2693. ;;
  2694. STFD [AOFFSET] = f65, SIZE
  2695. STFD [AOFFSET2] = f69, SIZE
  2696. ;;
  2697. STFD [AOFFSET] = f66, SIZE
  2698. STFD [AOFFSET2] = f70, SIZE
  2699. ;;
  2700. STFD [AOFFSET] = f67, - 3 * SIZE
  2701. STFD [AOFFSET2] = f71, - 3 * SIZE
  2702. ;;
  2703. #endif
  2704. adds C9 = 4 * SIZE, C1
  2705. ;;
  2706. { .mmf
  2707. STFD [C1 ] = f64, SIZE
  2708. STFD [C9 ] = f68, SIZE
  2709. mov f64 = f0
  2710. }
  2711. ;;
  2712. { .mmi
  2713. STFD [C1 ] = f65, SIZE
  2714. STFD [C9 ] = f69, SIZE
  2715. adds C10 = 4 * SIZE, C2
  2716. }
  2717. ;;
  2718. { .mmi
  2719. STFD [C1 ] = f66, SIZE
  2720. STFD [C9 ] = f70, SIZE
  2721. }
  2722. ;;
  2723. { .mmi
  2724. #ifndef LN
  2725. STFD [C1 ] = f67, 5 * SIZE
  2726. #else
  2727. STFD [C1 ] = f67, - 3 * SIZE
  2728. #endif
  2729. STFD [C9 ] = f71
  2730. adds C11 = 4 * SIZE, C3
  2731. }
  2732. ;;
  2733. { .mmf
  2734. STFD [C2 ] = f72, SIZE
  2735. STFD [C10] = f76, SIZE
  2736. mov f72 = f0
  2737. }
  2738. ;;
  2739. { .mmi
  2740. STFD [C2 ] = f73, SIZE
  2741. STFD [C10] = f77, SIZE
  2742. }
  2743. ;;
  2744. { .mmi
  2745. STFD [C2 ] = f74, SIZE
  2746. STFD [C10] = f78, SIZE
  2747. adds C12 = 4 * SIZE, C4
  2748. }
  2749. ;;
  2750. { .mmi
  2751. #ifndef LN
  2752. STFD [C2 ] = f75, 5 * SIZE
  2753. #else
  2754. STFD [C2 ] = f75, - 3 * SIZE
  2755. #endif
  2756. STFD [C10] = f79
  2757. }
  2758. ;;
  2759. { .mmf
  2760. cmp.ne p6, p0 = 1, I
  2761. }
  2762. ;;
  2763. adds I = -1, I
  2764. ;;
  2765. { .mmi
  2766. shladd r2 = K, BASE_SHIFT, r0
  2767. }
  2768. ;;
  2769. { .mmi
  2770. sub L = K, KK
  2771. }
  2772. ;;
  2773. { .mmi
  2774. #ifdef RT
  2775. shladd AORIG = r2, 3, AORIG
  2776. #else
  2777. nop __LINE__
  2778. #endif
  2779. }
  2780. ;;
  2781. { .mmi
  2782. #if defined(LT) || defined(RN)
  2783. shladd L = L, BASE_SHIFT, r0
  2784. #else
  2785. nop __LINE__
  2786. #endif
  2787. }
  2788. ;;
  2789. ;;
  2790. { .mmi
  2791. #if defined(LT) || defined(RN)
  2792. shladd AOFFSET = L, 3, AOFFSET
  2793. #else
  2794. nop __LINE__
  2795. #endif
  2796. }
  2797. ;;
  2798. { .mmi
  2799. #if defined(LT) || defined(RN)
  2800. shladd BOFFSET = L, 1, BOFFSET
  2801. #else
  2802. nop __LINE__
  2803. #endif
  2804. }
  2805. ;;
  2806. { .mmi
  2807. #ifdef LT
  2808. adds KK = 8, KK
  2809. #elif defined LN
  2810. adds KK = -8, KK
  2811. #else
  2812. nop __LINE__
  2813. #endif
  2814. }
  2815. ;;
  2816. { .mmi
  2817. #if defined(LT) || defined(RN)
  2818. mov L = KK
  2819. #else
  2820. sub L = K, KK
  2821. #endif
  2822. }
  2823. ;;
  2824. mov f64 = f0
  2825. mov f65 = f0
  2826. mov f66 = f0
  2827. mov f67 = f0
  2828. mov f72 = f0
  2829. mov f73 = f0
  2830. mov f74 = f0
  2831. mov f75 = f0
  2832. (p6) br.cond.dptk .L092
  2833. ;;
  // .L100: set-up for the part of the M loop that runs when bit 2 of M is set
  // (otherwise control goes straight to .L110).
  // It picks the trip count L, positions AOFFSET/BOFFSET, preloads the first
  // operands and programs ar.lc for the accumulation loop at .L102.
  // NOTE(review): LDFPD, SIZE, BASE_SHIFT, PREFETCHSIZE and the register names
  // (M, K, KK, L, B, AORIG, ...) are macros defined outside this view;
  // LDFPD is presumably a paired floating-point load -- confirm.
  2834. .align 8
  2835. .L100:
  // p6 = (bit 2 of M is clear), p7 = its complement.
  2836. tbit.z p6, p7 = M, 2
  2837. (p6) br.cond.dptk .L110
  2838. ;;
  // L = KK for LT/RN, K - KK for the other variants.
  2839. { .mib
  2840. #if defined(LT) || defined(RN)
  2841. mov L = KK
  2842. #else
  2843. sub L = K, KK
  2844. #endif
  2845. }
  2846. ;;
  // p7 = (L != 0) guards the preloads below.
  // BOFFSET = B, r2 = K << (2 + BASE_SHIFT), r3 = KK << BASE_SHIFT.
  2847. { .mmi
  2848. cmp.ne p7, p0 = r0, L
  2849. adds BOFFSET = 0 * SIZE, B
  2850. shl r2 = K, 2 + BASE_SHIFT
  2851. }
  2852. { .mmi
  2853. shladd r3 = KK, BASE_SHIFT, r0
  2854. nop __LINE__
  2855. nop __LINE__
  2856. }
  2857. ;;
  2858. #if defined(LT) || defined(RN)
  // LT/RN: B is read from its start; AOFFSET is left as the previous section set it.
  2859. { .mmf
  2860. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  2861. mov f65 = f0
  2862. }
  2863. ;;
  2864. #else
  // LN/RT: BOFFSET = B + (r3 << 1); LN additionally moves AORIG back by r2.
  2865. { .mfi
  2866. shladd BOFFSET = r3, 1, B
  2867. #ifdef LN
  2868. sub AORIG = AORIG, r2
  2869. #else
  2870. nop __LINE__
  2871. #endif
  2872. }
  2873. ;;
  // AOFFSET = AORIG + (r3 << 2).
  2874. { .mfi
  2875. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  2876. shladd AOFFSET = r3, 2, AORIG
  2877. }
  2878. ;;
  2879. #endif
  // Trip-count arithmetic: L = ((L + 1) >> 1) - 1, i.e. the loop handles two
  // steps per pass. p12 is set when bit 0 of (L + 1) is clear (the original L
  // was odd). p3 starts out true.
  2880. { .mfi
  2881. adds L = 1, L
  2882. }
  2883. { .mfi
  2884. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  2885. cmp.eq p3, p0 = r0, r0
  2886. }
  2887. ;;
  2888. { .mfi
  2889. tbit.z p12, p0 = L, 0
  2890. }
  2891. { .mfi
  2892. shr L = L, 1
  2893. }
  2894. ;;
  2895. { .mfi
  2896. adds L = -1, L
  2897. }
  2898. ;;
  // p6 = (L == -1): no passes to run, skip the loop.
  2899. { .mfi
  2900. cmp.eq p6, p0 = -1, L
  2901. }
  2902. ;;
  // Preload four values from AOFFSET (f32-f35) for the first step.
  2903. { .mmf
  2904. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  2905. }
  2906. { .mfi
  2907. mov ar.lc = L
  2908. }
  2909. ;;
  2910. { .mmf
  2911. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  2912. }
  2913. { .mfb
  2914. (p6) br.cond.dpnt .L108
  2915. }
  2916. ;;
  // .L102: counted loop (br.cloop on ar.lc) accumulating into f64-f67 and
  // f72-f75. Each pass issues up to two steps:
  //   step 1 uses f32-f35 with f48/f49,
  //   step 2 (predicated on p3) uses f40-f43 with f56/f57.
  // p4 = (L != 0) gates the reload of the step-1 operands for the next pass.
  // When p12 is set, p3 becomes (L != 0), so step 2 is dropped on the final pass.
  // NOTE(review): FMA and LDFPD are macros defined outside this view;
  // FMA d = a, b, c is presumably d = a * b + c -- confirm.
  2917. .L102:
  2918. { .mfi
  2919. lfetch.nt1 [PREA], 8 * SIZE
  2920. FMA f64 = f32, f48, f64 // A1 * B1
  2921. cmp.ne p4, p5 = 0, L
  2922. }
  // PREB is re-derived from BOFFSET on every pass.
  2923. { .mfi
  2924. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  2925. FMA f72 = f32, f49, f72 // A1 * B2
  2926. (p12) cmp.ne p3, p0 = 0, L
  2927. }
  2928. ;;
  2929. { .mfi
  2930. lfetch.nt1 [PREB], 4 * SIZE
  2931. FMA f65 = f33, f48, f65 // A2 * B1
  2932. adds C9 = 2 * SIZE, C1
  2933. }
  2934. { .mfi
  2935. nop __LINE__
  2936. FMA f73 = f33, f49, f73 // A2 * B2
  2937. adds C10 = 2 * SIZE, C2
  2938. }
  2939. ;;
  // Step-2 operands are fetched while step 1 is still being issued.
  2940. { .mfb
  2941. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  2942. FMA f66 = f34, f48, f66 // A3 * B1
  2943. nop __LINE__
  2944. }
  2945. { .mfb
  2946. nop __LINE__
  2947. FMA f74 = f34, f49, f74 // A3 * B2
  2948. nop __LINE__
  2949. }
  2950. ;;
  2951. { .mfb
  2952. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  2953. FMA f67 = f35, f48, f67 // A4 * B1
  2954. nop __LINE__
  2955. }
  2956. { .mfb
  2957. nop __LINE__
  2958. FMA f75 = f35, f49, f75 // A4 * B2
  2959. nop __LINE__
  2960. }
  2961. ;;
  // Step 2 (only when p3 is set).
  2962. { .mfb
  2963. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  2964. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  2965. nop __LINE__
  2966. }
  2967. { .mfb
  2968. nop __LINE__
  2969. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  2970. nop __LINE__
  2971. }
  2972. ;;
  // Reload of the step-1 operands for the next pass (only when p4 is set).
  2973. { .mfb
  2974. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  2975. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  2976. nop __LINE__
  2977. }
  2978. { .mfb
  2979. nop __LINE__
  2980. (p3) FMA f73 = f41, f57, f73 // A2 * B2
  2981. nop __LINE__
  2982. }
  2983. ;;
  2984. { .mfb
  2985. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  2986. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  2987. nop __LINE__
  2988. }
  2989. { .mfb
  2990. nop __LINE__
  2991. (p3) FMA f74 = f42, f57, f74 // A3 * B2
  2992. nop __LINE__
  2993. }
  2994. ;;
  // L is decremented in software alongside ar.lc; the predicates above test L.
  2995. { .mfi
  2996. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  2997. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  2998. adds L = -1, L
  2999. }
  3000. { .mfb
  3001. nop __LINE__
  3002. (p3) FMA f75 = f43, f57, f75 // A4 * B2
  3003. br.cloop.sptk.few .L102
  3004. }
  3005. ;;
  // .L108: post-loop stage for the accumulators f64-f67 / f72-f75.
  //   1. LN/RT: recompute AOFFSET/BOFFSET from AORIG/B and KK.
  //   2. Load eight values (from BOFFSET for LN/LT, from AOFFSET for RN/RT)
  //      and combine them with the accumulators via FSUB.
  //   3. Run the variant-specific FMPY/FNMA sequence (LN, LT, RN or RT).
  //   4. Store the results back to the buffer they were loaded from and to C1/C2.
  //   5. Clear the accumulators, advance AORIG/AOFFSET/BOFFSET and KK.
  // NOTE(review): FSUB, FMPY, FNMA, LDFPD, LDFD and STFD are macros defined
  // outside this view; presumably FSUB d = a, b is a - b and
  // FNMA d = a, b, c is c - a * b -- confirm against the macro definitions.
  3006. .align 8
  3007. .L108:
  3008. #if defined(LN) || defined(RT)
  // r2 = KK - 4 (LN) or KK - 2 (RT), then scaled by BASE_SHIFT.
  3009. #ifdef LN
  3010. adds r2 = -4, KK
  3011. #else
  3012. adds r2 = -2, KK
  3013. #endif
  3014. ;;
  3015. shladd r2 = r2, BASE_SHIFT, r0
  3016. ;;
  // AOFFSET = AORIG + (r2 << 2), BOFFSET = B + (r2 << 1).
  3017. shladd AOFFSET = r2, 2, AORIG
  3018. shladd BOFFSET = r2, 1, B
  3019. ;;
  3020. #endif
  // Second store cursors, four elements past the first ones.
  3021. adds AOFFSET2 = 4 * SIZE, AOFFSET
  3022. adds BOFFSET2 = 4 * SIZE, BOFFSET
  3023. ;;
  3024. #if defined(LN) || defined(LT)
  // LN/LT: eight values from BOFFSET, consumed in the interleaved order
  // f64, f72, f65, f73, f66, f74, f67, f75; BOFFSET is restored afterwards.
  3025. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  3026. ;;
  3027. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  3028. ;;
  3029. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  3030. ;;
  3031. LDFPD f38, f39 = [BOFFSET]
  3032. adds BOFFSET = -6 * SIZE, BOFFSET
  3033. ;;
  3034. FSUB f64 = f32, f64
  3035. FSUB f72 = f33, f72
  3036. ;;
  3037. FSUB f65 = f34, f65
  3038. FSUB f73 = f35, f73
  3039. ;;
  3040. FSUB f66 = f36, f66
  3041. FSUB f74 = f37, f74
  3042. ;;
  3043. FSUB f67 = f38, f67
  3044. FSUB f75 = f39, f75
  3045. ;;
  3046. #else
  // RN/RT: eight values from AOFFSET, consumed as f64-f67 then f72-f75;
  // AOFFSET is restored afterwards.
  3047. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  3048. ;;
  3049. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  3050. ;;
  3051. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  3052. ;;
  3053. LDFPD f38, f39 = [AOFFSET]
  3054. adds AOFFSET = -6 * SIZE, AOFFSET
  3055. ;;
  3056. FSUB f64 = f32, f64
  3057. FSUB f65 = f33, f65
  3058. FSUB f66 = f34, f66
  3059. FSUB f67 = f35, f67
  3060. FSUB f72 = f36, f72
  3061. FSUB f73 = f37, f73
  3062. FSUB f74 = f38, f74
  3063. FSUB f75 = f39, f75
  3064. ;;
  3065. #endif
  3066. #ifdef LN
  // LN: coefficients are read walking backwards through AOFFSET, from element
  // offsets 15/14 down to 0 (AOFFSET ends where it started). The sequence
  // starts with f67/f75 and finishes with f64/f72.
  3067. adds AOFFSET = 14 * SIZE, AOFFSET
  3068. ;;
  3069. LDFPD f33, f32 = [AOFFSET]
  3070. adds AOFFSET = - 2 * SIZE, AOFFSET
  3071. ;;
  3072. LDFPD f35, f34 = [AOFFSET]
  3073. adds AOFFSET = - 2 * SIZE, AOFFSET
  3074. ;;
  3075. LDFD f36 = [AOFFSET], - 2 * SIZE
  3076. ;;
  3077. LDFPD f38, f37 = [AOFFSET]
  3078. adds AOFFSET = - 4 * SIZE, AOFFSET
  3079. ;;
  3080. LDFPD f40, f39 = [AOFFSET]
  3081. adds AOFFSET = - 4 * SIZE, AOFFSET
  3082. ;;
  3083. LDFD f41 = [AOFFSET]
  3084. ;;
  3085. FMPY f67 = f67, f32
  3086. FMPY f75 = f75, f32
  3087. ;;
  3088. FNMA f66 = f67, f33, f66
  3089. FNMA f74 = f75, f33, f74
  3090. ;;
  3091. FNMA f65 = f67, f34, f65
  3092. FNMA f73 = f75, f34, f73
  3093. ;;
  3094. FNMA f64 = f67, f35, f64
  3095. FNMA f72 = f75, f35, f72
  3096. ;;
  3097. FMPY f66 = f66, f36
  3098. FMPY f74 = f74, f36
  3099. ;;
  3100. FNMA f65 = f66, f37, f65
  3101. FNMA f73 = f74, f37, f73
  3102. ;;
  3103. FNMA f64 = f66, f38, f64
  3104. FNMA f72 = f74, f38, f72
  3105. ;;
  3106. FMPY f65 = f65, f39
  3107. FMPY f73 = f73, f39
  3108. ;;
  3109. FNMA f64 = f65, f40, f64
  3110. FNMA f72 = f73, f40, f72
  3111. ;;
  3112. FMPY f64 = f64, f41
  3113. FMPY f72 = f72, f41
  3114. ;;
  // Write back in the same interleaved layout the values were loaded in;
  // both cursors return to their starting positions.
  3115. STFD [BOFFSET] = f64, SIZE
  3116. STFD [BOFFSET2] = f66, SIZE
  3117. ;;
  3118. STFD [BOFFSET] = f72, SIZE
  3119. STFD [BOFFSET2] = f74, SIZE
  3120. ;;
  3121. STFD [BOFFSET] = f65, SIZE
  3122. STFD [BOFFSET2] = f67, SIZE
  3123. ;;
  3124. STFD [BOFFSET] = f73, -3 * SIZE
  3125. STFD [BOFFSET2] = f75, -3 * SIZE
  3126. ;;
  // LN moves C1/C2 back by four elements before the stores to C below.
  3127. adds C1 = -4 * SIZE, C1
  3128. adds C2 = -4 * SIZE, C2
  3129. ;;
  3130. #endif
  3131. #ifdef LT
  // LT: coefficients are read walking forwards through AOFFSET (element
  // offsets 0-3, 5-7, 10-11, 15); the final load rewinds AOFFSET by 15.
  // The sequence starts with f64/f72 and finishes with f67/f75.
  3132. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  3133. ;;
  3134. LDFPD f34, f35 = [AOFFSET]
  3135. adds AOFFSET = 3 * SIZE, AOFFSET
  3136. ;;
  3137. LDFD f36 = [AOFFSET], 1 * SIZE
  3138. ;;
  3139. LDFPD f37, f38 = [AOFFSET]
  3140. adds AOFFSET = 4 * SIZE, AOFFSET
  3141. ;;
  3142. LDFPD f39, f40 = [AOFFSET]
  3143. adds AOFFSET = 5 * SIZE, AOFFSET
  3144. ;;
  3145. LDFD f41 = [AOFFSET], -15 * SIZE
  3146. ;;
  3147. FMPY f64 = f64, f32
  3148. FMPY f72 = f72, f32
  3149. ;;
  3150. FNMA f65 = f64, f33, f65
  3151. FNMA f73 = f72, f33, f73
  3152. ;;
  3153. FNMA f66 = f64, f34, f66
  3154. FNMA f74 = f72, f34, f74
  3155. ;;
  3156. FNMA f67 = f64, f35, f67
  3157. FNMA f75 = f72, f35, f75
  3158. ;;
  3159. FMPY f65 = f65, f36
  3160. FMPY f73 = f73, f36
  3161. ;;
  3162. FNMA f66 = f65, f37, f66
  3163. FNMA f74 = f73, f37, f74
  3164. ;;
  3165. FNMA f67 = f65, f38, f67
  3166. FNMA f75 = f73, f38, f75
  3167. ;;
  3168. FMPY f66 = f66, f39
  3169. FMPY f74 = f74, f39
  3170. ;;
  3171. FNMA f67 = f66, f40, f67
  3172. FNMA f75 = f74, f40, f75
  3173. ;;
  3174. FMPY f67 = f67, f41
  3175. FMPY f75 = f75, f41
  3176. ;;
  3177. STFD [BOFFSET] = f64, SIZE
  3178. STFD [BOFFSET2] = f66, SIZE
  3179. ;;
  3180. STFD [BOFFSET] = f72, SIZE
  3181. STFD [BOFFSET2] = f74, SIZE
  3182. ;;
  3183. STFD [BOFFSET] = f65, SIZE
  3184. STFD [BOFFSET2] = f67, SIZE
  3185. ;;
  3186. STFD [BOFFSET] = f73, -3 * SIZE
  3187. STFD [BOFFSET2] = f75, -3 * SIZE
  3188. ;;
  3189. #endif
  3190. #ifdef RN
  // RN: three coefficients from BOFFSET (element offsets 0, 1 and 3);
  // f64-f67 are processed first, then f72-f75.
  3191. LDFPD f32, f33 = [BOFFSET]
  3192. adds BOFFSET = 3 * SIZE, BOFFSET
  3193. ;;
  3194. LDFD f34 = [BOFFSET], -3 * SIZE
  3195. ;;
  3196. FMPY f64 = f64, f32
  3197. FMPY f65 = f65, f32
  3198. FMPY f66 = f66, f32
  3199. FMPY f67 = f67, f32
  3200. ;;
  3201. FNMA f72 = f64, f33, f72
  3202. FNMA f73 = f65, f33, f73
  3203. FNMA f74 = f66, f33, f74
  3204. FNMA f75 = f67, f33, f75
  3205. ;;
  3206. FMPY f72 = f72, f34
  3207. FMPY f73 = f73, f34
  3208. FMPY f74 = f74, f34
  3209. FMPY f75 = f75, f34
  3210. ;;
  3211. STFD [AOFFSET] = f64, SIZE
  3212. STFD [AOFFSET2] = f72, SIZE
  3213. ;;
  3214. STFD [AOFFSET] = f65, SIZE
  3215. STFD [AOFFSET2] = f73, SIZE
  3216. ;;
  3217. STFD [AOFFSET] = f66, SIZE
  3218. STFD [AOFFSET2] = f74, SIZE
  3219. ;;
  3220. STFD [AOFFSET] = f67, -3 * SIZE
  3221. STFD [AOFFSET2] = f75, -3 * SIZE
  3222. ;;
  3223. #endif
  3224. #ifdef RT
  // RT: three coefficients from BOFFSET (element offsets 3, 2 and 0);
  // f72-f75 are processed first, then f64-f67.
  3225. adds BOFFSET = 2 * SIZE, BOFFSET
  3226. ;;
  3227. LDFPD f33, f32 = [BOFFSET]
  3228. adds BOFFSET = - 2 * SIZE, BOFFSET
  3229. ;;
  3230. LDFD f34 = [BOFFSET]
  3231. ;;
  3232. FMPY f72 = f72, f32
  3233. FMPY f73 = f73, f32
  3234. FMPY f74 = f74, f32
  3235. FMPY f75 = f75, f32
  3236. ;;
  3237. FNMA f64 = f72, f33, f64
  3238. FNMA f65 = f73, f33, f65
  3239. FNMA f66 = f74, f33, f66
  3240. FNMA f67 = f75, f33, f67
  3241. ;;
  3242. FMPY f64 = f64, f34
  3243. FMPY f65 = f65, f34
  3244. FMPY f66 = f66, f34
  3245. FMPY f67 = f67, f34
  3246. ;;
  3247. STFD [AOFFSET] = f64, SIZE
  3248. STFD [AOFFSET2] = f72, SIZE
  3249. ;;
  3250. STFD [AOFFSET] = f65, SIZE
  3251. STFD [AOFFSET2] = f73, SIZE
  3252. ;;
  3253. STFD [AOFFSET] = f66, SIZE
  3254. STFD [AOFFSET2] = f74, SIZE
  3255. ;;
  3256. STFD [AOFFSET] = f67, - 3 * SIZE
  3257. STFD [AOFFSET2] = f75, - 3 * SIZE
  3258. ;;
  3259. #endif
  // Store f64-f67 through C1 and f72-f75 through C2. For LN the last store
  // rewinds the pointer by 3 so it ends where it was before these stores;
  // otherwise the pointer ends four elements further on.
  3260. { .mmf
  3261. STFD [C1 ] = f64, SIZE
  3262. mov f64 = f0
  3263. }
  3264. ;;
  3265. { .mmi
  3266. STFD [C1 ] = f65, SIZE
  3267. }
  3268. ;;
  3269. { .mmi
  3270. STFD [C1 ] = f66, SIZE
  3271. }
  3272. ;;
  3273. { .mmi
  3274. #ifndef LN
  3275. STFD [C1 ] = f67, SIZE
  3276. #else
  3277. STFD [C1 ] = f67, - 3 * SIZE
  3278. #endif
  3279. }
  3280. ;;
  3281. { .mmf
  3282. STFD [C2 ] = f72, SIZE
  3283. mov f72 = f0
  3284. }
  3285. ;;
  3286. { .mmi
  3287. STFD [C2 ] = f73, SIZE
  3288. }
  3289. ;;
  3290. { .mmi
  3291. STFD [C2 ] = f74, SIZE
  3292. }
  3293. ;;
  3294. { .mmi
  3295. #ifndef LN
  3296. STFD [C2 ] = f75, SIZE
  3297. #else
  3298. STFD [C2 ] = f75, - 3 * SIZE
  3299. #endif
  3300. }
  3301. ;;
  // Clear the remaining accumulators (f64 and f72 were cleared above).
  3302. mov f65 = f0
  3303. mov f73 = f0
  3304. mov f66 = f0
  3305. mov f74 = f0
  3306. mov f67 = f0
  3307. mov f75 = f0
  3308. ;;
  // Pointer bookkeeping: r2 = K << BASE_SHIFT, L = K - KK.
  3309. shladd r2 = K, BASE_SHIFT, r0
  3310. ;;
  3311. { .mmi
  3312. sub L = K, KK
  3313. }
  3314. ;;
  // RT: AORIG += r2 << 2.
  3315. { .mmi
  3316. #ifdef RT
  3317. shladd AORIG = r2, 2, AORIG
  3318. #else
  3319. nop __LINE__
  3320. #endif
  3321. }
  3322. ;;
  // LT/RN: scale L by BASE_SHIFT, then AOFFSET += L << 2 and BOFFSET += L << 1.
  3323. { .mmi
  3324. #if defined(LT) || defined(RN)
  3325. shladd L = L, BASE_SHIFT, r0
  3326. #else
  3327. nop __LINE__
  3328. #endif
  3329. }
  3330. ;;
  3331. { .mmi
  3332. #if defined(LT) || defined(RN)
  3333. shladd AOFFSET = L, 2, AOFFSET
  3334. #else
  3335. nop __LINE__
  3336. #endif
  3337. }
  3338. ;;
  3339. { .mmi
  3340. #if defined(LT) || defined(RN)
  3341. shladd BOFFSET = L, 1, BOFFSET
  3342. #else
  3343. nop __LINE__
  3344. #endif
  3345. }
  3346. ;;
  // KK moves by 4 for LT (+) and LN (-); RN/RT leave it unchanged here.
  3347. { .mmi
  3348. #ifdef LT
  3349. adds KK = 4, KK
  3350. #elif defined LN
  3351. adds KK = -4, KK
  3352. #else
  3353. nop __LINE__
  3354. #endif
  3355. }
  3356. ;;
  3357. { .mmi
  3358. #if defined(LT) || defined(RN)
  3359. mov L = KK
  3360. #else
  3361. sub L = K, KK
  3362. #endif
  3363. }
  3364. ;;
  // .L110: set-up for the part of the M loop that runs when bit 1 of M is set
  // (otherwise control goes straight to .L120).
  // It picks the trip count L, positions AOFFSET/BOFFSET, preloads the first
  // operands and programs ar.lc for the accumulation loop at .L112.
  // NOTE(review): LDFPD, SIZE, BASE_SHIFT, PREFETCHSIZE and the register names
  // are macros defined outside this view.
  3365. .align 8
  3366. .L110:
  // p6 = (bit 1 of M is clear), p7 = its complement.
  3367. tbit.z p6, p7 = M, 1
  3368. (p6) br.cond.dptk .L120
  3369. ;;
  // L = KK for LT/RN, K - KK for the other variants.
  3370. { .mib
  3371. #if defined(LT) || defined(RN)
  3372. mov L = KK
  3373. #else
  3374. sub L = K, KK
  3375. #endif
  3376. }
  3377. ;;
  // p7 = (L != 0) guards the preloads below.
  // BOFFSET = B, r2 = K << (1 + BASE_SHIFT), r3 = KK << BASE_SHIFT.
  3378. { .mmi
  3379. cmp.ne p7, p0 = r0, L
  3380. adds BOFFSET = 0 * SIZE, B
  3381. shl r2 = K, 1 + BASE_SHIFT
  3382. }
  3383. { .mmi
  3384. shladd r3 = KK, BASE_SHIFT, r0
  3385. nop __LINE__
  3386. nop __LINE__
  3387. }
  3388. ;;
  3389. #if defined(LT) || defined(RN)
  // LT/RN: B is read from its start; AOFFSET is left as the previous section set it.
  3390. { .mmf
  3391. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  3392. }
  3393. ;;
  3394. #else
  // LN/RT: BOFFSET = B + (r3 << 1); LN additionally moves AORIG back by r2.
  3395. { .mfi
  3396. shladd BOFFSET = r3, 1, B
  3397. #ifdef LN
  3398. sub AORIG = AORIG, r2
  3399. #else
  3400. nop __LINE__
  3401. #endif
  3402. }
  3403. ;;
  // AOFFSET = AORIG + (r3 << 1).
  3404. { .mfi
  3405. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  3406. shladd AOFFSET = r3, 1, AORIG
  3407. }
  3408. ;;
  3409. #endif
  // Trip-count arithmetic: L = ((L + 1) >> 1) - 1 (two steps per pass).
  // p12 is set when bit 0 of (L + 1) is clear (the original L was odd).
  // p3 starts out true.
  3410. { .mfi
  3411. adds L = 1, L
  3412. }
  3413. { .mfi
  3414. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  3415. cmp.eq p3, p0 = r0, r0
  3416. }
  3417. ;;
  3418. { .mfi
  3419. tbit.z p12, p0 = L, 0
  3420. }
  3421. { .mfi
  3422. shr L = L, 1
  3423. }
  3424. ;;
  3425. { .mmf
  3426. adds L = -1, L
  3427. }
  3428. ;;
  // p6 = (L == -1): no passes to run, skip the loop.
  3429. { .mmf
  3430. cmp.eq p6, p0 = -1, L
  3431. }
  3432. ;;
  // Preload two values from AOFFSET (f32, f33) for the first step.
  3433. { .mib
  3434. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  3435. mov ar.lc = L
  3436. (p6) br.cond.dpnt .L118
  3437. }
  3438. ;;
  // .L112: counted loop (br.cloop on ar.lc) accumulating into f64/f65 and
  // f72/f73. Each pass issues up to two steps:
  //   step 1 uses f32/f33 with f48/f49,
  //   step 2 (predicated on p3) uses f40/f41 with f56/f57.
  // p4 = (L != 0) gates the reload of the step-1 operands for the next pass.
  // When p12 is set, p3 becomes (L != 0), so step 2 is dropped on the final pass.
  // NOTE(review): FMA and LDFPD are macros defined outside this view;
  // FMA d = a, b, c is presumably d = a * b + c -- confirm.
  // NOTE(review): PREB is prefetched through here but is not written in the
  // .L110 set-up or in this loop, so it carries whatever value an earlier
  // section left in it -- confirm this is intended.
  3439. .L112:
  3440. { .mfi
  3441. lfetch.nt1 [PREA], 4 * SIZE
  3442. FMA f64 = f32, f48, f64 // A1 * B1
  3443. cmp.ne p4, p5 = 0, L
  3444. }
  3445. { .mfi
  3446. lfetch.nt1 [PREB], 4 * SIZE
  3447. FMA f72 = f32, f49, f72 // A1 * B2
  3448. (p12) cmp.ne p3, p0 = 0, L
  3449. }
  3450. ;;
  // Step-2 operands are fetched while step 1 is still being issued.
  3451. { .mmf
  3452. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  3453. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  3454. FMA f65 = f33, f48, f65 // A2 * B1
  3455. }
  3456. { .mmf
  3457. nop __LINE__
  3458. nop __LINE__
  3459. FMA f73 = f33, f49, f73 // A2 * B2
  3460. }
  3461. ;;
  // Step 2 (only when p3 is set), overlapped with the reload for the next pass.
  3462. { .mfb
  3463. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  3464. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  3465. nop __LINE__
  3466. }
  3467. { .mfb
  3468. nop __LINE__
  3469. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  3470. nop __LINE__
  3471. }
  3472. ;;
  // L is decremented in software alongside ar.lc; the predicates above test L.
  3473. { .mfi
  3474. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  3475. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  3476. adds L = -1, L
  3477. }
  3478. { .mfb
  3479. nop __LINE__
  3480. (p3) FMA f73 = f41, f57, f73 // A2 * B2
  3481. br.cloop.sptk.few .L112
  3482. }
  3483. ;;
  // .L118: post-loop stage for the accumulators f64/f65 and f72/f73.
  //   1. LN/RT: recompute AOFFSET/BOFFSET from AORIG/B and KK.
  //   2. Load four values (from BOFFSET for LN/LT, from AOFFSET for RN/RT)
  //      and combine them with the accumulators via FSUB.
  //   3. Run the variant-specific FMPY/FNMA sequence (LN, LT, RN or RT).
  //   4. Store the results back to the buffer they were loaded from and to C1/C2.
  //   5. Clear the accumulators, advance AORIG/AOFFSET/BOFFSET and KK.
  // NOTE(review): FSUB, FMPY, FNMA, LDFPD, LDFD and STFD are macros defined
  // outside this view; presumably FSUB d = a, b is a - b and
  // FNMA d = a, b, c is c - a * b -- confirm against the macro definitions.
  3484. .align 8
  3485. .L118:
  3486. #if defined(LN) || defined(RT)
  // Both branches of this #ifdef compute r2 = KK - 2.
  3487. #ifdef LN
  3488. adds r2 = -2, KK
  3489. #else
  3490. adds r2 = -2, KK
  3491. #endif
  3492. ;;
  3493. shladd r2 = r2, BASE_SHIFT, r0
  3494. ;;
  // AOFFSET = AORIG + (r2 << 1), BOFFSET = B + (r2 << 1).
  3495. shladd AOFFSET = r2, 1, AORIG
  3496. shladd BOFFSET = r2, 1, B
  3497. ;;
  3498. #endif
  // AOFFSET2/BOFFSET2 are set here but the stores in this section only use
  // AOFFSET/BOFFSET.
  3499. adds AOFFSET2 = 4 * SIZE, AOFFSET
  3500. adds BOFFSET2 = 4 * SIZE, BOFFSET
  3501. ;;
  3502. #if defined(LN) || defined(LT)
  // LN/LT: four values from BOFFSET, consumed in the order f64, f72, f65, f73;
  // BOFFSET is restored afterwards.
  3503. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  3504. ;;
  3505. LDFPD f34, f35 = [BOFFSET]
  3506. adds BOFFSET = -2 * SIZE, BOFFSET
  3507. ;;
  3508. FSUB f64 = f32, f64
  3509. FSUB f72 = f33, f72
  3510. FSUB f65 = f34, f65
  3511. FSUB f73 = f35, f73
  3512. ;;
  3513. #else
  // RN/RT: four values from AOFFSET, consumed in the order f64, f65, f72, f73;
  // AOFFSET is restored afterwards.
  3514. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  3515. ;;
  3516. LDFPD f34, f35 = [AOFFSET]
  3517. adds AOFFSET = -2 * SIZE, AOFFSET
  3518. ;;
  3519. FSUB f64 = f32, f64
  3520. FSUB f65 = f33, f65
  3521. FSUB f72 = f34, f72
  3522. FSUB f73 = f35, f73
  3523. ;;
  3524. #endif
  3525. #ifdef LN
  // LN: three coefficients from AOFFSET (element offsets 3, 2 and 0);
  // f65/f73 are processed first, then f64/f72.
  3526. adds AOFFSET = 2 * SIZE, AOFFSET
  3527. ;;
  3528. LDFPD f33, f32 = [AOFFSET]
  3529. adds AOFFSET = - 2 * SIZE, AOFFSET
  3530. ;;
  3531. LDFD f34 = [AOFFSET]
  3532. ;;
  3533. FMPY f65 = f65, f32
  3534. FMPY f73 = f73, f32
  3535. ;;
  3536. FNMA f64 = f65, f33, f64
  3537. FNMA f72 = f73, f33, f72
  3538. ;;
  3539. FMPY f64 = f64, f34
  3540. FMPY f72 = f72, f34
  3541. ;;
  3542. STFD [BOFFSET] = f64, SIZE
  3543. ;;
  3544. STFD [BOFFSET] = f72, SIZE
  3545. ;;
  3546. STFD [BOFFSET] = f65, SIZE
  3547. ;;
  3548. STFD [BOFFSET] = f73, - 3 * SIZE
  3549. ;;
  // LN moves C1/C2 back by two elements before the stores to C below.
  3550. adds C1 = -2 * SIZE, C1
  3551. adds C2 = -2 * SIZE, C2
  3552. ;;
  3553. #endif
  3554. #ifdef LT
  // LT: three coefficients from AOFFSET (element offsets 0, 1 and 3);
  // f64/f72 are processed first, then f65/f73.
  3555. LDFPD f32, f33 = [AOFFSET]
  3556. adds AOFFSET = 3 * SIZE, AOFFSET
  3557. ;;
  3558. LDFD f34 = [AOFFSET], - 3 * SIZE
  3559. ;;
  3560. FMPY f64 = f64, f32
  3561. FMPY f72 = f72, f32
  3562. ;;
  3563. FNMA f65 = f64, f33, f65
  3564. FNMA f73 = f72, f33, f73
  3565. ;;
  3566. FMPY f65 = f65, f34
  3567. FMPY f73 = f73, f34
  3568. ;;
  3569. STFD [BOFFSET] = f64, SIZE
  3570. ;;
  3571. STFD [BOFFSET] = f72, SIZE
  3572. ;;
  3573. STFD [BOFFSET] = f65, SIZE
  3574. ;;
  3575. STFD [BOFFSET] = f73, -3 * SIZE
  3576. ;;
  3577. #endif
  3578. #ifdef RN
  // RN: three coefficients from BOFFSET (element offsets 0, 1 and 3);
  // f64/f65 are processed first, then f72/f73.
  3579. LDFPD f32, f33 = [BOFFSET]
  3580. adds BOFFSET = 3 * SIZE, BOFFSET
  3581. ;;
  3582. LDFD f34 = [BOFFSET], -3 * SIZE
  3583. ;;
  3584. FMPY f64 = f64, f32
  3585. FMPY f65 = f65, f32
  3586. ;;
  3587. FNMA f72 = f64, f33, f72
  3588. FNMA f73 = f65, f33, f73
  3589. ;;
  3590. FMPY f72 = f72, f34
  3591. FMPY f73 = f73, f34
  3592. ;;
  3593. STFD [AOFFSET] = f64, SIZE
  3594. ;;
  3595. STFD [AOFFSET] = f65, SIZE
  3596. ;;
  3597. STFD [AOFFSET] = f72, SIZE
  3598. ;;
  3599. STFD [AOFFSET] = f73, -3 * SIZE
  3600. ;;
  3601. #endif
  3602. #ifdef RT
  // RT: three coefficients from BOFFSET (element offsets 3, 2 and 0);
  // f72/f73 are processed first, then f64/f65.
  3603. adds BOFFSET = 2 * SIZE, BOFFSET
  3604. ;;
  3605. LDFPD f33, f32 = [BOFFSET]
  3606. adds BOFFSET = - 2 * SIZE, BOFFSET
  3607. ;;
  3608. LDFD f34 = [BOFFSET]
  3609. ;;
  3610. FMPY f72 = f72, f32
  3611. FMPY f73 = f73, f32
  3612. ;;
  3613. FNMA f64 = f72, f33, f64
  3614. FNMA f65 = f73, f33, f65
  3615. ;;
  3616. FMPY f64 = f64, f34
  3617. FMPY f65 = f65, f34
  3618. ;;
  3619. STFD [AOFFSET] = f64, SIZE
  3620. ;;
  3621. STFD [AOFFSET] = f65, SIZE
  3622. ;;
  3623. STFD [AOFFSET] = f72, SIZE
  3624. ;;
  3625. STFD [AOFFSET] = f73, -3 * SIZE
  3626. ;;
  3627. #endif
  // Store f64/f65 through C1 and f72/f73 through C2. For LN the second store
  // steps back by one element so the pointer ends where it was before these
  // stores; otherwise the pointer ends two elements further on.
  3628. STFD [C1 ] = f64, SIZE
  3629. mov f64 = f0
  3630. ;;
  3631. #ifndef LN
  3632. STFD [C1 ] = f65, SIZE
  3633. #else
  3634. STFD [C1 ] = f65, -SIZE
  3635. #endif
  3636. ;;
  3637. STFD [C2 ] = f72, SIZE
  3638. mov f72 = f0
  3639. ;;
  3640. #ifndef LN
  3641. STFD [C2 ] = f73, SIZE
  3642. #else
  3643. STFD [C2 ] = f73, -SIZE
  3644. #endif
  3645. ;;
  // Clear the remaining accumulators (f64 and f72 were cleared above).
  3646. mov f65 = f0
  3647. mov f73 = f0
  3648. ;;
  // Pointer bookkeeping: r2 = K << BASE_SHIFT, L = K - KK.
  3649. shladd r2 = K, BASE_SHIFT, r0
  3650. ;;
  3651. sub L = K, KK
  3652. ;;
  // RT: AORIG += r2 << 1.
  3653. #ifdef RT
  3654. shladd AORIG = r2, 1, AORIG
  3655. #else
  3656. nop __LINE__
  3657. #endif
  3658. ;;
  // LT/RN: scale L by BASE_SHIFT, then AOFFSET += L << 1 and BOFFSET += L << 1.
  3659. { .mmi
  3660. #if defined(LT) || defined(RN)
  3661. shladd L = L, BASE_SHIFT, r0
  3662. #else
  3663. nop __LINE__
  3664. #endif
  3665. }
  3666. ;;
  3667. { .mmi
  3668. #if defined(LT) || defined(RN)
  3669. shladd AOFFSET = L, 1, AOFFSET
  3670. #else
  3671. nop __LINE__
  3672. #endif
  3673. }
  3674. ;;
  3675. { .mmi
  3676. #if defined(LT) || defined(RN)
  3677. shladd BOFFSET = L, 1, BOFFSET
  3678. #else
  3679. nop __LINE__
  3680. #endif
  3681. }
  3682. ;;
  // KK moves by 2 for LT (+) and LN (-); RN/RT leave it unchanged here.
  3683. { .mmi
  3684. #ifdef LT
  3685. adds KK = 2, KK
  3686. #elif defined LN
  3687. adds KK = -2, KK
  3688. #else
  3689. nop __LINE__
  3690. #endif
  3691. }
  3692. ;;
  3693. { .mmi
  3694. #if defined(LT) || defined(RN)
  3695. mov L = KK
  3696. #else
  3697. sub L = K, KK
  3698. #endif
  3699. }
  3700. ;;
  // .L120: set-up for the part of the M loop that runs when bit 0 of M is set
  // (otherwise control goes straight to .L129).
  // It picks the trip count L, positions AOFFSET/BOFFSET, preloads the first
  // operands and programs ar.lc for the accumulation loop at .L122.
  // NOTE(review): LDFPD, LDFD, SIZE, BASE_SHIFT, PREFETCHSIZE and the register
  // names are macros defined outside this view.
  3701. .align 8
  3702. .L120:
  // p6 = (bit 0 of M is clear), p7 = its complement.
  3703. tbit.z p6, p7 = M, 0
  3704. (p6) br.cond.dptk .L129
  3705. ;;
  // L = KK for LT/RN, K - KK for the other variants.
  3706. { .mib
  3707. #if defined(LT) || defined(RN)
  3708. mov L = KK
  3709. #else
  3710. sub L = K, KK
  3711. #endif
  3712. }
  3713. ;;
  // p7 = (L != 0) guards the preloads below.
  // BOFFSET = B, r2 = K << BASE_SHIFT, r3 = KK << BASE_SHIFT.
  3714. { .mmi
  3715. cmp.ne p7, p0 = r0, L
  3716. adds BOFFSET = 0 * SIZE, B
  3717. shl r2 = K, 0 + BASE_SHIFT
  3718. }
  3719. { .mmi
  3720. shladd r3 = KK, BASE_SHIFT, r0
  3721. nop __LINE__
  3722. nop __LINE__
  3723. }
  3724. ;;
  3725. #if defined(LT) || defined(RN)
  // LT/RN: B is read from its start; AOFFSET is left as the previous section set it.
  3726. { .mmf
  3727. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  3728. }
  3729. ;;
  3730. #else
  // LN/RT: BOFFSET = B + (r3 << 1); LN additionally moves AORIG back by r2.
  3731. { .mfi
  3732. shladd BOFFSET = r3, 1, B
  3733. #ifdef LN
  3734. sub AORIG = AORIG, r2
  3735. #else
  3736. nop __LINE__
  3737. #endif
  3738. }
  3739. ;;
  // AOFFSET = AORIG + r3.
  3740. { .mfi
  3741. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  3742. add AOFFSET = r3, AORIG
  3743. }
  3744. ;;
  3745. #endif
  // Trip-count arithmetic: L = ((L + 1) >> 1) - 1 (two steps per pass).
  // p12 is set when bit 0 of (L + 1) is clear (the original L was odd).
  // p3 starts out true.
  3746. { .mmi
  3747. adds L = 1, L
  3748. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  3749. cmp.eq p3, p0 = r0, r0
  3750. }
  3751. ;;
  3752. { .mii
  3753. tbit.z p12, p0 = L, 0
  3754. shr L = L, 1
  3755. }
  3756. ;;
  3757. { .mmi
  3758. adds L = -1, L
  3759. }
  3760. ;;
  // p6 = (L == -1): no passes to run, skip the loop.
  3761. { .mmi
  3762. cmp.eq p6, p0 = -1, L
  3763. }
  3764. ;;
  // Preload a single value from AOFFSET (f32) for the first step.
  3765. { .mib
  3766. (p7) LDFD f32 = [AOFFSET], 1 * SIZE
  3767. mov ar.lc = L
  3768. (p6) br.cond.dpnt .L128
  3769. }
  3770. ;;
  // .L122: counted loop (br.cloop on ar.lc) accumulating into f64 and f72.
  // Each pass issues up to two steps:
  //   step 1 uses f32 with f48/f49,
  //   step 2 (predicated on p3) uses f40 with f56/f57.
  // p4 = (L != 0) gates the reload of the step-1 operands for the next pass.
  // When p12 is set, p3 becomes (L != 0), so step 2 is dropped on the final pass.
  // NOTE(review): FMA, LDFPD and LDFD are macros defined outside this view;
  // FMA d = a, b, c is presumably d = a * b + c -- confirm.
  3771. .align 8
  3772. .L122:
  3773. { .mfi
  3774. FMA f64 = f32, f48, f64 // A1 * B1
  3775. cmp.ne p4, p5 = 0, L
  3776. }
  3777. { .mfi
  3778. nop __LINE__
  3779. FMA f72 = f32, f49, f72 // A1 * B2
  3780. (p12) cmp.ne p3, p0 = 0, L
  3781. }
  3782. ;;
  // Fetch the step-2 operands (only when p3 is set).
  3783. { .mmi
  3784. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  3785. (p3) LDFD f40 = [AOFFSET], 1 * SIZE
  3786. nop __LINE__
  3787. }
  3788. { .mmi
  3789. nop __LINE__
  3790. nop __LINE__
  3791. nop __LINE__
  3792. }
  3793. ;;
  // Step 2 overlapped with the reload for the next pass; L is decremented in
  // software alongside ar.lc because the predicates above test L.
  3794. { .mfi
  3795. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  3796. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  3797. adds L = -1, L
  3798. }
  3799. { .mfb
  3800. (p4) LDFD f32 = [AOFFSET], 1 * SIZE
  3801. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  3802. br.cloop.sptk.few .L122
  3803. }
  3804. ;;
  3805. .L128:
  // Solve step and write-back for the 1 x 2 tile.
  //   1. LN / RT: recompute AOFFSET / BOFFSET from KK.
  //   2. Combine the values loaded from [BOFFSET] (LN / LT) or [AOFFSET]
  //      (RN / RT) with the accumulators f64 / f72 via FSUB.
  //   3. Apply the triangular factor: one scalar for LN / LT, a 2 x 2 triangle
  //      for RN / RT, and store the result back through the same pointer.
  //   4. Store the result to C1 / C2, clear the accumulators, and advance
  //      AORIG / AOFFSET / BOFFSET / KK / L for whatever follows.
  // NOTE(review): FSUB / FMPY / FNMA / LDFPD / LDFD / STFD are macros defined
  // outside this chunk; the arithmetic described below assumes FSUB d = a, b is
  // a - b and FNMA d = a, b, c is c - a * b -- confirm against their definitions.
  // NOTE(review): the factor entries on the diagonal are applied with FMPY, so
  // they are presumably stored as reciprocals -- confirm against the packing code.
  3806. #if defined(LN) || defined(RT)
  3807. #ifdef LN
  3808. adds r2 = -1, KK // LN: r2 = KK - 1
  3809. #else
  3810. adds r2 = -2, KK // RT: r2 = KK - 2
  3811. #endif
  3812. ;;
  3813. shladd r2 = r2, BASE_SHIFT, r0 // r2 <<= BASE_SHIFT
  3814. ;;
  3815. add AOFFSET = r2, AORIG // AOFFSET = AORIG + r2
  3816. shladd BOFFSET = r2, 1, B // BOFFSET = B + 2 * r2
  3817. ;;
  3818. #endif
  3819. adds AOFFSET2 = 4 * SIZE, AOFFSET // secondary pointers; not referenced again before .L129
  3820. adds BOFFSET2 = 4 * SIZE, BOFFSET
  3821. ;;
  3822. #if defined(LN) || defined(LT)
  3823. LDFPD f32, f33 = [BOFFSET] // LN / LT: the two values come from the B-side buffer
  3824. ;;
  3825. FSUB f64 = f32, f64 // loaded value combined with the accumulated products
  3826. FSUB f72 = f33, f72
  3827. ;;
  3828. #else
  3829. LDFPD f32, f33 = [AOFFSET] // RN / RT: the two values come from the A-side buffer
  3830. ;;
  3831. FSUB f64 = f32, f64
  3832. FSUB f72 = f33, f72
  3833. ;;
  3834. #endif
  3835. #ifdef LN
  3836. LDFD f32 = [AOFFSET] // single factor entry for the one-row tile
  3837. ;;
  3838. FMPY f64 = f64, f32
  3839. FMPY f72 = f72, f32
  3840. ;;
  3841. { .mmi
  3842. STFD [BOFFSET] = f64, SIZE // write the solved values back; BOFFSET steps +SIZE here ...
  3843. adds C1 = -1 * SIZE, C1 // LN: step C1 back one element before the store to C below
  3844. }
  3845. ;;
  3846. { .mmi
  3847. STFD [BOFFSET] = f72, -SIZE // ... and -SIZE here, so BOFFSET ends where it started
  3848. adds C2 = -1 * SIZE, C2 // LN: step C2 back one element before the store to C below
  3849. }
  3850. ;;
  3851. #endif
  3852. #ifdef LT
  3853. LDFD f32 = [AOFFSET] // single factor entry for the one-row tile
  3854. ;;
  3855. FMPY f64 = f64, f32
  3856. FMPY f72 = f72, f32
  3857. ;;
  3858. STFD [BOFFSET] = f64, SIZE // write back; the +SIZE / -SIZE pair leaves BOFFSET unchanged
  3859. ;;
  3860. STFD [BOFFSET] = f72, -SIZE
  3861. ;;
  3862. #endif
  3863. #ifdef RN
  3864. LDFPD f32, f33 = [BOFFSET] // f32 = entry 0, f33 = entry 1 of the 2 x 2 factor block
  3865. adds BOFFSET = 3 * SIZE, BOFFSET
  3866. ;;
  3867. LDFD f34 = [BOFFSET], -3 * SIZE // f34 = entry 3; BOFFSET is restored by the post-decrement
  3868. ;;
  3869. FMPY f64 = f64, f32 // first column: scale by entry 0
  3870. ;;
  3871. FNMA f72 = f64, f33, f72 // second column: eliminate the first column's contribution (entry 1)
  3872. ;;
  3873. FMPY f72 = f72, f34 // second column: scale by entry 3
  3874. ;;
  3875. STFD [AOFFSET] = f64, SIZE // write back; the +SIZE / -SIZE pair leaves AOFFSET unchanged
  3876. ;;
  3877. STFD [AOFFSET] = f72, -SIZE
  3878. ;;
  3879. #endif
  3880. #ifdef RT
  3881. adds BOFFSET = 2 * SIZE, BOFFSET
  3882. ;;
  3883. LDFPD f33, f32 = [BOFFSET] // note the swapped destinations: f33 = entry 2, f32 = entry 3
  3884. adds BOFFSET = - 2 * SIZE, BOFFSET // restore BOFFSET
  3885. ;;
  3886. LDFD f34 = [BOFFSET] // f34 = entry 0
  3887. ;;
  3888. FMPY f72 = f72, f32 // second column first (reverse order of RN): scale by entry 3
  3889. ;;
  3890. FNMA f64 = f72, f33, f64 // first column: eliminate the second column's contribution (entry 2)
  3891. ;;
  3892. FMPY f64 = f64, f34 // first column: scale by entry 0
  3893. ;;
  3894. STFD [AOFFSET] = f64, SIZE // write back; the +SIZE / -SIZE pair leaves AOFFSET unchanged
  3895. ;;
  3896. STFD [AOFFSET] = f72, -SIZE
  3897. ;;
  3898. #endif
  3899. #ifndef LN
  3900. STFD [C1 ] = f64, SIZE // non-LN: store to C and advance C1 by one element
  3901. #else
  3902. STFD [C1 ] = f64 // LN: C1 was already stepped back above, so no post-increment
  3903. #endif
  3904. #ifndef LN
  3905. STFD [C2 ] = f72, SIZE // non-LN: store to C and advance C2 by one element
  3906. #else
  3907. STFD [C2 ] = f72 // LN: C2 was already stepped back above, so no post-increment
  3908. #endif
  3909. mov f64 = f0 // clear both accumulators
  3910. mov f72 = f0
  3911. ;;
  3912. shladd r2 = K, BASE_SHIFT, r0 // r2 = K << BASE_SHIFT
  3913. ;;
  3914. sub L = K, KK // L = K - KK
  3915. ;;
  3916. #ifdef RT
  3917. add AORIG = r2, AORIG // RT: AORIG += r2
  3918. #else
  3919. nop __LINE__
  3920. #endif
  3921. ;;
  3922. #if defined(LT) || defined(RN)
  3923. shladd L = L, BASE_SHIFT, r0 // LT / RN: L = (K - KK) << BASE_SHIFT
  3924. #else
  3925. nop __LINE__
  3926. #endif
  3927. ;;
  3928. #if defined(LT) || defined(RN)
  3929. add AOFFSET = L, AOFFSET // LT / RN: AOFFSET += L
  3930. #else
  3931. nop __LINE__
  3932. #endif
  3933. ;;
  3934. #if defined(LT) || defined(RN)
  3935. shladd BOFFSET = L, 1, BOFFSET // LT / RN: BOFFSET += 2 * L
  3936. #else
  3937. nop __LINE__
  3938. #endif
  3939. ;;
  3940. #ifdef LT
  3941. adds KK = 1, KK // LT: KK += 1
  3942. #elif defined LN
  3943. adds KK = -1, KK // LN: KK -= 1
  3944. #else
  3945. nop __LINE__
  3946. #endif
  3947. ;;
  3948. #if defined(LT) || defined(RN)
  3949. mov L = KK // LT / RN: L = KK
  3950. #else
  3951. sub L = K, KK // LN / RT: L = K - KK
  3952. #endif
  3953. ;;
  3954. .align 8
  3955. .L129:
  // Epilogue of the panel handled above: moves B on and steps KK by 2 (the
  // step sizes match the two-column tiles processed before this label), then
  // resets AOFFSET to the start of A before falling through to .L050.
  3956. #ifdef LN
  3957. shladd KK8 = K, BASE_SHIFT, r0 // LN: KK8 = K << BASE_SHIFT
  3958. ;;
  3959. shladd B = KK8, 1, B // LN: B += 2 * KK8
  3960. #endif
  3961. #if defined(LT) || defined(RN)
  3962. mov B = BOFFSET // LT / RN: B takes over the running BOFFSET
  3963. #endif
  3964. #ifdef RN
  3965. adds KK = 2, KK // RN: KK += 2
  3966. #endif
  3967. #ifdef RT
  3968. adds KK = -2, KK // RT: KK -= 2
  3969. #endif
  3970. ;;
  3971. mov AOFFSET = A // AOFFSET = A
  3972. ;;
  3973. .align 16
  3974. .L050:
  // Entry of the four-column panel. Skipped entirely (branch to .L000) when
  // bit 2 of N is clear. Otherwise: for RT, B and C are first moved back by one
  // panel; the four C row/column pointers C1..C4 are derived from C and LDC;
  // KK / AORIG / AOFFSET / L are initialised per variant; the first eight
  // accumulators are cleared; and I = M >> 3 decides whether any 8-wide tile
  // exists (branch to .L060 if not).
  3975. { .mib
  3976. setf.d f64 = r0 // f64 = 0 (bit pattern of r0)
  3977. tbit.z p6, p0 = N, 2 // p6 = (bit 2 of N is clear)
  3978. (p6) br.cond.dpnt .L000 // no four-column panel to process
  3979. }
  3980. ;;
  3981. #ifdef RT
  3982. { .mmi
  3983. shladd r3 = LDC, 2, r0 // r3 = 4 * LDC
  3984. nop __LINE__
  3985. shl r2 = K, 2 + BASE_SHIFT // r2 = K << (2 + BASE_SHIFT), i.e. 4 * K scaled by the element shift
  3986. }
  3987. ;;
  3988. { .mmi
  3989. sub B = B, r2 // RT: B -= r2
  3990. sub C = C, r3 // RT: C -= 4 * LDC
  3991. nop __LINE__
  3992. }
  3993. #endif
  3994. ;;
  3995. { .mfi
  3996. setf.d f72 = r0 // clear accumulator
  3997. mov f80 = f0 // clear accumulator
  3998. shr I = M, 3 // I = M >> 3
  3999. }
  4000. { .mfi
  4001. mov C1 = C // coffset1 = c + 0 * ldc
  4002. mov f88 = f0 // clear accumulator
  4003. #ifdef LN
  4004. add KK = M, OFFSET // LN: KK = M + OFFSET
  4005. #elif defined LT
  4006. mov KK = OFFSET // LT: KK = OFFSET
  4007. #else
  4008. nop __LINE__
  4009. #endif
  4010. }
  4011. ;;
  4012. { .mmf
  4013. cmp.eq p6, p7 = 0, I // p6 = (I == 0), p7 = (I != 0)
  4014. #if defined(LN) || defined(RT)
  4015. mov AORIG = A // LN / RT: AORIG = A
  4016. #else
  4017. mov AOFFSET = A // LT / RN: AOFFSET = A
  4018. #endif
  4019. mov f65 = f0 // clear accumulator
  4020. }
  4021. { .mmf
  4022. add C2 = LDC, C // coffset2 = c + 1 * ldc
  4023. shladd C3 = LDC, 1, C // coffset3 = c + 2 * ldc
  4024. mov f73 = f0 // clear accumulator
  4025. }
  4026. ;;
  4027. { .mfi
  4028. #ifndef RT
  4029. shladd C = LDC, 2, C // coffset += 4 * ldc
  4030. #else
  4031. nop __LINE__
  4032. #endif
  4033. mov f81 = f0 // clear accumulator
  4034. #if defined(LT) || defined(RN)
  4035. mov L = KK // LT / RN: L = KK
  4036. #else
  4037. sub L = K, KK // LN / RT: L = K - KK
  4038. #endif
  4039. }{ .mfb
  4040. shladd C4 = LDC, 1, C2 // coffset4 = c + 3 * ldc (C2 + 2 * ldc)
  4041. mov f89 = f0 // clear accumulator
  4042. (p6) br.cond.dpnt .L060 // I == 0: no 8-wide tile, continue at .L060
  4043. }
  4044. ;;
  4045. .align 16
  4046. .L052:
  // Prologue of the 8 x 4 tile. Positions AOFFSET / BOFFSET for the variant,
  // preloads the operands of the first k-step (A in f32..f39, B in f48..f51,
  // all predicated on p7 = "L != 0"), clears the accumulators f66..f95 that
  // this prologue owns (f64, f65, f72, f73, f80, f81, f88, f89 are not written
  // here), issues prefetches for C and sets up the PREA / PREB prefetch
  // pointers, and converts the k-step count in L into the ar.lc value for the
  // two-steps-per-pass loop at .L053.
  // NOTE(review): CPREFETCH is a macro defined outside this chunk; the
  // "[PREC], LDC" form presumably post-increments PREC by LDC -- confirm.
  4047. { .mmi
  4048. cmp.ne p7, p0 = r0, L // p7 = (L != 0): there is at least one k-step, so preloads are needed
  4049. adds BOFFSET = 0 * SIZE, B // BOFFSET = B
  4050. shl r2 = K, 3 + BASE_SHIFT // r2 = K << (3 + BASE_SHIFT); only consumed by the LN path below
  4051. }
  4052. { .mmi
  4053. shladd r3 = KK, BASE_SHIFT, r0 // r3 = KK << BASE_SHIFT
  4054. nop __LINE__
  4055. nop __LINE__
  4056. }
  4057. ;;
  4058. #if defined(LT) || defined(RN)
  4059. { .mmi
  4060. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE // preload B1, B2 of the first k-step
  4061. nop __LINE__
  4062. nop __LINE__
  4063. }
  4064. ;;
  4065. #else
  4066. { .mfi
  4067. shladd BOFFSET = r3, 2, B // LN / RT: BOFFSET = B + 4 * r3
  4068. #ifdef LN
  4069. sub AORIG = AORIG, r2 // LN: AORIG -= r2
  4070. #else
  4071. nop __LINE__
  4072. #endif
  4073. }
  4074. ;;
  4075. { .mfi
  4076. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE // preload B1, B2 of the first k-step
  4077. shladd AOFFSET = r3, 3, AORIG // LN / RT: AOFFSET = AORIG + 8 * r3
  4078. }
  4079. ;;
  4080. #endif
  4081. { .mfi
  4082. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE // preload A1, A2
  4083. mov f66 = f0
  4084. nop __LINE__
  4085. }
  4086. { .mfi
  4087. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE // preload B3, B4
  4088. mov f74 = f0
  4089. nop __LINE__
  4090. }
  4091. ;;
  4092. { .mmf
  4093. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE // preload A3, A4
  4094. setf.d f82 = r0
  4095. mov f90 = f0
  4096. }
  4097. ;;
  4098. { .mmf
  4099. (p7) LDFPD f36, f37 = [AOFFSET], 2 * SIZE // preload A5, A6
  4100. setf.d f67 = r0
  4101. mov f75 = f0
  4102. }
  4103. { .mfi
  4104. setf.d f83 = r0
  4105. mov f91 = f0
  4106. cmp.eq p3, p0 = r0, r0 // p3 = 1: the second k-step of each loop pass is enabled by default
  4107. }
  4108. ;;
  4109. { .mmf
  4110. (p7) LDFPD f38, f39 = [AOFFSET], 2 * SIZE // preload A7, A8
  4111. }
  4112. { .mfi
  4113. adds PREC = CPREFETCHSIZE * SIZE, C1 // PREC = C1 + CPREFETCHSIZE elements
  4114. }
  4115. ;;
  4116. { .mmf
  4117. CPREFETCH [PREC], LDC // first of four C prefetches issued through PREC
  4118. setf.d f68 = r0
  4119. mov f76 = f0
  4120. }
  4121. { .mfi
  4122. setf.d f84 = r0
  4123. mov f92 = f0
  4124. adds L = 1, L // L + 1, so the shift below rounds the pass count up
  4125. }
  4126. ;;
  4127. { .mmf
  4128. CPREFETCH [PREC], LDC
  4129. }
  4130. { .mfi
  4131. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET // A-side prefetch pointer used by .L053
  4132. }
  4133. ;;
  4134. { .mmf
  4135. CPREFETCH [PREC], LDC
  4136. setf.d f69 = r0
  4137. mov f77 = f0
  4138. }
  4139. { .mfi
  4140. setf.d f85 = r0
  4141. mov f93 = f0
  4142. adds PREB = (PREFETCHSIZE - 8) * SIZE, BOFFSET // B-side prefetch pointer used by .L053
  4143. }
  4144. ;;
  4145. { .mmf
  4146. CPREFETCH [PREC] // last C prefetch, no pointer update
  4147. }
  4148. ;;
  4149. { .mfi
  4150. setf.d f70 = r0
  4151. mov f78 = f0
  4152. tbit.z p12, p0 = L, 0 // p12 = (bit 0 of count + 1 is clear), i.e. the k-step count is odd
  4153. }
  4154. { .mfi
  4155. setf.d f86 = r0
  4156. mov f94 = f0
  4157. shr L = L, 1 // L = (count + 1) >> 1 = number of two-step passes
  4158. }
  4159. ;;
  4160. { .mfi
  4161. setf.d f71 = r0
  4162. adds L = -1, L // passes - 1, the form the counted loop below is given
  4163. }
  4164. ;;
  4165. { .mfi
  4166. setf.d f87 = r0
  4167. mov f79 = f0
  4168. mov ar.lc = L // loop counter for br.cloop in .L053
  4169. }
  4170. { .mfb
  4171. cmp.eq p6, p0 = -1, L // p6 = (L == -1): there were no k-steps at all
  4172. mov f95 = f0
  4173. (p6) br.cond.dpnt .L058 // skip the accumulation loop entirely
  4174. }
  4175. ;;
  4176. .align 8
  4177. .L053:
  // Inner-product loop for the 8 x 4 tile. Accumulator layout: f64..f71 hold
  // A1..A8 * B1, f72..f79 hold * B2, f80..f87 hold * B3, f88..f95 hold * B4.
  // Each pass performs up to two k-steps:
  //   - first step:  A in f32..f39, B in f48..f51 (loaded by the previous pass
  //     or by the prologue);
  //   - second step: A in f40..f47, B in f56..f59, loaded during the first step
  //     and executed only while p3 is set.
  // p4 (L != 0) gates the reload of the first-step operands for the next pass.
  // When p12 is set, p3 is cleared on the final pass so that the second step is
  // dropped. ar.lc drives the branch; L is decremented alongside it and is only
  // read for the predicate tests.
  4178. { .mfb
  4179. lfetch.nt1 [PREA], 16 * SIZE // prefetch ahead on the A side, advancing PREA by 16 elements
  4180. FMA f64 = f32, f48, f64 // A1 * B1
  4181. nop __LINE__
  4182. }
  4183. { .mfi
  4184. nop __LINE__
  4185. FMA f72 = f32, f49, f72 // A1 * B2
  4186. (p12) cmp.ne p3, p0 = 0, L // only when p12 is set: clear p3 on the final pass (L == 0)
  4187. }
  4188. ;;
  4189. { .mfi
  4190. lfetch.nt1 [PREB], 8 * SIZE // prefetch ahead on the B side, advancing PREB by 8 elements
  4191. FMA f80 = f32, f50, f80 // A1 * B3
  4192. cmp.ne p4, p5 = 0, L // p4 = another pass follows (L != 0)
  4193. }
  4194. { .mfi
  4195. nop __LINE__
  4196. FMA f88 = f32, f51, f88 // A1 * B4
  4197. adds C9 = 4 * SIZE, C1 // C9..C12 = C1..C4 + 4 elements; recomputed on every pass
  4198. }
  4199. ;;
  4200. { .mfi
  4201. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE // second-step A1, A2
  4202. FMA f65 = f33, f48, f65 // A2 * B1
  4203. adds C10 = 4 * SIZE, C2
  4204. }
  4205. { .mfi
  4206. nop __LINE__
  4207. FMA f73 = f33, f49, f73 // A2 * B2
  4208. adds C11 = 4 * SIZE, C3
  4209. }
  4210. ;;
  4211. { .mfi
  4212. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE // second-step B1, B2
  4213. FMA f81 = f33, f50, f81 // A2 * B3
  4214. adds C12 = 4 * SIZE, C4
  4215. }
  4216. { .mfb
  4217. nop __LINE__
  4218. FMA f89 = f33, f51, f89 // A2 * B4
  4219. nop __LINE__
  4220. }
  4221. ;;
  4222. { .mfb
  4223. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE // second-step B3, B4
  4224. FMA f66 = f34, f48, f66 // A3 * B1
  4225. nop __LINE__
  4226. }
  4227. { .mfb
  4228. nop __LINE__
  4229. FMA f74 = f34, f49, f74 // A3 * B2
  4230. nop __LINE__
  4231. }
  4232. ;;
  4233. { .mfb
  4234. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE // second-step A3, A4
  4235. FMA f82 = f34, f50, f82 // A3 * B3
  4236. nop __LINE__
  4237. }
  4238. { .mfb
  4239. nop __LINE__
  4240. FMA f90 = f34, f51, f90 // A3 * B4
  4241. nop __LINE__
  4242. }
  4243. ;;
  4244. { .mfb
  4245. (p3) LDFPD f44, f45 = [AOFFSET], 2 * SIZE // second-step A5, A6
  4246. FMA f67 = f35, f48, f67 // A4 * B1
  4247. nop __LINE__
  4248. }
  4249. { .mfb
  4250. nop __LINE__
  4251. FMA f75 = f35, f49, f75 // A4 * B2
  4252. nop __LINE__
  4253. }
  4254. ;;
  4255. { .mfb
  4256. (p3) LDFPD f46, f47 = [AOFFSET], 2 * SIZE // second-step A7, A8
  4257. FMA f83 = f35, f50, f83 // A4 * B3
  4258. nop __LINE__
  4259. }
  4260. { .mfb
  4261. nop __LINE__
  4262. FMA f91 = f35, f51, f91 // A4 * B4
  4263. nop __LINE__
  4264. }
  4265. ;;
  4266. { .mfb
  4267. nop __LINE__
  4268. FMA f68 = f36, f48, f68 // A5 * B1
  4269. nop __LINE__
  4270. }
  4271. { .mfb
  4272. nop __LINE__
  4273. FMA f76 = f36, f49, f76 // A5 * B2
  4274. nop __LINE__
  4275. }
  4276. ;;
  4277. { .mfb
  4278. nop __LINE__
  4279. FMA f84 = f36, f50, f84 // A5 * B3
  4280. nop __LINE__
  4281. }
  4282. { .mfb
  4283. nop __LINE__
  4284. FMA f92 = f36, f51, f92 // A5 * B4
  4285. nop __LINE__
  4286. }
  4287. ;;
  4288. { .mfb
  4289. nop __LINE__
  4290. FMA f69 = f37, f48, f69 // A6 * B1
  4291. nop __LINE__
  4292. }
  4293. { .mfb
  4294. nop __LINE__
  4295. FMA f77 = f37, f49, f77 // A6 * B2
  4296. nop __LINE__
  4297. }
  4298. ;;
  4299. { .mfb
  4300. nop __LINE__
  4301. FMA f85 = f37, f50, f85 // A6 * B3
  4302. nop __LINE__
  4303. }
  4304. { .mfb
  4305. nop __LINE__
  4306. FMA f93 = f37, f51, f93 // A6 * B4
  4307. nop __LINE__
  4308. }
  4309. ;;
  4310. { .mfb
  4311. nop __LINE__
  4312. FMA f70 = f38, f48, f70 // A7 * B1
  4313. nop __LINE__
  4314. }
  4315. { .mfb
  4316. nop __LINE__
  4317. FMA f78 = f38, f49, f78 // A7 * B2
  4318. nop __LINE__
  4319. }
  4320. ;;
  4321. { .mfb
  4322. nop __LINE__
  4323. FMA f86 = f38, f50, f86 // A7 * B3
  4324. nop __LINE__
  4325. }
  4326. { .mfb
  4327. nop __LINE__
  4328. FMA f94 = f38, f51, f94 // A7 * B4
  4329. nop __LINE__
  4330. }
  4331. ;;
  4332. { .mfb
  4333. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE // next pass: first-step A1, A2 (f32 / f33 are no longer read in this pass)
  4334. FMA f71 = f39, f48, f71 // A8 * B1
  4335. nop __LINE__
  4336. }
  4337. { .mfb
  4338. nop __LINE__
  4339. FMA f79 = f39, f49, f79 // A8 * B2
  4340. nop __LINE__
  4341. }
  4342. ;;
  4343. { .mfb
  4344. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE // next pass: first-step B1, B2 (their last use was in the previous group)
  4345. FMA f87 = f39, f50, f87 // A8 * B3
  4346. nop __LINE__
  4347. }
  4348. { .mfb
  4349. nop __LINE__
  4350. FMA f95 = f39, f51, f95 // A8 * B4
  4351. nop __LINE__
  4352. }
  4353. ;;
  4354. { .mfb
  4355. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE // next pass: first-step B3, B4
  4356. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  4357. nop __LINE__
  4358. }
  4359. { .mfb
  4360. nop __LINE__
  4361. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  4362. nop __LINE__
  4363. }
  4364. ;;
  4365. { .mfb
  4366. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE // next pass: first-step A3, A4
  4367. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  4368. nop __LINE__
  4369. }
  4370. { .mfb
  4371. nop __LINE__
  4372. (p3) FMA f88 = f40, f59, f88 // A1 * B4
  4373. nop __LINE__
  4374. }
  4375. ;;
  4376. { .mfb
  4377. (p4) LDFPD f36, f37 = [AOFFSET], 2 * SIZE // next pass: first-step A5, A6
  4378. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  4379. nop __LINE__
  4380. }
  4381. { .mfb
  4382. nop __LINE__
  4383. (p3) FMA f73 = f41, f57, f73 // A2 * B2
  4384. nop __LINE__
  4385. }
  4386. ;;
  4387. { .mfb
  4388. (p4) LDFPD f38, f39 = [AOFFSET], 2 * SIZE // next pass: first-step A7, A8
  4389. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  4390. nop __LINE__
  4391. }
  4392. { .mfb
  4393. nop __LINE__
  4394. (p3) FMA f89 = f41, f59, f89 // A2 * B4
  4395. nop __LINE__
  4396. }
  4397. ;;
  4398. { .mfb
  4399. nop __LINE__
  4400. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  4401. nop __LINE__
  4402. }
  4403. { .mfb
  4404. nop __LINE__
  4405. (p3) FMA f74 = f42, f57, f74 // A3 * B2
  4406. nop __LINE__
  4407. }
  4408. ;;
  4409. { .mfb
  4410. nop __LINE__
  4411. (p3) FMA f82 = f42, f58, f82 // A3 * B3
  4412. nop __LINE__
  4413. }
  4414. { .mfb
  4415. nop __LINE__
  4416. (p3) FMA f90 = f42, f59, f90 // A3 * B4
  4417. nop __LINE__
  4418. }
  4419. ;;
  4420. { .mfb
  4421. nop __LINE__
  4422. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  4423. nop __LINE__
  4424. }
  4425. { .mfb
  4426. nop __LINE__
  4427. (p3) FMA f75 = f43, f57, f75 // A4 * B2
  4428. nop __LINE__
  4429. }
  4430. ;;
  4431. { .mfb
  4432. nop __LINE__
  4433. (p3) FMA f83 = f43, f58, f83 // A4 * B3
  4434. nop __LINE__
  4435. }
  4436. { .mfb
  4437. nop __LINE__
  4438. (p3) FMA f91 = f43, f59, f91 // A4 * B4
  4439. nop __LINE__
  4440. }
  4441. ;;
  4442. { .mfb
  4443. nop __LINE__
  4444. (p3) FMA f68 = f44, f56, f68 // A5 * B1
  4445. nop __LINE__
  4446. }
  4447. { .mfb
  4448. nop __LINE__
  4449. (p3) FMA f76 = f44, f57, f76 // A5 * B2
  4450. nop __LINE__
  4451. }
  4452. ;;
  4453. { .mfb
  4454. nop __LINE__
  4455. (p3) FMA f84 = f44, f58, f84 // A5 * B3
  4456. nop __LINE__
  4457. }
  4458. { .mfb
  4459. nop __LINE__
  4460. (p3) FMA f92 = f44, f59, f92 // A5 * B4
  4461. nop __LINE__
  4462. }
  4463. ;;
  4464. { .mfb
  4465. nop __LINE__
  4466. (p3) FMA f69 = f45, f56, f69 // A6 * B1
  4467. nop __LINE__
  4468. }
  4469. { .mfb
  4470. nop __LINE__
  4471. (p3) FMA f77 = f45, f57, f77 // A6 * B2
  4472. nop __LINE__
  4473. }
  4474. ;;
  4475. { .mfb
  4476. nop __LINE__
  4477. (p3) FMA f85 = f45, f58, f85 // A6 * B3
  4478. nop __LINE__
  4479. }
  4480. { .mfb
  4481. nop __LINE__
  4482. (p3) FMA f93 = f45, f59, f93 // A6 * B4
  4483. nop __LINE__
  4484. }
  4485. ;;
  4486. { .mfb
  4487. nop __LINE__
  4488. (p3) FMA f70 = f46, f56, f70 // A7 * B1
  4489. nop __LINE__
  4490. }
  4491. { .mfb
  4492. nop __LINE__
  4493. (p3) FMA f78 = f46, f57, f78 // A7 * B2
  4494. nop __LINE__
  4495. }
  4496. ;;
  4497. { .mfb
  4498. nop __LINE__
  4499. (p3) FMA f86 = f46, f58, f86 // A7 * B3
  4500. nop __LINE__
  4501. }
  4502. { .mfb
  4503. nop __LINE__
  4504. (p3) FMA f94 = f46, f59, f94 // A7 * B4
  4505. nop __LINE__
  4506. }
  4507. ;;
  4508. { .mfb
  4509. nop __LINE__
  4510. (p3) FMA f71 = f47, f56, f71 // A8 * B1
  4511. nop __LINE__
  4512. }
  4513. { .mfb
  4514. nop __LINE__
  4515. (p3) FMA f79 = f47, f57, f79 // A8 * B2
  4516. nop __LINE__
  4517. }
  4518. ;;
  4519. { .mfi
  4520. nop __LINE__
  4521. (p3) FMA f87 = f47, f58, f87 // A8 * B3
  4522. adds L = -1, L // keep L in step with ar.lc for the predicate tests at the top of the loop
  4523. }
  4524. { .mfb
  4525. nop __LINE__
  4526. (p3) FMA f95 = f47, f59, f95 // A8 * B4
  4527. br.cloop.sptk.few .L053 // counted loop on ar.lc
  4528. }
  4529. ;;
  4530. .align 8
  4531. .L058:
  4532. #if defined(LN) || defined(RT)
  4533. #ifdef LN
  4534. adds r2 = -8, KK
  4535. #else
  4536. adds r2 = -4, KK
  4537. #endif
  4538. ;;
  4539. shladd r2 = r2, BASE_SHIFT, r0
  4540. ;;
  4541. shladd AOFFSET = r2, 3, AORIG
  4542. shladd BOFFSET = r2, 2, B
  4543. ;;
  4544. #endif
  4545. adds AOFFSET2 = 4 * SIZE, AOFFSET
  4546. adds BOFFSET2 = 4 * SIZE, BOFFSET
  4547. ;;
  4548. #if defined(LN) || defined(LT)
  4549. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  4550. ;;
  4551. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  4552. ;;
  4553. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  4554. ;;
  4555. LDFPD f38, f39 = [BOFFSET], 2 * SIZE
  4556. ;;
  4557. LDFPD f40, f41 = [BOFFSET], 2 * SIZE
  4558. ;;
  4559. LDFPD f42, f43 = [BOFFSET], 2 * SIZE
  4560. ;;
  4561. LDFPD f44, f45 = [BOFFSET], 2 * SIZE
  4562. ;;
  4563. LDFPD f46, f47 = [BOFFSET], 2 * SIZE
  4564. ;;
  4565. LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  4566. ;;
  4567. LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  4568. ;;
  4569. LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  4570. ;;
  4571. LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  4572. ;;
  4573. LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  4574. ;;
  4575. LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  4576. ;;
  4577. LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  4578. ;;
  4579. LDFPD f62, f63 = [BOFFSET]
  4580. adds BOFFSET = -30 * SIZE, BOFFSET
  4581. ;;
  4582. FSUB f64 = f32, f64
  4583. FSUB f72 = f33, f72
  4584. FSUB f80 = f34, f80
  4585. FSUB f88 = f35, f88
  4586. FSUB f65 = f36, f65
  4587. FSUB f73 = f37, f73
  4588. FSUB f81 = f38, f81
  4589. FSUB f89 = f39, f89
  4590. FSUB f66 = f40, f66
  4591. FSUB f74 = f41, f74
  4592. FSUB f82 = f42, f82
  4593. FSUB f90 = f43, f90
  4594. FSUB f67 = f44, f67
  4595. FSUB f75 = f45, f75
  4596. FSUB f83 = f46, f83
  4597. FSUB f91 = f47, f91
  4598. FSUB f68 = f48, f68
  4599. FSUB f76 = f49, f76
  4600. FSUB f84 = f50, f84
  4601. FSUB f92 = f51, f92
  4602. FSUB f69 = f52, f69
  4603. FSUB f77 = f53, f77
  4604. FSUB f85 = f54, f85
  4605. FSUB f93 = f55, f93
  4606. FSUB f70 = f56, f70
  4607. FSUB f78 = f57, f78
  4608. FSUB f86 = f58, f86
  4609. FSUB f94 = f59, f94
  4610. FSUB f71 = f60, f71
  4611. FSUB f79 = f61, f79
  4612. FSUB f87 = f62, f87
  4613. FSUB f95 = f63, f95
  4614. ;;
  4615. #else
  4616. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  4617. ;;
  4618. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  4619. ;;
  4620. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  4621. ;;
  4622. LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  4623. ;;
  4624. LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  4625. ;;
  4626. LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  4627. ;;
  4628. LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  4629. ;;
  4630. LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  4631. ;;
  4632. LDFPD f48, f49 = [AOFFSET], 2 * SIZE
  4633. ;;
  4634. LDFPD f50, f51 = [AOFFSET], 2 * SIZE
  4635. ;;
  4636. LDFPD f52, f53 = [AOFFSET], 2 * SIZE
  4637. ;;
  4638. LDFPD f54, f55 = [AOFFSET], 2 * SIZE
  4639. ;;
  4640. LDFPD f56, f57 = [AOFFSET], 2 * SIZE
  4641. ;;
  4642. LDFPD f58, f59 = [AOFFSET], 2 * SIZE
  4643. ;;
  4644. LDFPD f60, f61 = [AOFFSET], 2 * SIZE
  4645. ;;
  4646. LDFPD f62, f63 = [AOFFSET]
  4647. adds AOFFSET = -30 * SIZE, AOFFSET
  4648. ;;
  4649. FSUB f64 = f32, f64
  4650. FSUB f65 = f33, f65
  4651. FSUB f66 = f34, f66
  4652. FSUB f67 = f35, f67
  4653. FSUB f68 = f36, f68
  4654. FSUB f69 = f37, f69
  4655. FSUB f70 = f38, f70
  4656. FSUB f71 = f39, f71
  4657. ;;
  4658. FSUB f72 = f40, f72
  4659. FSUB f73 = f41, f73
  4660. FSUB f74 = f42, f74
  4661. FSUB f75 = f43, f75
  4662. FSUB f76 = f44, f76
  4663. FSUB f77 = f45, f77
  4664. FSUB f78 = f46, f78
  4665. FSUB f79 = f47, f79
  4666. ;;
  4667. FSUB f80 = f48, f80
  4668. FSUB f81 = f49, f81
  4669. FSUB f82 = f50, f82
  4670. FSUB f83 = f51, f83
  4671. FSUB f84 = f52, f84
  4672. FSUB f85 = f53, f85
  4673. FSUB f86 = f54, f86
  4674. FSUB f87 = f55, f87
  4675. FSUB f88 = f56, f88
  4676. FSUB f89 = f57, f89
  4677. FSUB f90 = f58, f90
  4678. FSUB f91 = f59, f91
  4679. FSUB f92 = f60, f92
  4680. FSUB f93 = f61, f93
  4681. FSUB f94 = f62, f94
  4682. FSUB f95 = f63, f95
  4683. ;;
  4684. #endif
  4685. #ifdef LN
  4686. adds AOFFSET = 62 * SIZE, AOFFSET
  4687. ;;
  4688. LDFPD f33, f32 = [AOFFSET]
  4689. adds AOFFSET = - 2 * SIZE, AOFFSET
  4690. ;;
  4691. LDFPD f35, f34 = [AOFFSET]
  4692. adds AOFFSET = - 2 * SIZE, AOFFSET
  4693. ;;
  4694. LDFPD f37, f36 = [AOFFSET]
  4695. adds AOFFSET = - 2 * SIZE, AOFFSET
  4696. ;;
  4697. LDFPD f39, f38 = [AOFFSET]
  4698. adds AOFFSET = - 2 * SIZE, AOFFSET
  4699. ;;
  4700. LDFD f40 = [AOFFSET], -2 * SIZE
  4701. ;;
  4702. LDFPD f42, f41 = [AOFFSET]
  4703. adds AOFFSET = - 2 * SIZE, AOFFSET
  4704. ;;
  4705. LDFPD f44, f43 = [AOFFSET]
  4706. adds AOFFSET = - 2 * SIZE, AOFFSET
  4707. ;;
  4708. LDFPD f46, f45 = [AOFFSET]
  4709. adds AOFFSET = - 4 * SIZE, AOFFSET
  4710. ;;
  4711. LDFPD f48, f47 = [AOFFSET]
  4712. adds AOFFSET = - 2 * SIZE, AOFFSET
  4713. ;;
  4714. LDFPD f50, f49 = [AOFFSET]
  4715. adds AOFFSET = - 2 * SIZE, AOFFSET
  4716. ;;
  4717. LDFPD f52, f51 = [AOFFSET]
  4718. adds AOFFSET = - 4 * SIZE, AOFFSET
  4719. ;;
  4720. LDFD f53 = [AOFFSET], -2 * SIZE
  4721. ;;
  4722. LDFPD f55, f54 = [AOFFSET]
  4723. adds AOFFSET = - 2 * SIZE, AOFFSET
  4724. ;;
  4725. LDFPD f57, f56 = [AOFFSET]
  4726. adds AOFFSET = - 6 * SIZE, AOFFSET
  4727. ;;
  4728. LDFPD f59, f58 = [AOFFSET]
  4729. adds AOFFSET = - 2 * SIZE, AOFFSET
  4730. ;;
  4731. LDFPD f61, f60 = [AOFFSET]
  4732. adds AOFFSET = - 6 * SIZE, AOFFSET
  4733. ;;
  4734. LDFD f16 = [AOFFSET], -2 * SIZE
  4735. ;;
  4736. LDFPD f18, f17 = [AOFFSET]
  4737. adds AOFFSET = - 8 * SIZE, AOFFSET
  4738. ;;
  4739. LDFPD f20, f19 = [AOFFSET]
  4740. adds AOFFSET = - 8 * SIZE, AOFFSET
  4741. ;;
  4742. LDFD f21 = [AOFFSET]
  4743. ;;
  4744. FMPY f71 = f71, f32
  4745. FMPY f79 = f79, f32
  4746. FMPY f87 = f87, f32
  4747. FMPY f95 = f95, f32
  4748. ;;
  4749. FNMA f70 = f71, f33, f70
  4750. FNMA f78 = f79, f33, f78
  4751. FNMA f86 = f87, f33, f86
  4752. FNMA f94 = f95, f33, f94
  4753. ;;
  4754. FNMA f69 = f71, f34, f69
  4755. FNMA f77 = f79, f34, f77
  4756. FNMA f85 = f87, f34, f85
  4757. FNMA f93 = f95, f34, f93
  4758. ;;
  4759. FNMA f68 = f71, f35, f68
  4760. FNMA f76 = f79, f35, f76
  4761. FNMA f84 = f87, f35, f84
  4762. FNMA f92 = f95, f35, f92
  4763. ;;
  4764. FNMA f67 = f71, f36, f67
  4765. FNMA f75 = f79, f36, f75
  4766. FNMA f83 = f87, f36, f83
  4767. FNMA f91 = f95, f36, f91
  4768. ;;
  4769. FNMA f66 = f71, f37, f66
  4770. FNMA f74 = f79, f37, f74
  4771. FNMA f82 = f87, f37, f82
  4772. FNMA f90 = f95, f37, f90
  4773. ;;
  4774. FNMA f65 = f71, f38, f65
  4775. FNMA f73 = f79, f38, f73
  4776. FNMA f81 = f87, f38, f81
  4777. FNMA f89 = f95, f38, f89
  4778. ;;
  4779. FNMA f64 = f71, f39, f64
  4780. FNMA f72 = f79, f39, f72
  4781. FNMA f80 = f87, f39, f80
  4782. FNMA f88 = f95, f39, f88
  4783. ;;
  4784. FMPY f70 = f70, f40
  4785. FMPY f78 = f78, f40
  4786. FMPY f86 = f86, f40
  4787. FMPY f94 = f94, f40
  4788. ;;
  4789. FNMA f69 = f70, f41, f69
  4790. FNMA f77 = f78, f41, f77
  4791. FNMA f85 = f86, f41, f85
  4792. FNMA f93 = f94, f41, f93
  4793. ;;
  4794. FNMA f68 = f70, f42, f68
  4795. FNMA f76 = f78, f42, f76
  4796. FNMA f84 = f86, f42, f84
  4797. FNMA f92 = f94, f42, f92
  4798. ;;
  4799. FNMA f67 = f70, f43, f67
  4800. FNMA f75 = f78, f43, f75
  4801. FNMA f83 = f86, f43, f83
  4802. FNMA f91 = f94, f43, f91
  4803. ;;
  4804. FNMA f66 = f70, f44, f66
  4805. FNMA f74 = f78, f44, f74
  4806. FNMA f82 = f86, f44, f82
  4807. FNMA f90 = f94, f44, f90
  4808. ;;
  4809. FNMA f65 = f70, f45, f65
  4810. FNMA f73 = f78, f45, f73
  4811. FNMA f81 = f86, f45, f81
  4812. FNMA f89 = f94, f45, f89
  4813. ;;
  4814. FNMA f64 = f70, f46, f64
  4815. FNMA f72 = f78, f46, f72
  4816. FNMA f80 = f86, f46, f80
  4817. FNMA f88 = f94, f46, f88
  4818. ;;
  4819. FMPY f69 = f69, f47
  4820. FMPY f77 = f77, f47
  4821. FMPY f85 = f85, f47
  4822. FMPY f93 = f93, f47
  4823. ;;
  4824. FNMA f68 = f69, f48, f68
  4825. FNMA f76 = f77, f48, f76
  4826. FNMA f84 = f85, f48, f84
  4827. FNMA f92 = f93, f48, f92
  4828. ;;
  4829. FNMA f67 = f69, f49, f67
  4830. FNMA f75 = f77, f49, f75
  4831. FNMA f83 = f85, f49, f83
  4832. FNMA f91 = f93, f49, f91
  4833. ;;
  4834. FNMA f66 = f69, f50, f66
  4835. FNMA f74 = f77, f50, f74
  4836. FNMA f82 = f85, f50, f82
  4837. FNMA f90 = f93, f50, f90
  4838. ;;
  4839. FNMA f65 = f69, f51, f65
  4840. FNMA f73 = f77, f51, f73
  4841. FNMA f81 = f85, f51, f81
  4842. FNMA f89 = f93, f51, f89
  4843. ;;
  4844. FNMA f64 = f69, f52, f64
  4845. FNMA f72 = f77, f52, f72
  4846. FNMA f80 = f85, f52, f80
  4847. FNMA f88 = f93, f52, f88
  4848. ;;
  4849. FMPY f68 = f68, f53
  4850. FMPY f76 = f76, f53
  4851. FMPY f84 = f84, f53
  4852. FMPY f92 = f92, f53
  4853. ;;
  4854. FNMA f67 = f68, f54, f67
  4855. FNMA f75 = f76, f54, f75
  4856. FNMA f83 = f84, f54, f83
  4857. FNMA f91 = f92, f54, f91
  4858. ;;
  4859. FNMA f66 = f68, f55, f66
  4860. FNMA f74 = f76, f55, f74
  4861. FNMA f82 = f84, f55, f82
  4862. FNMA f90 = f92, f55, f90
  4863. ;;
  4864. FNMA f65 = f68, f56, f65
  4865. FNMA f73 = f76, f56, f73
  4866. FNMA f81 = f84, f56, f81
  4867. FNMA f89 = f92, f56, f89
  4868. ;;
  4869. FNMA f64 = f68, f57, f64
  4870. FNMA f72 = f76, f57, f72
  4871. FNMA f80 = f84, f57, f80
  4872. FNMA f88 = f92, f57, f88
  4873. ;;
  4874. FMPY f67 = f67, f58
  4875. FMPY f75 = f75, f58
  4876. FMPY f83 = f83, f58
  4877. FMPY f91 = f91, f58
  4878. ;;
  4879. FNMA f66 = f67, f59, f66
  4880. FNMA f74 = f75, f59, f74
  4881. FNMA f82 = f83, f59, f82
  4882. FNMA f90 = f91, f59, f90
  4883. ;;
  4884. FNMA f65 = f67, f60, f65
  4885. FNMA f73 = f75, f60, f73
  4886. FNMA f81 = f83, f60, f81
  4887. FNMA f89 = f91, f60, f89
  4888. ;;
  4889. FNMA f64 = f67, f61, f64
  4890. FNMA f72 = f75, f61, f72
  4891. FNMA f80 = f83, f61, f80
  4892. FNMA f88 = f91, f61, f88
  4893. ;;
  4894. FMPY f66 = f66, f16
  4895. FMPY f74 = f74, f16
  4896. FMPY f82 = f82, f16
  4897. FMPY f90 = f90, f16
  4898. ;;
  4899. FNMA f65 = f66, f17, f65
  4900. FNMA f73 = f74, f17, f73
  4901. FNMA f81 = f82, f17, f81
  4902. FNMA f89 = f90, f17, f89
  4903. ;;
  4904. FNMA f64 = f66, f18, f64
  4905. FNMA f72 = f74, f18, f72
  4906. FNMA f80 = f82, f18, f80
  4907. FNMA f88 = f90, f18, f88
  4908. ;;
  4909. FMPY f65 = f65, f19
  4910. FMPY f73 = f73, f19
  4911. FMPY f81 = f81, f19
  4912. FMPY f89 = f89, f19
  4913. ;;
  4914. FNMA f64 = f65, f20, f64
  4915. FNMA f72 = f73, f20, f72
  4916. FNMA f80 = f81, f20, f80
  4917. FNMA f88 = f89, f20, f88
  4918. ;;
  4919. FMPY f64 = f64, f21
  4920. FMPY f72 = f72, f21
  4921. FMPY f80 = f80, f21
  4922. FMPY f88 = f88, f21
  4923. ;;
  4924. adds BOFFSET = 24 * SIZE, BOFFSET
  4925. adds BOFFSET2 = 24 * SIZE, BOFFSET2
  4926. ;;
  4927. STFD [BOFFSET] = f70, SIZE
  4928. STFD [BOFFSET2] = f71, SIZE
  4929. ;;
  4930. STFD [BOFFSET] = f78, SIZE
  4931. STFD [BOFFSET2] = f79, SIZE
  4932. ;;
  4933. STFD [BOFFSET] = f86, SIZE
  4934. STFD [BOFFSET2] = f87, SIZE
  4935. ;;
  4936. STFD [BOFFSET] = f94, - 11 * SIZE
  4937. STFD [BOFFSET2] = f95, - 11 * SIZE
  4938. ;;
  4939. STFD [BOFFSET] = f68, SIZE
  4940. STFD [BOFFSET2] = f69, SIZE
  4941. ;;
  4942. STFD [BOFFSET] = f76, SIZE
  4943. STFD [BOFFSET2] = f77, SIZE
  4944. ;;
  4945. STFD [BOFFSET] = f84, SIZE
  4946. STFD [BOFFSET2] = f85, SIZE
  4947. ;;
  4948. STFD [BOFFSET] = f92, - 11 * SIZE
  4949. STFD [BOFFSET2] = f93, - 11 * SIZE
  4950. ;;
  4951. STFD [BOFFSET] = f66, SIZE
  4952. STFD [BOFFSET2] = f67, SIZE
  4953. ;;
  4954. STFD [BOFFSET] = f74, SIZE
  4955. STFD [BOFFSET2] = f75, SIZE
  4956. ;;
  4957. STFD [BOFFSET] = f82, SIZE
  4958. STFD [BOFFSET2] = f83, SIZE
  4959. ;;
  4960. STFD [BOFFSET] = f90, - 11 * SIZE
  4961. STFD [BOFFSET2] = f91, - 11 * SIZE
  4962. ;;
  4963. STFD [BOFFSET] = f64, SIZE
  4964. STFD [BOFFSET2] = f65, SIZE
  4965. ;;
  4966. STFD [BOFFSET] = f72, SIZE
  4967. STFD [BOFFSET2] = f73, SIZE
  4968. ;;
  4969. STFD [BOFFSET] = f80, SIZE
  4970. STFD [BOFFSET2] = f81, SIZE
  4971. ;;
  4972. STFD [BOFFSET] = f88, - 3 * SIZE
  4973. STFD [BOFFSET2] = f89, - 3 * SIZE
  4974. ;;
  4975. adds C1 = -8 * SIZE, C1
  4976. adds C2 = -8 * SIZE, C2
  4977. adds C3 = -8 * SIZE, C3
  4978. adds C4 = -8 * SIZE, C4
  4979. ;;
  4980. #endif
  4981. #ifdef LT
  4982. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  4983. ;;
  4984. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  4985. ;;
  4986. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  4987. ;;
  4988. LDFPD f38, f39 = [AOFFSET]
  4989. adds AOFFSET = 3 * SIZE, AOFFSET
  4990. ;;
  4991. LDFD f40 = [AOFFSET], 1 * SIZE
  4992. ;;
  4993. LDFPD f41, f42 = [AOFFSET], 2 * SIZE
  4994. ;;
  4995. LDFPD f43, f44 = [AOFFSET], 2 * SIZE
  4996. ;;
  4997. LDFPD f45, f46 = [AOFFSET]
  4998. adds AOFFSET = 4 * SIZE, AOFFSET
  4999. ;;
  5000. LDFPD f47, f48 = [AOFFSET], 2 * SIZE
  5001. ;;
  5002. LDFPD f49, f50 = [AOFFSET], 2 * SIZE
  5003. ;;
  5004. LDFPD f51, f52 = [AOFFSET]
  5005. adds AOFFSET = 5 * SIZE, AOFFSET
  5006. ;;
  5007. LDFD f53 = [AOFFSET], 1 * SIZE
  5008. ;;
  5009. LDFPD f54, f55 = [AOFFSET], 2 * SIZE
  5010. ;;
  5011. LDFPD f56, f57 = [AOFFSET]
  5012. adds AOFFSET = 6 * SIZE, AOFFSET
  5013. ;;
  5014. LDFPD f58, f59 = [AOFFSET], 2 * SIZE
  5015. ;;
  5016. LDFPD f60, f61 = [AOFFSET]
  5017. adds AOFFSET = 7 * SIZE, AOFFSET
  5018. ;;
  5019. LDFD f16 = [AOFFSET], 1 * SIZE
  5020. ;;
  5021. LDFPD f17, f18 = [AOFFSET]
  5022. adds AOFFSET = 8 * SIZE, AOFFSET
  5023. ;;
  5024. LDFPD f19, f20 = [AOFFSET]
  5025. adds AOFFSET = 9 * SIZE, AOFFSET
  5026. ;;
  5027. LDFD f21 = [AOFFSET]
  5028. adds AOFFSET = -63 * SIZE, AOFFSET
  5029. ;;
  5030. FMPY f64 = f64, f32
  5031. FMPY f72 = f72, f32
  5032. FMPY f80 = f80, f32
  5033. FMPY f88 = f88, f32
  5034. ;;
  5035. FNMA f65 = f64, f33, f65
  5036. FNMA f73 = f72, f33, f73
  5037. FNMA f81 = f80, f33, f81
  5038. FNMA f89 = f88, f33, f89
  5039. ;;
  5040. FNMA f66 = f64, f34, f66
  5041. FNMA f74 = f72, f34, f74
  5042. FNMA f82 = f80, f34, f82
  5043. FNMA f90 = f88, f34, f90
  5044. ;;
  5045. FNMA f67 = f64, f35, f67
  5046. FNMA f75 = f72, f35, f75
  5047. FNMA f83 = f80, f35, f83
  5048. FNMA f91 = f88, f35, f91
  5049. ;;
  5050. FNMA f68 = f64, f36, f68
  5051. FNMA f76 = f72, f36, f76
  5052. FNMA f84 = f80, f36, f84
  5053. FNMA f92 = f88, f36, f92
  5054. ;;
  5055. FNMA f69 = f64, f37, f69
  5056. FNMA f77 = f72, f37, f77
  5057. FNMA f85 = f80, f37, f85
  5058. FNMA f93 = f88, f37, f93
  5059. ;;
  5060. FNMA f70 = f64, f38, f70
  5061. FNMA f78 = f72, f38, f78
  5062. FNMA f86 = f80, f38, f86
  5063. FNMA f94 = f88, f38, f94
  5064. ;;
  5065. FNMA f71 = f64, f39, f71
  5066. FNMA f79 = f72, f39, f79
  5067. FNMA f87 = f80, f39, f87
  5068. FNMA f95 = f88, f39, f95
  5069. ;;
  5070. FMPY f65 = f65, f40
  5071. FMPY f73 = f73, f40
  5072. FMPY f81 = f81, f40
  5073. FMPY f89 = f89, f40
  5074. ;;
  5075. FNMA f66 = f65, f41, f66
  5076. FNMA f74 = f73, f41, f74
  5077. FNMA f82 = f81, f41, f82
  5078. FNMA f90 = f89, f41, f90
  5079. ;;
  5080. FNMA f67 = f65, f42, f67
  5081. FNMA f75 = f73, f42, f75
  5082. FNMA f83 = f81, f42, f83
  5083. FNMA f91 = f89, f42, f91
  5084. ;;
  5085. FNMA f68 = f65, f43, f68
  5086. FNMA f76 = f73, f43, f76
  5087. FNMA f84 = f81, f43, f84
  5088. FNMA f92 = f89, f43, f92
  5089. ;;
  5090. FNMA f69 = f65, f44, f69
  5091. FNMA f77 = f73, f44, f77
  5092. FNMA f85 = f81, f44, f85
  5093. FNMA f93 = f89, f44, f93
  5094. ;;
  5095. FNMA f70 = f65, f45, f70
  5096. FNMA f78 = f73, f45, f78
  5097. FNMA f86 = f81, f45, f86
  5098. FNMA f94 = f89, f45, f94
  5099. ;;
  5100. FNMA f71 = f65, f46, f71
  5101. FNMA f79 = f73, f46, f79
  5102. FNMA f87 = f81, f46, f87
  5103. FNMA f95 = f89, f46, f95
  5104. ;;
  5105. FMPY f66 = f66, f47
  5106. FMPY f74 = f74, f47
  5107. FMPY f82 = f82, f47
  5108. FMPY f90 = f90, f47
  5109. ;;
  5110. FNMA f67 = f66, f48, f67
  5111. FNMA f75 = f74, f48, f75
  5112. FNMA f83 = f82, f48, f83
  5113. FNMA f91 = f90, f48, f91
  5114. ;;
  5115. FNMA f68 = f66, f49, f68
  5116. FNMA f76 = f74, f49, f76
  5117. FNMA f84 = f82, f49, f84
  5118. FNMA f92 = f90, f49, f92
  5119. ;;
  5120. FNMA f69 = f66, f50, f69
  5121. FNMA f77 = f74, f50, f77
  5122. FNMA f85 = f82, f50, f85
  5123. FNMA f93 = f90, f50, f93
  5124. ;;
  5125. FNMA f70 = f66, f51, f70
  5126. FNMA f78 = f74, f51, f78
  5127. FNMA f86 = f82, f51, f86
  5128. FNMA f94 = f90, f51, f94
  5129. ;;
  5130. FNMA f71 = f66, f52, f71
  5131. FNMA f79 = f74, f52, f79
  5132. FNMA f87 = f82, f52, f87
  5133. FNMA f95 = f90, f52, f95
  5134. ;;
  5135. FMPY f67 = f67, f53
  5136. FMPY f75 = f75, f53
  5137. FMPY f83 = f83, f53
  5138. FMPY f91 = f91, f53
  5139. ;;
  5140. FNMA f68 = f67, f54, f68
  5141. FNMA f76 = f75, f54, f76
  5142. FNMA f84 = f83, f54, f84
  5143. FNMA f92 = f91, f54, f92
  5144. ;;
  5145. FNMA f69 = f67, f55, f69
  5146. FNMA f77 = f75, f55, f77
  5147. FNMA f85 = f83, f55, f85
  5148. FNMA f93 = f91, f55, f93
  5149. ;;
  5150. FNMA f70 = f67, f56, f70
  5151. FNMA f78 = f75, f56, f78
  5152. FNMA f86 = f83, f56, f86
  5153. FNMA f94 = f91, f56, f94
  5154. ;;
  5155. FNMA f71 = f67, f57, f71
  5156. FNMA f79 = f75, f57, f79
  5157. FNMA f87 = f83, f57, f87
  5158. FNMA f95 = f91, f57, f95
  5159. ;;
  5160. FMPY f68 = f68, f58
  5161. FMPY f76 = f76, f58
  5162. FMPY f84 = f84, f58
  5163. FMPY f92 = f92, f58
  5164. ;;
  5165. FNMA f69 = f68, f59, f69
  5166. FNMA f77 = f76, f59, f77
  5167. FNMA f85 = f84, f59, f85
  5168. FNMA f93 = f92, f59, f93
  5169. ;;
  5170. FNMA f70 = f68, f60, f70
  5171. FNMA f78 = f76, f60, f78
  5172. FNMA f86 = f84, f60, f86
  5173. FNMA f94 = f92, f60, f94
  5174. ;;
  5175. FNMA f71 = f68, f61, f71
  5176. FNMA f79 = f76, f61, f79
  5177. FNMA f87 = f84, f61, f87
  5178. FNMA f95 = f92, f61, f95
  5179. ;;
  5180. FMPY f69 = f69, f16
  5181. FMPY f77 = f77, f16
  5182. FMPY f85 = f85, f16
  5183. FMPY f93 = f93, f16
  5184. ;;
  5185. FNMA f70 = f69, f17, f70
  5186. FNMA f78 = f77, f17, f78
  5187. FNMA f86 = f85, f17, f86
  5188. FNMA f94 = f93, f17, f94
  5189. ;;
  5190. FNMA f71 = f69, f18, f71
  5191. FNMA f79 = f77, f18, f79
  5192. FNMA f87 = f85, f18, f87
  5193. FNMA f95 = f93, f18, f95
  5194. ;;
  5195. FMPY f70 = f70, f19
  5196. FMPY f78 = f78, f19
  5197. FMPY f86 = f86, f19
  5198. FMPY f94 = f94, f19
  5199. ;;
  5200. FNMA f71 = f70, f20, f71
  5201. FNMA f79 = f78, f20, f79
  5202. FNMA f87 = f86, f20, f87
  5203. FNMA f95 = f94, f20, f95
  5204. ;;
  5205. FMPY f71 = f71, f21
  5206. FMPY f79 = f79, f21
  5207. FMPY f87 = f87, f21
  5208. FMPY f95 = f95, f21
  5209. ;;
  5210. STFD [BOFFSET] = f64, SIZE
  5211. STFD [BOFFSET2] = f65, SIZE
  5212. ;;
  5213. STFD [BOFFSET] = f72, SIZE
  5214. STFD [BOFFSET2] = f73, SIZE
  5215. ;;
  5216. STFD [BOFFSET] = f80, SIZE
  5217. STFD [BOFFSET2] = f81, SIZE
  5218. ;;
  5219. STFD [BOFFSET] = f88, 5 * SIZE
  5220. STFD [BOFFSET2] = f89, 5 * SIZE
  5221. ;;
  5222. STFD [BOFFSET] = f66, SIZE
  5223. STFD [BOFFSET2] = f67, SIZE
  5224. ;;
  5225. STFD [BOFFSET] = f74, SIZE
  5226. STFD [BOFFSET2] = f75, SIZE
  5227. ;;
  5228. STFD [BOFFSET] = f82, SIZE
  5229. STFD [BOFFSET2] = f83, SIZE
  5230. ;;
  5231. STFD [BOFFSET] = f90, 5 * SIZE
  5232. STFD [BOFFSET2] = f91, 5 * SIZE
  5233. ;;
  5234. STFD [BOFFSET] = f68, SIZE
  5235. STFD [BOFFSET2] = f69, SIZE
  5236. ;;
  5237. STFD [BOFFSET] = f76, SIZE
  5238. STFD [BOFFSET2] = f77, SIZE
  5239. ;;
  5240. STFD [BOFFSET] = f84, SIZE
  5241. STFD [BOFFSET2] = f85, SIZE
  5242. ;;
  5243. STFD [BOFFSET] = f92, 5 * SIZE
  5244. STFD [BOFFSET2] = f93, 5 * SIZE
  5245. ;;
  5246. STFD [BOFFSET] = f70, SIZE
  5247. STFD [BOFFSET2] = f71, SIZE
  5248. ;;
  5249. STFD [BOFFSET] = f78, SIZE
  5250. STFD [BOFFSET2] = f79, SIZE
  5251. ;;
  5252. STFD [BOFFSET] = f86, SIZE
  5253. STFD [BOFFSET2] = f87, SIZE
  5254. ;;
  5255. STFD [BOFFSET] = f94
  5256. STFD [BOFFSET2] = f95
  5257. adds C9 = 4 * SIZE, C1
  5258. adds BOFFSET = - 27 * SIZE, BOFFSET
  5259. adds BOFFSET2 = - 27 * SIZE, BOFFSET2
  5260. ;;
  5261. #endif
  5262. #ifdef RN
  5263. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  5264. ;;
  5265. LDFPD f34, f35 = [BOFFSET]
  5266. adds BOFFSET = 3 * SIZE, BOFFSET
  5267. ;;
  5268. LDFD f36 = [BOFFSET], 1 * SIZE
  5269. ;;
  5270. LDFPD f37, f38 = [BOFFSET]
  5271. adds BOFFSET = 4 * SIZE, BOFFSET
  5272. ;;
  5273. LDFPD f39, f40 = [BOFFSET]
  5274. adds BOFFSET = 5 * SIZE, BOFFSET
  5275. ;;
  5276. LDFD f41 = [BOFFSET], -15 * SIZE
  5277. ;;
  5278. FMPY f64 = f64, f32
  5279. FMPY f68 = f68, f32
  5280. FMPY f65 = f65, f32
  5281. FMPY f69 = f69, f32
  5282. FMPY f66 = f66, f32
  5283. FMPY f70 = f70, f32
  5284. FMPY f67 = f67, f32
  5285. FMPY f71 = f71, f32
  5286. ;;
  5287. FNMA f72 = f64, f33, f72
  5288. FNMA f76 = f68, f33, f76
  5289. FNMA f73 = f65, f33, f73
  5290. FNMA f77 = f69, f33, f77
  5291. FNMA f74 = f66, f33, f74
  5292. FNMA f78 = f70, f33, f78
  5293. FNMA f75 = f67, f33, f75
  5294. FNMA f79 = f71, f33, f79
  5295. ;;
  5296. FNMA f80 = f64, f34, f80
  5297. FNMA f84 = f68, f34, f84
  5298. FNMA f81 = f65, f34, f81
  5299. FNMA f85 = f69, f34, f85
  5300. FNMA f82 = f66, f34, f82
  5301. FNMA f86 = f70, f34, f86
  5302. FNMA f83 = f67, f34, f83
  5303. FNMA f87 = f71, f34, f87
  5304. ;;
  5305. FNMA f88 = f64, f35, f88
  5306. FNMA f92 = f68, f35, f92
  5307. FNMA f89 = f65, f35, f89
  5308. FNMA f93 = f69, f35, f93
  5309. FNMA f90 = f66, f35, f90
  5310. FNMA f94 = f70, f35, f94
  5311. FNMA f91 = f67, f35, f91
  5312. FNMA f95 = f71, f35, f95
  5313. ;;
  5314. FMPY f72 = f72, f36
  5315. FMPY f76 = f76, f36
  5316. FMPY f73 = f73, f36
  5317. FMPY f77 = f77, f36
  5318. FMPY f74 = f74, f36
  5319. FMPY f78 = f78, f36
  5320. FMPY f75 = f75, f36
  5321. FMPY f79 = f79, f36
  5322. ;;
  5323. FNMA f80 = f72, f37, f80
  5324. FNMA f84 = f76, f37, f84
  5325. FNMA f81 = f73, f37, f81
  5326. FNMA f85 = f77, f37, f85
  5327. FNMA f82 = f74, f37, f82
  5328. FNMA f86 = f78, f37, f86
  5329. FNMA f83 = f75, f37, f83
  5330. FNMA f87 = f79, f37, f87
  5331. ;;
  5332. FNMA f88 = f72, f38, f88
  5333. FNMA f92 = f76, f38, f92
  5334. FNMA f89 = f73, f38, f89
  5335. FNMA f93 = f77, f38, f93
  5336. FNMA f90 = f74, f38, f90
  5337. FNMA f94 = f78, f38, f94
  5338. FNMA f91 = f75, f38, f91
  5339. FNMA f95 = f79, f38, f95
  5340. ;;
  5341. FMPY f80 = f80, f39
  5342. FMPY f84 = f84, f39
  5343. FMPY f81 = f81, f39
  5344. FMPY f85 = f85, f39
  5345. FMPY f82 = f82, f39
  5346. FMPY f86 = f86, f39
  5347. FMPY f83 = f83, f39
  5348. FMPY f87 = f87, f39
  5349. ;;
  5350. FNMA f88 = f80, f40, f88
  5351. FNMA f92 = f84, f40, f92
  5352. FNMA f89 = f81, f40, f89
  5353. FNMA f93 = f85, f40, f93
  5354. FNMA f90 = f82, f40, f90
  5355. FNMA f94 = f86, f40, f94
  5356. FNMA f91 = f83, f40, f91
  5357. FNMA f95 = f87, f40, f95
  5358. ;;
  5359. FMPY f88 = f88, f41
  5360. FMPY f92 = f92, f41
  5361. FMPY f89 = f89, f41
  5362. FMPY f93 = f93, f41
  5363. FMPY f90 = f90, f41
  5364. FMPY f94 = f94, f41
  5365. FMPY f91 = f91, f41
  5366. FMPY f95 = f95, f41
  5367. ;;
  5368. STFD [AOFFSET] = f64, SIZE
  5369. STFD [AOFFSET2] = f68, SIZE
  5370. ;;
  5371. STFD [AOFFSET] = f65, SIZE
  5372. STFD [AOFFSET2] = f69, SIZE
  5373. ;;
  5374. STFD [AOFFSET] = f66, SIZE
  5375. STFD [AOFFSET2] = f70, SIZE
  5376. ;;
  5377. STFD [AOFFSET] = f67, 5 * SIZE
  5378. STFD [AOFFSET2] = f71, 5 * SIZE
  5379. ;;
  5380. STFD [AOFFSET] = f72, SIZE
  5381. STFD [AOFFSET2] = f76, SIZE
  5382. ;;
  5383. STFD [AOFFSET] = f73, SIZE
  5384. STFD [AOFFSET2] = f77, SIZE
  5385. ;;
  5386. STFD [AOFFSET] = f74, SIZE
  5387. STFD [AOFFSET2] = f78, SIZE
  5388. ;;
  5389. STFD [AOFFSET] = f75, 5 * SIZE
  5390. STFD [AOFFSET2] = f79, 5 * SIZE
  5391. ;;
  5392. STFD [AOFFSET] = f80, SIZE
  5393. STFD [AOFFSET2] = f84, SIZE
  5394. ;;
  5395. STFD [AOFFSET] = f81, SIZE
  5396. STFD [AOFFSET2] = f85, SIZE
  5397. ;;
  5398. STFD [AOFFSET] = f82, SIZE
  5399. STFD [AOFFSET2] = f86, SIZE
  5400. ;;
  5401. STFD [AOFFSET] = f83, 5 * SIZE
  5402. STFD [AOFFSET2] = f87, 5 * SIZE
  5403. ;;
  5404. STFD [AOFFSET] = f88, SIZE
  5405. STFD [AOFFSET2] = f92, SIZE
  5406. ;;
  5407. STFD [AOFFSET] = f89, SIZE
  5408. STFD [AOFFSET2] = f93, SIZE
  5409. ;;
  5410. STFD [AOFFSET] = f90, SIZE
  5411. STFD [AOFFSET2] = f94, SIZE
  5412. ;;
  5413. STFD [AOFFSET] = f91, -27 * SIZE
  5414. STFD [AOFFSET2] = f95, -27 * SIZE
  5415. ;;
  5416. #endif
  5417. #ifdef RT
  5418. adds BOFFSET = 14 * SIZE, BOFFSET
  5419. ;;
  5420. LDFPD f33, f32 = [BOFFSET]
  5421. adds BOFFSET = - 2 * SIZE, BOFFSET
  5422. ;;
  5423. LDFPD f35, f34 = [BOFFSET]
  5424. adds BOFFSET = - 2 * SIZE, BOFFSET
  5425. ;;
  5426. LDFD f36 = [BOFFSET], -2 * SIZE
  5427. ;;
  5428. LDFPD f38, f37 = [BOFFSET]
  5429. adds BOFFSET = - 4 * SIZE, BOFFSET
  5430. ;;
  5431. LDFPD f40, f39 = [BOFFSET]
  5432. adds BOFFSET = - 4 * SIZE, BOFFSET
  5433. ;;
  5434. LDFD f41 = [BOFFSET]
  5435. ;;
  5436. FMPY f88 = f88, f32
  5437. FMPY f92 = f92, f32
  5438. FMPY f89 = f89, f32
  5439. FMPY f93 = f93, f32
  5440. FMPY f90 = f90, f32
  5441. FMPY f94 = f94, f32
  5442. FMPY f91 = f91, f32
  5443. FMPY f95 = f95, f32
  5444. ;;
  5445. FNMA f80 = f88, f33, f80
  5446. FNMA f84 = f92, f33, f84
  5447. FNMA f81 = f89, f33, f81
  5448. FNMA f85 = f93, f33, f85
  5449. FNMA f82 = f90, f33, f82
  5450. FNMA f86 = f94, f33, f86
  5451. FNMA f83 = f91, f33, f83
  5452. FNMA f87 = f95, f33, f87
  5453. ;;
  5454. FNMA f72 = f88, f34, f72
  5455. FNMA f76 = f92, f34, f76
  5456. FNMA f73 = f89, f34, f73
  5457. FNMA f77 = f93, f34, f77
  5458. FNMA f74 = f90, f34, f74
  5459. FNMA f78 = f94, f34, f78
  5460. FNMA f75 = f91, f34, f75
  5461. FNMA f79 = f95, f34, f79
  5462. ;;
  5463. FNMA f64 = f88, f35, f64
  5464. FNMA f68 = f92, f35, f68
  5465. FNMA f65 = f89, f35, f65
  5466. FNMA f69 = f93, f35, f69
  5467. FNMA f66 = f90, f35, f66
  5468. FNMA f70 = f94, f35, f70
  5469. FNMA f67 = f91, f35, f67
  5470. FNMA f71 = f95, f35, f71
  5471. ;;
  5472. FMPY f80 = f80, f36
  5473. FMPY f84 = f84, f36
  5474. FMPY f81 = f81, f36
  5475. FMPY f85 = f85, f36
  5476. FMPY f82 = f82, f36
  5477. FMPY f86 = f86, f36
  5478. FMPY f83 = f83, f36
  5479. FMPY f87 = f87, f36
  5480. ;;
  5481. FNMA f72 = f80, f37, f72
  5482. FNMA f76 = f84, f37, f76
  5483. FNMA f73 = f81, f37, f73
  5484. FNMA f77 = f85, f37, f77
  5485. FNMA f74 = f82, f37, f74
  5486. FNMA f78 = f86, f37, f78
  5487. FNMA f75 = f83, f37, f75
  5488. FNMA f79 = f87, f37, f79
  5489. ;;
  5490. FNMA f64 = f80, f38, f64
  5491. FNMA f68 = f84, f38, f68
  5492. FNMA f65 = f81, f38, f65
  5493. FNMA f69 = f85, f38, f69
  5494. FNMA f66 = f82, f38, f66
  5495. FNMA f70 = f86, f38, f70
  5496. FNMA f67 = f83, f38, f67
  5497. FNMA f71 = f87, f38, f71
  5498. ;;
  5499. FMPY f72 = f72, f39
  5500. FMPY f76 = f76, f39
  5501. FMPY f73 = f73, f39
  5502. FMPY f77 = f77, f39
  5503. FMPY f74 = f74, f39
  5504. FMPY f78 = f78, f39
  5505. FMPY f75 = f75, f39
  5506. FMPY f79 = f79, f39
  5507. ;;
  5508. FNMA f64 = f72, f40, f64
  5509. FNMA f68 = f76, f40, f68
  5510. FNMA f65 = f73, f40, f65
  5511. FNMA f69 = f77, f40, f69
  5512. FNMA f66 = f74, f40, f66
  5513. FNMA f70 = f78, f40, f70
  5514. FNMA f67 = f75, f40, f67
  5515. FNMA f71 = f79, f40, f71
  5516. ;;
  5517. FMPY f64 = f64, f41
  5518. FMPY f68 = f68, f41
  5519. FMPY f65 = f65, f41
  5520. FMPY f69 = f69, f41
  5521. FMPY f66 = f66, f41
  5522. FMPY f70 = f70, f41
  5523. FMPY f67 = f67, f41
  5524. FMPY f71 = f71, f41
  5525. ;;
  5526. adds AOFFSET = 24 * SIZE, AOFFSET
  5527. adds AOFFSET2 = 24 * SIZE, AOFFSET2
  5528. ;;
  5529. STFD [AOFFSET] = f88, SIZE
  5530. STFD [AOFFSET2] = f92, SIZE
  5531. ;;
  5532. STFD [AOFFSET] = f89, SIZE
  5533. STFD [AOFFSET2] = f93, SIZE
  5534. ;;
  5535. STFD [AOFFSET] = f90, SIZE
  5536. STFD [AOFFSET2] = f94, SIZE
  5537. ;;
  5538. STFD [AOFFSET] = f91, - 11 * SIZE
  5539. STFD [AOFFSET2] = f95, - 11 * SIZE
  5540. ;;
  5541. STFD [AOFFSET] = f80, SIZE
  5542. STFD [AOFFSET2] = f84, SIZE
  5543. ;;
  5544. STFD [AOFFSET] = f81, SIZE
  5545. STFD [AOFFSET2] = f85, SIZE
  5546. ;;
  5547. STFD [AOFFSET] = f82, SIZE
  5548. STFD [AOFFSET2] = f86, SIZE
  5549. ;;
  5550. STFD [AOFFSET] = f83, - 11 * SIZE
  5551. STFD [AOFFSET2] = f87, - 11 * SIZE
  5552. ;;
  5553. STFD [AOFFSET] = f72, SIZE
  5554. STFD [AOFFSET2] = f76, SIZE
  5555. ;;
  5556. STFD [AOFFSET] = f73, SIZE
  5557. STFD [AOFFSET2] = f77, SIZE
  5558. ;;
  5559. STFD [AOFFSET] = f74, SIZE
  5560. STFD [AOFFSET2] = f78, SIZE
  5561. ;;
  5562. STFD [AOFFSET] = f75, - 11 * SIZE
  5563. STFD [AOFFSET2] = f79, - 11 * SIZE
  5564. ;;
  5565. STFD [AOFFSET] = f64, SIZE
  5566. STFD [AOFFSET2] = f68, SIZE
  5567. ;;
  5568. STFD [AOFFSET] = f65, SIZE
  5569. STFD [AOFFSET2] = f69, SIZE
  5570. ;;
  5571. STFD [AOFFSET] = f66, SIZE
  5572. STFD [AOFFSET2] = f70, SIZE
  5573. ;;
  5574. STFD [AOFFSET] = f67, - 3 * SIZE
  5575. STFD [AOFFSET2] = f71, - 3 * SIZE
  5576. ;;
  5577. #endif
  5578. adds C9 = 4 * SIZE, C1
  5579. ;;
  5580. { .mmf
  5581. STFD [C1 ] = f64, SIZE
  5582. STFD [C9 ] = f68, SIZE
  5583. mov f64 = f0
  5584. }
  5585. ;;
  5586. { .mmi
  5587. STFD [C1 ] = f65, SIZE
  5588. STFD [C9 ] = f69, SIZE
  5589. adds C10 = 4 * SIZE, C2
  5590. }
  5591. ;;
  5592. { .mmi
  5593. STFD [C1 ] = f66, SIZE
  5594. STFD [C9 ] = f70, SIZE
  5595. }
  5596. ;;
  5597. { .mmi
  5598. #ifndef LN
  5599. STFD [C1 ] = f67, 5 * SIZE
  5600. #else
  5601. STFD [C1 ] = f67, - 3 * SIZE
  5602. #endif
  5603. STFD [C9 ] = f71
  5604. adds C11 = 4 * SIZE, C3
  5605. }
  5606. ;;
  5607. { .mmf
  5608. STFD [C2 ] = f72, SIZE
  5609. STFD [C10] = f76, SIZE
  5610. mov f72 = f0
  5611. }
  5612. ;;
  5613. { .mmi
  5614. STFD [C2 ] = f73, SIZE
  5615. STFD [C10] = f77, SIZE
  5616. }
  5617. ;;
  5618. { .mmi
  5619. STFD [C2 ] = f74, SIZE
  5620. STFD [C10] = f78, SIZE
  5621. adds C12 = 4 * SIZE, C4
  5622. }
  5623. ;;
  5624. { .mmi
  5625. #ifndef LN
  5626. STFD [C2 ] = f75, 5 * SIZE
  5627. #else
  5628. STFD [C2 ] = f75, - 3 * SIZE
  5629. #endif
  5630. STFD [C10] = f79
  5631. }
  5632. ;;
  5633. { .mmf
  5634. STFD [C3 ] = f80, SIZE
  5635. STFD [C11] = f84, SIZE
  5636. }
  5637. ;;
  5638. { .mmi
  5639. STFD [C3 ] = f81, SIZE
  5640. STFD [C11] = f85, SIZE
  5641. }
  5642. ;;
  5643. { .mmi
  5644. STFD [C3 ] = f82, SIZE
  5645. STFD [C11] = f86, SIZE
  5646. }
  5647. ;;
  5648. { .mmi
  5649. #ifndef LN
  5650. STFD [C3 ] = f83, 5 * SIZE
  5651. #else
  5652. STFD [C3 ] = f83, - 3 * SIZE
  5653. #endif
  5654. STFD [C11] = f87
  5655. }
  5656. ;;
  5657. { .mmf
  5658. STFD [C4 ] = f88, SIZE
  5659. STFD [C12] = f92, SIZE
  5660. }
  5661. ;;
  5662. { .mmi
  5663. STFD [C4 ] = f89, SIZE
  5664. STFD [C12] = f93, SIZE
  5665. }
  5666. ;;
  5667. { .mmi
  5668. STFD [C4 ] = f90, SIZE
  5669. STFD [C12] = f94, SIZE
  5670. }
  5671. ;;
  5672. { .mmi
  5673. #ifndef LN
  5674. STFD [C4 ] = f91, 5 * SIZE
  5675. #else
  5676. STFD [C4 ] = f91, - 3 * SIZE
  5677. #endif
  5678. STFD [C12] = f95
  5679. cmp.ne p6, p0 = 1, I
  5680. }
  5681. ;;
  5682. adds I = -1, I
  5683. ;;
  5684. { .mmi
  5685. shladd r2 = K, BASE_SHIFT, r0
  5686. }
  5687. ;;
  5688. { .mmi
  5689. sub L = K, KK
  5690. }
  5691. ;;
  5692. { .mmi
  5693. #ifdef RT
  5694. shladd AORIG = r2, 3, AORIG
  5695. #else
  5696. nop __LINE__
  5697. #endif
  5698. }
  5699. ;;
  5700. { .mmi
  5701. #if defined(LT) || defined(RN)
  5702. shladd L = L, BASE_SHIFT, r0
  5703. #else
  5704. nop __LINE__
  5705. #endif
  5706. }
  5707. ;;
  5708. ;;
  5709. { .mmi
  5710. #if defined(LT) || defined(RN)
  5711. shladd AOFFSET = L, 3, AOFFSET
  5712. #else
  5713. nop __LINE__
  5714. #endif
  5715. }
  5716. ;;
  5717. { .mmi
  5718. #if defined(LT) || defined(RN)
  5719. shladd BOFFSET = L, 2, BOFFSET
  5720. #else
  5721. nop __LINE__
  5722. #endif
  5723. }
  5724. ;;
  5725. { .mmi
  5726. #ifdef LT
  5727. adds KK = 8, KK
  5728. #elif defined LN
  5729. adds KK = -8, KK
  5730. #else
  5731. nop __LINE__
  5732. #endif
  5733. }
  5734. ;;
  5735. { .mmi
  5736. #if defined(LT) || defined(RN)
  5737. mov L = KK
  5738. #else
  5739. sub L = K, KK
  5740. #endif
  5741. }
  5742. ;;
  5743. mov f64 = f0
  5744. mov f72 = f0
  5745. mov f80 = f0
  5746. mov f88 = f0
  5747. mov f65 = f0
  5748. mov f73 = f0
  5749. mov f81 = f0
  5750. mov f89 = f0
  5751. { .mmb
  5752. (p6) br.cond.dptk .L052
  5753. }
  5754. ;;
  5755. .align 8
  5756. .L060:
  // Prologue for the row block selected by bit 2 of M.
  // It derives the loop trip count from K/KK, points BOFFSET (and, on the
  // LN/RT path, AOFFSET) at the operands, preloads the first A and B values
  // under p7, clears part of the accumulator set, and then falls into the
  // .L062 multiply-accumulate loop -- or branches to .L068 when the trip
  // count is zero.
  // NOTE(review): only f66/f67, f74/f75, f82/f83, f90/f91 (plus f65 on the
  // LT/RN path) are cleared here; the other accumulators read by .L062
  // (f64, f72, f80, f88, f73, f81, f89, ...) are presumably cleared by
  // whatever code reaches this label -- confirm for every entry path.
  5757. tbit.z p6, p7 = M, 2 // p6 <- (bit 2 of M == 0), p7 <- complement
  5758. (p6) br.cond.dptk .L070 // bit clear: skip this block entirely
  5759. ;;
  5760. { .mib
  5761. #if defined(LT) || defined(RN)
  5762. mov L = KK
  5763. #else
  5764. sub L = K, KK
  5765. #endif
  5766. }
  5767. ;;
  5768. { .mmi
  5769. cmp.ne p7, p0 = r0, L // p7 <- (L != 0); guards every preload below
  5770. adds BOFFSET = 0 * SIZE, B // BOFFSET <- B (replaced on the LN/RT path)
  5771. shl r2 = K, 2 + BASE_SHIFT // r2 <- K << (2 + BASE_SHIFT); used only under LN
  5772. }
  5773. { .mmi
  5774. shladd r3 = KK, BASE_SHIFT, r0 // r3 <- KK << BASE_SHIFT
  5775. nop __LINE__
  5776. nop __LINE__
  5777. }
  5778. ;;
  5779. #if defined(LT) || defined(RN)
  // LT/RN: AOFFSET is left as it was on entry; only B operands start at B.
  5780. { .mmf
  5781. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  5782. mov f65 = f0 // f0 reads as 0.0 on IA-64, so this clears f65
  5783. }
  5784. ;;
  5785. #else
  // LN/RT: both operand pointers are offset by r3 << 2 from their bases.
  5786. { .mfi
  5787. shladd BOFFSET = r3, 2, B // BOFFSET <- B + (r3 << 2)
  5788. #ifdef LN
  5789. sub AORIG = AORIG, r2 // LN only: step AORIG back by r2 bytes
  5790. #else
  5791. nop __LINE__
  5792. #endif
  5793. }
  5794. ;;
  5795. { .mfi
  5796. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  5797. shladd AOFFSET = r3, 2, AORIG // AOFFSET <- AORIG + (r3 << 2)
  5798. }
  5799. ;;
  5800. #endif
  // Convert L into a count for the two-way unrolled loop:
  //   p12   <- ((L + 1) is even), i.e. the original L was odd
  //   ar.lc <- ((L + 1) >> 1) - 1
  // A result of -1 means the original L was 0, so the loop is bypassed.
  5801. { .mfi
  5802. adds L = 1, L
  5803. }
  5804. { .mfi
  5805. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET // prefetch pointer ahead of AOFFSET
  5806. cmp.eq p3, p0 = r0, r0 // p3 <- true (r0 == r0): second unrolled half enabled by default
  5807. }
  5808. ;;
  5809. { .mfi
  5810. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  5811. tbit.z p12, p0 = L, 0 // tests L before the shift below (same instruction group)
  5812. }
  5813. { .mfi
  5814. shr L = L, 1
  5815. }
  5816. ;;
  5817. { .mfi
  5818. adds L = -1, L
  5819. }
  5820. ;;
  5821. { .mfi
  5822. cmp.eq p6, p0 = -1, L // p6 <- (no iterations to run)
  5823. }
  5824. ;;
  5825. { .mmf
  5826. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  5827. }
  5828. { .mfi
  5829. mov ar.lc = L // loop counter consumed by br.cloop in .L062
  5830. }
  5831. ;;
  // Clear the accumulators for A3/A4 against B1..B4.
  5832. mov f66 = f0
  5833. mov f67 = f0
  5834. mov f74 = f0
  5835. mov f75 = f0
  5836. mov f82 = f0
  5837. mov f83 = f0
  5838. mov f90 = f0
  5839. mov f91 = f0
  5840. ;;
  5841. { .mmf
  5842. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  5843. }
  5844. { .mfb
  5845. (p6) br.cond.dpnt .L068 // zero trip count: go straight to .L068
  5846. }
  5847. ;;
  5848. .align 8
  5849. .L062:
  // Multiply-accumulate loop, unrolled by two, counted by ar.lc (br.cloop).
  // First half (unconditional): A operands f32..f35 times B operands
  // f48..f51 are accumulated into f64..f67, f72..f75, f80..f83, f88..f91
  // (sixteen products, labelled A1..A4 * B1..B4 below).
  // Second half (under p3): the same sixteen accumulators receive
  // f40..f43 times f56..f59, which are loaded during the first half.
  // Predicates:
  //   p4  = (L != 0)  another pass follows, so reload f32..f35 / f48..f51
  //   p5  = (L == 0)  final pass: C9..C12 <- C1..C4 + 2 * SIZE
  //   p3  = true on entry (set in the .L060 prologue); when p12 is set it
  //         becomes (L != 0), which drops the second half on the final pass
  // L is decremented once per pass; the .L060 prologue copied the same
  // starting value into ar.lc, so L == 0 identifies the final pass.
  // NOTE(review): PREB is prefetched through here but is not initialised in
  // the .L060 prologue visible above this loop -- confirm it is set earlier.
  5850. { .mfi
  5851. lfetch.nt1 [PREA], 8 * SIZE
  5852. FMA f64 = f32, f48, f64 // A1 * B1
  5853. cmp.ne p4, p5 = 0, L
  5854. }
  5855. { .mfi
  5856. nop __LINE__
  5857. FMA f72 = f32, f49, f72 // A1 * B2
  5858. (p12) cmp.ne p3, p0 = 0, L
  5859. }
  5860. ;;
  5861. { .mfi
  5862. lfetch.nt1 [PREB], 8 * SIZE
  5863. FMA f80 = f32, f50, f80 // A1 * B3
  5864. (p5) adds C9 = 2 * SIZE, C1
  5865. }
  5866. { .mfi
  5867. nop __LINE__
  5868. FMA f88 = f32, f51, f88 // A1 * B4
  5869. (p5) adds C10 = 2 * SIZE, C2
  5870. }
  5871. ;;
  // Loads for the second unrolled half (f56..f59 from B, f40..f43 from A)
  // are interleaved with the remaining first-half products.
  5872. { .mfi
  5873. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  5874. FMA f65 = f33, f48, f65 // A2 * B1
  5875. (p5) adds C11 = 2 * SIZE, C3
  5876. }
  5877. { .mfi
  5878. nop __LINE__
  5879. FMA f73 = f33, f49, f73 // A2 * B2
  5880. (p5) adds C12 = 2 * SIZE, C4
  5881. }
  5882. ;;
  5883. { .mfb
  5884. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  5885. FMA f81 = f33, f50, f81 // A2 * B3
  5886. nop __LINE__
  5887. }
  5888. { .mfb
  5889. nop __LINE__
  5890. FMA f89 = f33, f51, f89 // A2 * B4
  5891. nop __LINE__
  5892. }
  5893. ;;
  5894. { .mfb
  5895. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  5896. FMA f66 = f34, f48, f66 // A3 * B1
  5897. nop __LINE__
  5898. }
  5899. { .mfb
  5900. nop __LINE__
  5901. FMA f74 = f34, f49, f74 // A3 * B2
  5902. nop __LINE__
  5903. }
  5904. ;;
  5905. { .mfb
  5906. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  5907. FMA f82 = f34, f50, f82 // A3 * B3
  5908. nop __LINE__
  5909. }
  5910. { .mfb
  5911. nop __LINE__
  5912. FMA f90 = f34, f51, f90 // A3 * B4
  5913. nop __LINE__
  5914. }
  5915. ;;
  // The next four bundles form one instruction group (no stop between
  // them): the reloads of f32/f33 and f48/f49 for the following pass sit
  // alongside the last first-half products that still read f48/f49.
  // NOTE(review): this relies on same-group reads seeing the old register
  // values -- keep the ordering and the absence of a stop as they are.
  5916. { .mfb
  5917. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  5918. FMA f67 = f35, f48, f67 // A4 * B1
  5919. }
  5920. { .mfb
  5921. nop __LINE__
  5922. FMA f75 = f35, f49, f75 // A4 * B2
  5923. nop __LINE__
  5924. }
  5925. { .mfb
  5926. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  5927. FMA f83 = f35, f50, f83 // A4 * B3
  5928. nop __LINE__
  5929. }
  5930. { .mfb
  5931. nop __LINE__
  5932. FMA f91 = f35, f51, f91 // A4 * B4
  5933. nop __LINE__
  5934. }
  5935. ;;
  // Second unrolled half: every FMA is predicated on p3.
  5936. { .mfb
  5937. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  5938. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  5939. nop __LINE__
  5940. }
  5941. { .mfb
  5942. nop __LINE__
  5943. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  5944. nop __LINE__
  5945. }
  5946. ;;
  5947. { .mfb
  5948. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  5949. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  5950. nop __LINE__
  5951. }
  5952. { .mfb
  5953. nop __LINE__
  5954. (p3) FMA f88 = f40, f59, f88 // A1 * B4
  5955. nop __LINE__
  5956. }
  5957. ;;
  5958. { .mfb
  5959. nop __LINE__
  5960. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  5961. nop __LINE__
  5962. }
  5963. { .mfb
  5964. nop __LINE__
  5965. (p3) FMA f73 = f41, f57, f73 // A2 * B2
  5966. nop __LINE__
  5967. }
  5968. ;;
  5969. { .mfb
  5970. nop __LINE__
  5971. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  5972. nop __LINE__
  5973. }
  5974. { .mfb
  5975. nop __LINE__
  5976. (p3) FMA f89 = f41, f59, f89 // A2 * B4
  5977. nop __LINE__
  5978. }
  5979. ;;
  5980. { .mfb
  5981. nop __LINE__
  5982. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  5983. nop __LINE__
  5984. }
  5985. { .mfb
  5986. nop __LINE__
  5987. (p3) FMA f74 = f42, f57, f74 // A3 * B2
  5988. nop __LINE__
  5989. }
  5990. ;;
  5991. { .mfb
  5992. nop __LINE__
  5993. (p3) FMA f82 = f42, f58, f82 // A3 * B3
  5994. nop __LINE__
  5995. }
  5996. { .mfb
  5997. nop __LINE__
  5998. (p3) FMA f90 = f42, f59, f90 // A3 * B4
  5999. nop __LINE__
  6000. }
  6001. ;;
  6002. { .mfb
  6003. nop __LINE__
  6004. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  6005. nop __LINE__
  6006. }
  6007. { .mfb
  6008. nop __LINE__
  6009. (p3) FMA f75 = f43, f57, f75 // A4 * B2
  6010. nop __LINE__
  6011. }
  6012. ;;
  6013. { .mfi
  6014. nop __LINE__
  6015. (p3) FMA f83 = f43, f58, f83 // A4 * B3
  6016. adds L = -1, L // software copy of the count, tested by the cmp.ne at loop top
  6017. }
  6018. { .mfb
  6019. nop __LINE__
  6020. (p3) FMA f91 = f43, f59, f91 // A4 * B4
  6021. br.cloop.sptk.few .L062 // repeat while ar.lc != 0 (ar.lc is decremented by the branch)
  6022. }
  6023. ;;
  6024. .align 8
  6025. .L068:
  6026. #if defined(LN) || defined(RT)
  6027. #ifdef LN
  6028. adds r2 = -4, KK
  6029. #else
  6030. adds r2 = -4, KK
  6031. #endif
  6032. ;;
  6033. shladd r2 = r2, BASE_SHIFT, r0
  6034. ;;
  6035. shladd AOFFSET = r2, 2, AORIG
  6036. shladd BOFFSET = r2, 2, B
  6037. ;;
  6038. #endif
  6039. adds AOFFSET2 = 4 * SIZE, AOFFSET
  6040. adds BOFFSET2 = 4 * SIZE, BOFFSET
  6041. ;;
  6042. #if defined(LN) || defined(LT)
  6043. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  6044. ;;
  6045. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  6046. ;;
  6047. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  6048. ;;
  6049. LDFPD f38, f39 = [BOFFSET], 2 * SIZE
  6050. ;;
  6051. LDFPD f40, f41 = [BOFFSET], 2 * SIZE
  6052. ;;
  6053. LDFPD f42, f43 = [BOFFSET], 2 * SIZE
  6054. ;;
  6055. LDFPD f44, f45 = [BOFFSET], 2 * SIZE
  6056. ;;
  6057. LDFPD f46, f47 = [BOFFSET]
  6058. adds BOFFSET = -14 * SIZE, BOFFSET
  6059. ;;
  6060. FSUB f64 = f32, f64
  6061. FSUB f72 = f33, f72
  6062. FSUB f80 = f34, f80
  6063. FSUB f88 = f35, f88
  6064. ;;
  6065. FSUB f65 = f36, f65
  6066. FSUB f73 = f37, f73
  6067. FSUB f81 = f38, f81
  6068. FSUB f89 = f39, f89
  6069. ;;
  6070. FSUB f66 = f40, f66
  6071. FSUB f74 = f41, f74
  6072. FSUB f82 = f42, f82
  6073. FSUB f90 = f43, f90
  6074. ;;
  6075. FSUB f67 = f44, f67
  6076. FSUB f75 = f45, f75
  6077. FSUB f83 = f46, f83
  6078. FSUB f91 = f47, f91
  6079. ;;
  6080. #else
  6081. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  6082. ;;
  6083. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  6084. ;;
  6085. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  6086. ;;
  6087. LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  6088. ;;
  6089. LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  6090. ;;
  6091. LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  6092. ;;
  6093. LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  6094. ;;
  6095. LDFPD f46, f47 = [AOFFSET]
  6096. adds AOFFSET = -14 * SIZE, AOFFSET
  6097. ;;
  6098. FSUB f64 = f32, f64
  6099. FSUB f65 = f33, f65
  6100. FSUB f66 = f34, f66
  6101. FSUB f67 = f35, f67
  6102. FSUB f72 = f36, f72
  6103. FSUB f73 = f37, f73
  6104. FSUB f74 = f38, f74
  6105. FSUB f75 = f39, f75
  6106. FSUB f80 = f40, f80
  6107. FSUB f81 = f41, f81
  6108. FSUB f82 = f42, f82
  6109. FSUB f83 = f43, f83
  6110. FSUB f88 = f44, f88
  6111. FSUB f89 = f45, f89
  6112. FSUB f90 = f46, f90
  6113. FSUB f91 = f47, f91
  6114. ;;
  6115. #endif
  6116. #ifdef LN
  6117. adds AOFFSET = 14 * SIZE, AOFFSET
  6118. ;;
  6119. LDFPD f33, f32 = [AOFFSET]
  6120. adds AOFFSET = - 2 * SIZE, AOFFSET
  6121. ;;
  6122. LDFPD f35, f34 = [AOFFSET]
  6123. adds AOFFSET = - 2 * SIZE, AOFFSET
  6124. ;;
  6125. LDFD f36 = [AOFFSET], - 2 * SIZE
  6126. ;;
  6127. LDFPD f38, f37 = [AOFFSET]
  6128. adds AOFFSET = - 4 * SIZE, AOFFSET
  6129. ;;
  6130. LDFPD f40, f39 = [AOFFSET]
  6131. adds AOFFSET = - 4 * SIZE, AOFFSET
  6132. ;;
  6133. LDFD f41 = [AOFFSET]
  6134. ;;
  6135. FMPY f67 = f67, f32
  6136. FMPY f75 = f75, f32
  6137. FMPY f83 = f83, f32
  6138. FMPY f91 = f91, f32
  6139. ;;
  6140. FNMA f66 = f67, f33, f66
  6141. FNMA f74 = f75, f33, f74
  6142. FNMA f82 = f83, f33, f82
  6143. FNMA f90 = f91, f33, f90
  6144. ;;
  6145. FNMA f65 = f67, f34, f65
  6146. FNMA f73 = f75, f34, f73
  6147. FNMA f81 = f83, f34, f81
  6148. FNMA f89 = f91, f34, f89
  6149. ;;
  6150. FNMA f64 = f67, f35, f64
  6151. FNMA f72 = f75, f35, f72
  6152. FNMA f80 = f83, f35, f80
  6153. FNMA f88 = f91, f35, f88
  6154. ;;
  6155. FMPY f66 = f66, f36
  6156. FMPY f74 = f74, f36
  6157. FMPY f82 = f82, f36
  6158. FMPY f90 = f90, f36
  6159. ;;
  6160. FNMA f65 = f66, f37, f65
  6161. FNMA f73 = f74, f37, f73
  6162. FNMA f81 = f82, f37, f81
  6163. FNMA f89 = f90, f37, f89
  6164. ;;
  6165. FNMA f64 = f66, f38, f64
  6166. FNMA f72 = f74, f38, f72
  6167. FNMA f80 = f82, f38, f80
  6168. FNMA f88 = f90, f38, f88
  6169. ;;
  6170. FMPY f65 = f65, f39
  6171. FMPY f73 = f73, f39
  6172. FMPY f81 = f81, f39
  6173. FMPY f89 = f89, f39
  6174. ;;
  6175. FNMA f64 = f65, f40, f64
  6176. FNMA f72 = f73, f40, f72
  6177. FNMA f80 = f81, f40, f80
  6178. FNMA f88 = f89, f40, f88
  6179. ;;
  6180. FMPY f64 = f64, f41
  6181. FMPY f72 = f72, f41
  6182. FMPY f80 = f80, f41
  6183. FMPY f88 = f88, f41
  6184. ;;
  6185. adds BOFFSET = 8 * SIZE, BOFFSET
  6186. adds BOFFSET2 = 8 * SIZE, BOFFSET2
  6187. ;;
  6188. STFD [BOFFSET] = f66, SIZE
  6189. STFD [BOFFSET2] = f67, SIZE
  6190. ;;
  6191. STFD [BOFFSET] = f74, SIZE
  6192. STFD [BOFFSET2] = f75, SIZE
  6193. ;;
  6194. STFD [BOFFSET] = f82, SIZE
  6195. STFD [BOFFSET2] = f83, SIZE
  6196. ;;
  6197. STFD [BOFFSET] = f90, - 11 * SIZE
  6198. STFD [BOFFSET2] = f91, - 11 * SIZE
  6199. ;;
  6200. STFD [BOFFSET] = f64, SIZE
  6201. STFD [BOFFSET2] = f65, SIZE
  6202. ;;
  6203. STFD [BOFFSET] = f72, SIZE
  6204. STFD [BOFFSET2] = f73, SIZE
  6205. ;;
  6206. STFD [BOFFSET] = f80, SIZE
  6207. STFD [BOFFSET2] = f81, SIZE
  6208. ;;
  6209. STFD [BOFFSET] = f88, -3 * SIZE
  6210. STFD [BOFFSET2] = f89, -3 * SIZE
  6211. ;;
  6212. adds C1 = -4 * SIZE, C1
  6213. adds C2 = -4 * SIZE, C2
  6214. adds C3 = -4 * SIZE, C3
  6215. adds C4 = -4 * SIZE, C4
  6216. ;;
  6217. #endif
  6218. #ifdef LT
  6219. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  6220. ;;
  6221. LDFPD f34, f35 = [AOFFSET]
  6222. adds AOFFSET = 3 * SIZE, AOFFSET
  6223. ;;
  6224. LDFD f36 = [AOFFSET], 1 * SIZE
  6225. ;;
  6226. LDFPD f37, f38 = [AOFFSET]
  6227. adds AOFFSET = 4 * SIZE, AOFFSET
  6228. ;;
  6229. LDFPD f39, f40 = [AOFFSET]
  6230. adds AOFFSET = 5 * SIZE, AOFFSET
  6231. ;;
  6232. LDFD f41 = [AOFFSET], -15 * SIZE
  6233. ;;
  6234. FMPY f64 = f64, f32
  6235. FMPY f72 = f72, f32
  6236. FMPY f80 = f80, f32
  6237. FMPY f88 = f88, f32
  6238. ;;
  6239. FNMA f65 = f64, f33, f65
  6240. FNMA f73 = f72, f33, f73
  6241. FNMA f81 = f80, f33, f81
  6242. FNMA f89 = f88, f33, f89
  6243. ;;
  6244. FNMA f66 = f64, f34, f66
  6245. FNMA f74 = f72, f34, f74
  6246. FNMA f82 = f80, f34, f82
  6247. FNMA f90 = f88, f34, f90
  6248. ;;
  6249. FNMA f67 = f64, f35, f67
  6250. FNMA f75 = f72, f35, f75
  6251. FNMA f83 = f80, f35, f83
  6252. FNMA f91 = f88, f35, f91
  6253. ;;
  6254. FMPY f65 = f65, f36
  6255. FMPY f73 = f73, f36
  6256. FMPY f81 = f81, f36
  6257. FMPY f89 = f89, f36
  6258. ;;
  6259. FNMA f66 = f65, f37, f66
  6260. FNMA f74 = f73, f37, f74
  6261. FNMA f82 = f81, f37, f82
  6262. FNMA f90 = f89, f37, f90
  6263. ;;
  6264. FNMA f67 = f65, f38, f67
  6265. FNMA f75 = f73, f38, f75
  6266. FNMA f83 = f81, f38, f83
  6267. FNMA f91 = f89, f38, f91
  6268. ;;
  6269. FMPY f66 = f66, f39
  6270. FMPY f74 = f74, f39
  6271. FMPY f82 = f82, f39
  6272. FMPY f90 = f90, f39
  6273. ;;
  6274. FNMA f67 = f66, f40, f67
  6275. FNMA f75 = f74, f40, f75
  6276. FNMA f83 = f82, f40, f83
  6277. FNMA f91 = f90, f40, f91
  6278. ;;
  6279. FMPY f67 = f67, f41
  6280. FMPY f75 = f75, f41
  6281. FMPY f83 = f83, f41
  6282. FMPY f91 = f91, f41
  6283. ;;
  6284. STFD [BOFFSET] = f64, SIZE
  6285. STFD [BOFFSET2] = f65, SIZE
  6286. ;;
  6287. STFD [BOFFSET] = f72, SIZE
  6288. STFD [BOFFSET2] = f73, SIZE
  6289. ;;
  6290. STFD [BOFFSET] = f80, SIZE
  6291. STFD [BOFFSET2] = f81, SIZE
  6292. ;;
  6293. STFD [BOFFSET] = f88, 5 * SIZE
  6294. STFD [BOFFSET2] = f89, 5 * SIZE
  6295. ;;
  6296. STFD [BOFFSET] = f66, SIZE
  6297. STFD [BOFFSET2] = f67, SIZE
  6298. ;;
  6299. STFD [BOFFSET] = f74, SIZE
  6300. STFD [BOFFSET2] = f75, SIZE
  6301. ;;
  6302. STFD [BOFFSET] = f82, SIZE
  6303. STFD [BOFFSET2] = f83, SIZE
  6304. ;;
  6305. STFD [BOFFSET] = f90, -11 * SIZE
  6306. STFD [BOFFSET2] = f91, -11 * SIZE
  6307. ;;
  6308. #endif
  6309. #ifdef RN
  6310. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  6311. ;;
  6312. LDFPD f34, f35 = [BOFFSET]
  6313. adds BOFFSET = 3 * SIZE, BOFFSET
  6314. ;;
  6315. LDFD f36 = [BOFFSET], 1 * SIZE
  6316. ;;
  6317. LDFPD f37, f38 = [BOFFSET]
  6318. adds BOFFSET = 4 * SIZE, BOFFSET
  6319. ;;
  6320. LDFPD f39, f40 = [BOFFSET]
  6321. adds BOFFSET = 5 * SIZE, BOFFSET
  6322. ;;
  6323. LDFD f41 = [BOFFSET], -15 * SIZE
  6324. ;;
  6325. FMPY f64 = f64, f32
  6326. FMPY f65 = f65, f32
  6327. FMPY f66 = f66, f32
  6328. FMPY f67 = f67, f32
  6329. ;;
  6330. FNMA f72 = f64, f33, f72
  6331. FNMA f73 = f65, f33, f73
  6332. FNMA f74 = f66, f33, f74
  6333. FNMA f75 = f67, f33, f75
  6334. ;;
  6335. FNMA f80 = f64, f34, f80
  6336. FNMA f81 = f65, f34, f81
  6337. FNMA f82 = f66, f34, f82
  6338. FNMA f83 = f67, f34, f83
  6339. ;;
  6340. FNMA f88 = f64, f35, f88
  6341. FNMA f89 = f65, f35, f89
  6342. FNMA f90 = f66, f35, f90
  6343. FNMA f91 = f67, f35, f91
  6344. ;;
  6345. FMPY f72 = f72, f36
  6346. FMPY f73 = f73, f36
  6347. FMPY f74 = f74, f36
  6348. FMPY f75 = f75, f36
  6349. ;;
  6350. FNMA f80 = f72, f37, f80
  6351. FNMA f81 = f73, f37, f81
  6352. FNMA f82 = f74, f37, f82
  6353. FNMA f83 = f75, f37, f83
  6354. ;;
  6355. FNMA f88 = f72, f38, f88
  6356. FNMA f89 = f73, f38, f89
  6357. FNMA f90 = f74, f38, f90
  6358. FNMA f91 = f75, f38, f91
  6359. ;;
  6360. FMPY f80 = f80, f39
  6361. FMPY f81 = f81, f39
  6362. FMPY f82 = f82, f39
  6363. FMPY f83 = f83, f39
  6364. ;;
  6365. FNMA f88 = f80, f40, f88
  6366. FNMA f89 = f81, f40, f89
  6367. FNMA f90 = f82, f40, f90
  6368. FNMA f91 = f83, f40, f91
  6369. ;;
  6370. FMPY f88 = f88, f41
  6371. FMPY f89 = f89, f41
  6372. FMPY f90 = f90, f41
  6373. FMPY f91 = f91, f41
  6374. ;;
  6375. STFD [AOFFSET] = f64, SIZE
  6376. STFD [AOFFSET2] = f72, SIZE
  6377. ;;
  6378. STFD [AOFFSET] = f65, SIZE
  6379. STFD [AOFFSET2] = f73, SIZE
  6380. ;;
  6381. STFD [AOFFSET] = f66, SIZE
  6382. STFD [AOFFSET2] = f74, SIZE
  6383. ;;
  6384. STFD [AOFFSET] = f67, 5 * SIZE
  6385. STFD [AOFFSET2] = f75, 5 * SIZE
  6386. ;;
  6387. STFD [AOFFSET] = f80, SIZE
  6388. STFD [AOFFSET2] = f88, SIZE
  6389. ;;
  6390. STFD [AOFFSET] = f81, SIZE
  6391. STFD [AOFFSET2] = f89, SIZE
  6392. ;;
  6393. STFD [AOFFSET] = f82, SIZE
  6394. STFD [AOFFSET2] = f90, SIZE
  6395. ;;
  6396. STFD [AOFFSET] = f83, -11 * SIZE
  6397. STFD [AOFFSET2] = f91, -11 * SIZE
  6398. ;;
  6399. #endif
  6400. #ifdef RT
  6401. adds BOFFSET = 14 * SIZE, BOFFSET
  6402. ;;
  6403. LDFPD f33, f32 = [BOFFSET]
  6404. adds BOFFSET = - 2 * SIZE, BOFFSET
  6405. ;;
  6406. LDFPD f35, f34 = [BOFFSET]
  6407. adds BOFFSET = - 2 * SIZE, BOFFSET
  6408. ;;
  6409. LDFD f36 = [BOFFSET], - 2 * SIZE
  6410. ;;
  6411. LDFPD f38, f37 = [BOFFSET]
  6412. adds BOFFSET = - 4 * SIZE, BOFFSET
  6413. ;;
  6414. LDFPD f40, f39 = [BOFFSET]
  6415. adds BOFFSET = - 4 * SIZE, BOFFSET
  6416. ;;
  6417. LDFD f41 = [BOFFSET]
  6418. ;;
  6419. FMPY f88 = f88, f32
  6420. FMPY f89 = f89, f32
  6421. FMPY f90 = f90, f32
  6422. FMPY f91 = f91, f32
  6423. ;;
  6424. FNMA f80 = f88, f33, f80
  6425. FNMA f81 = f89, f33, f81
  6426. FNMA f82 = f90, f33, f82
  6427. FNMA f83 = f91, f33, f83
  6428. ;;
  6429. FNMA f72 = f88, f34, f72
  6430. FNMA f73 = f89, f34, f73
  6431. FNMA f74 = f90, f34, f74
  6432. FNMA f75 = f91, f34, f75
  6433. ;;
  6434. FNMA f64 = f88, f35, f64
  6435. FNMA f65 = f89, f35, f65
  6436. FNMA f66 = f90, f35, f66
  6437. FNMA f67 = f91, f35, f67
  6438. ;;
  6439. FMPY f80 = f80, f36
  6440. FMPY f81 = f81, f36
  6441. FMPY f82 = f82, f36
  6442. FMPY f83 = f83, f36
  6443. ;;
  6444. FNMA f72 = f80, f37, f72
  6445. FNMA f73 = f81, f37, f73
  6446. FNMA f74 = f82, f37, f74
  6447. FNMA f75 = f83, f37, f75
  6448. ;;
  6449. FNMA f64 = f80, f38, f64
  6450. FNMA f65 = f81, f38, f65
  6451. FNMA f66 = f82, f38, f66
  6452. FNMA f67 = f83, f38, f67
  6453. ;;
  6454. FMPY f72 = f72, f39
  6455. FMPY f73 = f73, f39
  6456. FMPY f74 = f74, f39
  6457. FMPY f75 = f75, f39
  6458. ;;
  6459. FNMA f64 = f72, f40, f64
  6460. FNMA f65 = f73, f40, f65
  6461. FNMA f66 = f74, f40, f66
  6462. FNMA f67 = f75, f40, f67
  6463. ;;
  6464. FMPY f64 = f64, f41
  6465. FMPY f65 = f65, f41
  6466. FMPY f66 = f66, f41
  6467. FMPY f67 = f67, f41
  6468. ;;
  6469. adds AOFFSET = 8 * SIZE, AOFFSET
  6470. adds AOFFSET2 = 8 * SIZE, AOFFSET2
  6471. ;;
  6472. STFD [AOFFSET] = f80, SIZE
  6473. STFD [AOFFSET2] = f88, SIZE
  6474. ;;
  6475. STFD [AOFFSET] = f81, SIZE
  6476. STFD [AOFFSET2] = f89, SIZE
  6477. ;;
  6478. STFD [AOFFSET] = f82, SIZE
  6479. STFD [AOFFSET2] = f90, SIZE
  6480. ;;
  6481. STFD [AOFFSET] = f83, - 11 * SIZE
  6482. STFD [AOFFSET2] = f91, - 11 * SIZE
  6483. ;;
  6484. STFD [AOFFSET] = f64, SIZE
  6485. STFD [AOFFSET2] = f72, SIZE
  6486. ;;
  6487. STFD [AOFFSET] = f65, SIZE
  6488. STFD [AOFFSET2] = f73, SIZE
  6489. ;;
  6490. STFD [AOFFSET] = f66, SIZE
  6491. STFD [AOFFSET2] = f74, SIZE
  6492. ;;
  6493. STFD [AOFFSET] = f67, - 3 * SIZE
  6494. STFD [AOFFSET2] = f75, - 3 * SIZE
  6495. ;;
  6496. #endif
  6497. { .mmf
  6498. STFD [C1 ] = f64, SIZE
  6499. mov f64 = f0
  6500. }
  6501. ;;
  6502. { .mmi
  6503. STFD [C1 ] = f65, SIZE
  6504. }
  6505. ;;
  6506. { .mmi
  6507. STFD [C1 ] = f66, SIZE
  6508. }
  6509. ;;
  6510. { .mmi
  6511. #ifndef LN
  6512. STFD [C1 ] = f67, SIZE
  6513. #else
  6514. STFD [C1 ] = f67, - 3 * SIZE
  6515. #endif
  6516. }
  6517. ;;
  6518. { .mmf
  6519. STFD [C2 ] = f72, SIZE
  6520. mov f72 = f0
  6521. }
  6522. ;;
  6523. { .mmi
  6524. STFD [C2 ] = f73, SIZE
  6525. }
  6526. ;;
  6527. { .mmi
  6528. STFD [C2 ] = f74, SIZE
  6529. }
  6530. ;;
  6531. { .mmi
  6532. #ifndef LN
  6533. STFD [C2 ] = f75, SIZE
  6534. #else
  6535. STFD [C2 ] = f75, - 3 * SIZE
  6536. #endif
  6537. }
  6538. ;;
  6539. { .mmf
  6540. STFD [C3 ] = f80, SIZE
  6541. mov f80 = f0
  6542. }
  6543. ;;
  6544. { .mmi
  6545. STFD [C3 ] = f81, SIZE
  6546. }
  6547. ;;
  6548. { .mmi
  6549. STFD [C3 ] = f82, SIZE
  6550. }
  6551. ;;
  6552. { .mmi
  6553. #ifndef LN
  6554. STFD [C3 ] = f83, SIZE
  6555. #else
  6556. STFD [C3 ] = f83, - 3 * SIZE
  6557. #endif
  6558. }
  6559. ;;
  6560. { .mmf
  6561. STFD [C4 ] = f88, SIZE
  6562. mov f88 = f0
  6563. }
  6564. ;;
  6565. { .mmi
  6566. STFD [C4 ] = f89, SIZE
  6567. }
  6568. ;;
  6569. { .mmi
  6570. STFD [C4 ] = f90, SIZE
  6571. }
  6572. ;;
  6573. { .mmi
  6574. #ifndef LN
  6575. STFD [C4 ] = f91, SIZE
  6576. #else
  6577. STFD [C4 ] = f91, - 3 * SIZE
  6578. #endif
  6579. nop __LINE__
  6580. }
  6581. ;;
  6582. mov f65 = f0
  6583. ;;
  6584. mov f73 = f0
  6585. ;;
  6586. shladd r2 = K, BASE_SHIFT, r0
  6587. ;;
  6588. { .mmi
  6589. sub L = K, KK
  6590. }
  6591. ;;
  6592. { .mmi
  6593. #ifdef RT
  6594. shladd AORIG = r2, 2, AORIG
  6595. #else
  6596. nop __LINE__
  6597. #endif
  6598. }
  6599. ;;
  6600. { .mmf
  6601. mov f81 = f0
  6602. }
  6603. ;;
  6604. { .mmi
  6605. #if defined(LT) || defined(RN)
  6606. shladd L = L, BASE_SHIFT, r0
  6607. #else
  6608. nop __LINE__
  6609. #endif
  6610. }
  6611. ;;
  6612. { .mmi
  6613. #if defined(LT) || defined(RN)
  6614. shladd AOFFSET = L, 2, AOFFSET
  6615. #else
  6616. nop __LINE__
  6617. #endif
  6618. }
  6619. ;;
  6620. { .mmi
  6621. #if defined(LT) || defined(RN)
  6622. shladd BOFFSET = L, 2, BOFFSET
  6623. #else
  6624. nop __LINE__
  6625. #endif
  6626. }
  6627. ;;
  6628. { .mmf
  6629. mov f89 = f0
  6630. }
  6631. ;;
  6632. { .mmi
  6633. #ifdef LT
  6634. adds KK = 4, KK
  6635. #elif defined LN
  6636. adds KK = -4, KK
  6637. #else
  6638. nop __LINE__
  6639. #endif
  6640. }
  6641. ;;
  6642. { .mmi
  6643. #if defined(LT) || defined(RN)
  6644. mov L = KK
  6645. #else
  6646. sub L = K, KK
  6647. #endif
  6648. }
  6649. ;;
  /*
   * .L070: set-up for the "M & 2" remainder, a 2-row by 4-column tile.
   * The whole tile is skipped (branch to .L080) when bit 1 of M is clear.
   * On fall-through into .L072:
   *   - p7 = (L != 0) has guarded the preloads of f48-f51 (from BOFFSET)
   *     and f32/f33 (from AOFFSET);
   *   - f65, f73, f81 and f89 have been set to f0;
   *   - ar.lc = ((L + 1) >> 1) - 1, and p12 records that bit 0 of (L + 1)
   *     was zero, i.e. that the k-count L was odd.
   * When the loop count comes out as -1 the loop is bypassed (.L078).
   */
  6650. .align 8
  6651. .L070:
  6652. tbit.z p6,p0 = M, 1
  6653. (p6) br.cond.dptk .L080
  6654. ;;
  /* k-count for this tile: KK for LT/RN, K - KK otherwise. */
  6655. { .mib
  6656. #if defined(LT) || defined(RN)
  6657. mov L = KK
  6658. #else
  6659. sub L = K, KK
  6660. #endif
  6661. }
  6662. ;;
  /* r2 = K << (1 + BASE_SHIFT); r3 = KK << BASE_SHIFT; BOFFSET = B. */
  6663. { .mmi
  6664. cmp.ne p7, p0 = r0, L
  6665. adds BOFFSET = 0 * SIZE, B
  6666. shl r2 = K, 1 + BASE_SHIFT
  6667. }
  6668. { .mmi
  6669. shladd r3 = KK, BASE_SHIFT, r0
  6670. nop __LINE__
  6671. nop __LINE__
  6672. }
  6673. ;;
  6674. #if defined(LT) || defined(RN)
  6675. { .mmf
  6676. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  6677. nop __LINE__
  6678. mov f65 = f0
  6679. }
  6680. ;;
  6681. #else
  /* LN/RT: BOFFSET = B + 4 * r3; LN additionally steps AORIG back by r2;
     AOFFSET = AORIG + 2 * r3. */
  6682. { .mfi
  6683. shladd BOFFSET = r3, 2, B
  6684. mov f65 = f0
  6685. #ifdef LN
  6686. sub AORIG = AORIG, r2
  6687. #else
  6688. nop __LINE__
  6689. #endif
  6690. }
  6691. ;;
  6692. { .mfi
  6693. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  6694. shladd AOFFSET = r3, 1, AORIG
  6695. }
  6696. ;;
  6697. #endif
  6698. ;;
  6699. mov f73 = f0
  6700. ;;
  6701. { .mfi
  6702. mov f81 = f0
  6703. adds L = 1, L
  6704. }
  /* p3 starts out true (r0 == r0); the loop may clear it on its last pass. */
  6705. { .mfi
  6706. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  6707. mov f89 = f0
  6708. cmp.eq p3, p0 = r0, r0
  6709. }
  6710. ;;
  /* p12 is taken from bit 0 of (L + 1) before the halving below. */
  6711. { .mfi
  6712. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  6713. tbit.z p12, p0 = L, 0
  6714. }
  6715. { .mfi
  6716. shr L = L, 1
  6717. }
  6718. ;;
  6719. { .mmf
  6720. adds L = -1, L
  6721. }
  6722. ;;
  /* L == -1 means there is nothing to accumulate: go straight to .L078. */
  6723. { .mmf
  6724. cmp.eq p6, p0 = -1, L
  6725. }
  6726. ;;
  6727. { .mib
  6728. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  6729. mov ar.lc = L
  6730. (p6) br.cond.dpnt .L078
  6731. }
  6732. ;;
  /*
   * .L072: counted loop (br.cloop on ar.lc) for the 2x4 tile; each pass
   * covers two k-steps.
   *   step 1 (always):  f32/f33 (A1/A2) times f48-f51 (B1-B4)
   *   step 2 (if p3):   f40/f41 (A1/A2) times f56-f59 (B1-B4)
   * Accumulators: f64/f72/f80/f88 for A1 and f65/f73/f81/f89 for A2.
   * p3 is only rewritten when p12 is set, in which case it turns false
   * once L has counted down to 0, so the last pass then drops step 2.
   * p4 = (L != 0) guards the reloads of f32/f33 and f48-f51 that feed the
   * next pass; L is decremented at the bottom of every pass.
   * NOTE(review): PREB is prefetched here but is not assigned in the
   * .L070 set-up; it carries whatever an earlier block left in it.
   * Confirm this is intended.
   */
  6733. .align 8
  6734. .L072:
  6735. { .mfb
  6736. lfetch.nt1 [PREA], 4 * SIZE
  6737. FMA f64 = f32, f48, f64 // A1 * B1
  6738. nop __LINE__
  6739. }
  6740. { .mfi
  6741. nop __LINE__
  6742. FMA f72 = f32, f49, f72 // A1 * B2
  6743. (p12) cmp.ne p3, p0 = 0, L
  6744. }
  6745. ;;
  6746. { .mfi
  6747. lfetch.nt1 [PREB], 8 * SIZE
  6748. FMA f80 = f32, f50, f80 // A1 * B3
  6749. cmp.ne p4, p5 = 0, L
  6750. }
  6751. { .mfb
  6752. nop __LINE__
  6753. FMA f88 = f32, f51, f88 // A1 * B4
  6754. nop __LINE__
  6755. }
  6756. ;;
  /* Operands for step 2 are fetched while step 1 is still in flight. */
  6757. { .mfi
  6758. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  6759. FMA f65 = f33, f48, f65 // A2 * B1
  6760. }
  6761. { .mfi
  6762. nop __LINE__
  6763. FMA f73 = f33, f49, f73 // A2 * B2
  6764. }
  6765. ;;
  6766. { .mfi
  6767. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  6768. FMA f81 = f33, f50, f81 // A2 * B3
  6769. }
  6770. { .mmf
  6771. nop __LINE__
  6772. nop __LINE__
  6773. FMA f89 = f33, f51, f89 // A2 * B4
  6774. }
  6775. ;;
  /* Step 2, fully predicated on p3. */
  6776. { .mfb
  6777. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  6778. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  6779. nop __LINE__
  6780. }
  6781. { .mmf
  6782. nop __LINE__
  6783. nop __LINE__
  6784. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  6785. }
  6786. ;;
  /* Reloads for the next pass, only when another pass follows (p4). */
  6787. { .mfb
  6788. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  6789. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  6790. nop __LINE__
  6791. }
  6792. { .mmf
  6793. nop __LINE__
  6794. nop __LINE__
  6795. (p3) FMA f88 = f40, f59, f88 // A1 * B4
  6796. }
  6797. ;;
  6798. { .mfb
  6799. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  6800. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  6801. nop __LINE__
  6802. }
  6803. { .mfb
  6804. nop __LINE__
  6805. (p3) FMA f73 = f41, f57, f73 // A2 * B2
  6806. nop __LINE__
  6807. }
  6808. ;;
  6809. { .mfi
  6810. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  6811. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  6812. adds L = -1, L
  6813. }
  6814. { .mfb
  6815. nop __LINE__
  6816. (p3) FMA f89 = f41, f59, f89 // A2 * B4
  6817. br.cloop.sptk.few .L072
  6818. }
  6819. ;;
  /*
   * .L078: finish the 2x4 tile. The eight accumulators
   * (f64,f65 / f72,f73 / f80,f81 / f88,f89) are subtracted from the values
   * read back from the packed buffer, run through the variant-specific
   * solve (LN, LT, RN or RT), written back to the packed buffer and to
   * C1..C4, and finally the A/B pointers and KK are advanced.
   * FSUB/FMPY/FNMA/LDFPD/LDFD/STFD are macros defined outside this chunk;
   * the notes below assume they wrap fsub/fmpy/fnma/ldfpd/ldfd/stfd
   * (a - b, a * b, c - a * b, pair load with the first destination taken
   * from the lower address). TODO confirm.
   * The diagonal terms are applied with FMPY, so they are presumably
   * stored pre-inverted by the packing code - verify against the packer.
   */
  6820. .L078:
  6821. #if defined(LN) || defined(RT)
  /* Re-derive the packed pointers: r2 = (KK - 2) for LN or (KK - 4) for RT,
     scaled by the element size; AOFFSET = AORIG + 2 * r2, BOFFSET = B + 4 * r2. */
  6822. #ifdef LN
  6823. adds r2 = -2, KK
  6824. #else
  6825. adds r2 = -4, KK
  6826. #endif
  6827. ;;
  6828. shladd r2 = r2, BASE_SHIFT, r0
  6829. ;;
  6830. shladd AOFFSET = r2, 1, AORIG
  6831. shladd BOFFSET = r2, 2, B
  6832. ;;
  6833. #endif
  /* Second store pointers, four elements past the first ones. */
  6834. adds AOFFSET2 = 4 * SIZE, AOFFSET
  6835. adds BOFFSET2 = 4 * SIZE, BOFFSET
  6836. ;;
  6837. #if defined(LN) || defined(LT)
  /* LN/LT: eight values come from BOFFSET (left where it started: three
     +2 post-increments, then -6). Order in memory: row 1 of all four
     columns (f64,f72,f80,f88), then row 2 (f65,f73,f81,f89). */
  6838. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  6839. ;;
  6840. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  6841. ;;
  6842. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  6843. ;;
  6844. LDFPD f38, f39 = [BOFFSET]
  6845. adds BOFFSET = -6 * SIZE, BOFFSET
  6846. ;;
  6847. FSUB f64 = f32, f64
  6848. FSUB f72 = f33, f72
  6849. FSUB f80 = f34, f80
  6850. FSUB f88 = f35, f88
  6851. FSUB f65 = f36, f65
  6852. FSUB f73 = f37, f73
  6853. FSUB f81 = f38, f81
  6854. FSUB f89 = f39, f89
  6855. ;;
  6856. #else
  /* RN/RT: eight values come from AOFFSET (also restored). Order in
     memory: both rows of column 1 (f64,f65), then columns 2, 3 and 4. */
  6857. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  6858. ;;
  6859. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  6860. ;;
  6861. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  6862. ;;
  6863. LDFPD f38, f39 = [AOFFSET]
  6864. adds AOFFSET = -6 * SIZE, AOFFSET
  6865. ;;
  6866. FSUB f64 = f32, f64
  6867. FSUB f65 = f33, f65
  6868. FSUB f72 = f34, f72
  6869. FSUB f73 = f35, f73
  6870. FSUB f80 = f36, f80
  6871. FSUB f81 = f37, f81
  6872. FSUB f88 = f38, f88
  6873. FSUB f89 = f39, f89
  6874. ;;
  6875. #endif
  6876. #ifdef LN
  /* LN: f33/f32 are read from AOFFSET + 2 and + 3, f34 from AOFFSET + 0.
     Row 2 is scaled by f32, folded out of row 1 with f33, then row 1 is
     scaled by f34. AOFFSET ends where it started. */
  6877. adds AOFFSET = 2 * SIZE, AOFFSET
  6878. ;;
  6879. LDFPD f33, f32 = [AOFFSET]
  6880. adds AOFFSET = - 2 * SIZE, AOFFSET
  6881. ;;
  6882. LDFD f34 = [AOFFSET]
  6883. ;;
  6884. FMPY f65 = f65, f32
  6885. FMPY f73 = f73, f32
  6886. FMPY f81 = f81, f32
  6887. FMPY f89 = f89, f32
  6888. ;;
  6889. FNMA f64 = f65, f33, f64
  6890. FNMA f72 = f73, f33, f72
  6891. FNMA f80 = f81, f33, f80
  6892. FNMA f88 = f89, f33, f88
  6893. ;;
  6894. FMPY f64 = f64, f34
  6895. FMPY f72 = f72, f34
  6896. FMPY f80 = f80, f34
  6897. FMPY f88 = f88, f34
  6898. ;;
  /* Write the solved tile back to the packed B buffer; the final -3
     post-increments return both store pointers to their start. */
  6899. STFD [BOFFSET] = f64, SIZE
  6900. STFD [BOFFSET2] = f65, SIZE
  6901. ;;
  6902. STFD [BOFFSET] = f72, SIZE
  6903. STFD [BOFFSET2] = f73, SIZE
  6904. ;;
  6905. STFD [BOFFSET] = f80, SIZE
  6906. STFD [BOFFSET2] = f81, SIZE
  6907. ;;
  6908. STFD [BOFFSET] = f88, - 3 * SIZE
  6909. STFD [BOFFSET2] = f89, - 3 * SIZE
  6910. ;;
  /* LN steps the C column pointers back by two elements before storing. */
  6911. adds C1 = -2 * SIZE, C1
  6912. adds C2 = -2 * SIZE, C2
  6913. adds C3 = -2 * SIZE, C3
  6914. adds C4 = -2 * SIZE, C4
  6915. ;;
  6916. #endif
  6917. #ifdef LT
  /* LT: f32/f33 are read from AOFFSET + 0 and + 1, f34 from AOFFSET + 3.
     Row 1 is scaled by f32, folded out of row 2 with f33, then row 2 is
     scaled by f34. AOFFSET ends where it started. */
  6918. LDFPD f32, f33 = [AOFFSET]
  6919. adds AOFFSET = 3 * SIZE, AOFFSET
  6920. ;;
  6921. LDFD f34 = [AOFFSET], - 3 * SIZE
  6922. ;;
  6923. FMPY f64 = f64, f32
  6924. FMPY f72 = f72, f32
  6925. FMPY f80 = f80, f32
  6926. FMPY f88 = f88, f32
  6927. ;;
  6928. FNMA f65 = f64, f33, f65
  6929. FNMA f73 = f72, f33, f73
  6930. FNMA f81 = f80, f33, f81
  6931. FNMA f89 = f88, f33, f89
  6932. ;;
  6933. FMPY f65 = f65, f34
  6934. FMPY f73 = f73, f34
  6935. FMPY f81 = f81, f34
  6936. FMPY f89 = f89, f34
  6937. ;;
  6938. STFD [BOFFSET] = f64, SIZE
  6939. STFD [BOFFSET2] = f65, SIZE
  6940. ;;
  6941. STFD [BOFFSET] = f72, SIZE
  6942. STFD [BOFFSET2] = f73, SIZE
  6943. ;;
  6944. STFD [BOFFSET] = f80, SIZE
  6945. STFD [BOFFSET2] = f81, SIZE
  6946. ;;
  6947. STFD [BOFFSET] = f88, -3 * SIZE
  6948. STFD [BOFFSET2] = f89, -3 * SIZE
  6949. ;;
  6950. #endif
  6951. #ifdef RN
  /* RN: coefficients are read from BOFFSET at element offsets
     0..3 (f32-f35), 5 (f36), 6..7 (f37,f38), 10..11 (f39,f40) and 15 (f41);
     the last load's -15 post-increment restores BOFFSET.
     Columns are processed 1 -> 4: scale a column, then fold it out of
     every later column. */
  6952. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  6953. ;;
  6954. LDFPD f34, f35 = [BOFFSET]
  6955. adds BOFFSET = 3 * SIZE, BOFFSET
  6956. ;;
  6957. LDFD f36 = [BOFFSET], 1 * SIZE
  6958. ;;
  6959. LDFPD f37, f38 = [BOFFSET]
  6960. adds BOFFSET = 4 * SIZE, BOFFSET
  6961. ;;
  6962. LDFPD f39, f40 = [BOFFSET]
  6963. adds BOFFSET = 5 * SIZE, BOFFSET
  6964. ;;
  6965. LDFD f41 = [BOFFSET], -15 * SIZE
  6966. ;;
  6967. FMPY f64 = f64, f32
  6968. FMPY f65 = f65, f32
  6969. ;;
  6970. FNMA f72 = f64, f33, f72
  6971. FNMA f73 = f65, f33, f73
  6972. ;;
  6973. FNMA f80 = f64, f34, f80
  6974. FNMA f81 = f65, f34, f81
  6975. ;;
  6976. FNMA f88 = f64, f35, f88
  6977. FNMA f89 = f65, f35, f89
  6978. ;;
  6979. FMPY f72 = f72, f36
  6980. FMPY f73 = f73, f36
  6981. ;;
  6982. FNMA f80 = f72, f37, f80
  6983. FNMA f81 = f73, f37, f81
  6984. ;;
  6985. FNMA f88 = f72, f38, f88
  6986. FNMA f89 = f73, f38, f89
  6987. ;;
  6988. FMPY f80 = f80, f39
  6989. FMPY f81 = f81, f39
  6990. ;;
  6991. FNMA f88 = f80, f40, f88
  6992. FNMA f89 = f81, f40, f89
  6993. ;;
  6994. FMPY f88 = f88, f41
  6995. FMPY f89 = f89, f41
  6996. ;;
  /* Write back to the packed A buffer: columns 1-2 through AOFFSET,
     columns 3-4 through AOFFSET2; both pointers end where they started. */
  6997. STFD [AOFFSET] = f64, SIZE
  6998. STFD [AOFFSET2] = f80, SIZE
  6999. ;;
  7000. STFD [AOFFSET] = f65, SIZE
  7001. STFD [AOFFSET2] = f81, SIZE
  7002. ;;
  7003. STFD [AOFFSET] = f72, SIZE
  7004. STFD [AOFFSET2] = f88, SIZE
  7005. ;;
  7006. STFD [AOFFSET] = f73, -3 * SIZE
  7007. STFD [AOFFSET2] = f89, -3 * SIZE
  7008. ;;
  7009. #endif
  7010. #ifdef RT
  /* RT: coefficients are read walking backwards from element offset 14:
     f33/f32 at 14/15, f35/f34 at 12/13, f36 at 10, f38/f37 at 8/9,
     f40/f39 at 4/5, f41 at 0; BOFFSET ends back at its start.
     Columns are processed 4 -> 1: scale a column, then fold it out of
     every earlier column. */
  7011. adds BOFFSET = 14 * SIZE, BOFFSET
  7012. ;;
  7013. LDFPD f33, f32 = [BOFFSET]
  7014. adds BOFFSET = - 2 * SIZE, BOFFSET
  7015. ;;
  7016. LDFPD f35, f34 = [BOFFSET]
  7017. adds BOFFSET = - 2 * SIZE, BOFFSET
  7018. ;;
  7019. LDFD f36 = [BOFFSET], - 2 * SIZE
  7020. ;;
  7021. LDFPD f38, f37 = [BOFFSET]
  7022. adds BOFFSET = - 4 * SIZE, BOFFSET
  7023. ;;
  7024. LDFPD f40, f39 = [BOFFSET]
  7025. adds BOFFSET = - 4 * SIZE, BOFFSET
  7026. ;;
  7027. LDFD f41 = [BOFFSET]
  7028. ;;
  7029. FMPY f88 = f88, f32
  7030. FMPY f89 = f89, f32
  7031. ;;
  7032. FNMA f80 = f88, f33, f80
  7033. FNMA f81 = f89, f33, f81
  7034. ;;
  7035. FNMA f72 = f88, f34, f72
  7036. FNMA f73 = f89, f34, f73
  7037. ;;
  7038. FNMA f64 = f88, f35, f64
  7039. FNMA f65 = f89, f35, f65
  7040. ;;
  7041. FMPY f80 = f80, f36
  7042. FMPY f81 = f81, f36
  7043. ;;
  7044. FNMA f72 = f80, f37, f72
  7045. FNMA f73 = f81, f37, f73
  7046. ;;
  7047. FNMA f64 = f80, f38, f64
  7048. FNMA f65 = f81, f38, f65
  7049. ;;
  7050. FMPY f72 = f72, f39
  7051. FMPY f73 = f73, f39
  7052. ;;
  7053. FNMA f64 = f72, f40, f64
  7054. FNMA f65 = f73, f40, f65
  7055. ;;
  7056. FMPY f64 = f64, f41
  7057. FMPY f65 = f65, f41
  7058. ;;
  7059. STFD [AOFFSET] = f64, SIZE
  7060. STFD [AOFFSET2] = f80, SIZE
  7061. ;;
  7062. STFD [AOFFSET] = f65, SIZE
  7063. STFD [AOFFSET2] = f81, SIZE
  7064. ;;
  7065. STFD [AOFFSET] = f72, SIZE
  7066. STFD [AOFFSET2] = f88, SIZE
  7067. ;;
  7068. STFD [AOFFSET] = f73, -3 * SIZE
  7069. STFD [AOFFSET2] = f89, -3 * SIZE
  7070. ;;
  7071. #endif
  /* Store the tile to C. Non-LN builds leave each column pointer two
     elements further on; LN undoes the first increment with -SIZE so the
     pointer stays at the start of the tile. f64/f72/f80/f88 are reset to
     f0 as soon as they have been stored. */
  7072. STFD [C1 ] = f64, SIZE
  7073. mov f64 = f0
  7074. ;;
  7075. #ifndef LN
  7076. STFD [C1 ] = f65, SIZE
  7077. #else
  7078. STFD [C1 ] = f65, -SIZE
  7079. #endif
  7080. ;;
  7081. STFD [C2 ] = f72, SIZE
  7082. mov f72 = f0
  7083. ;;
  7084. #ifndef LN
  7085. STFD [C2 ] = f73, SIZE
  7086. #else
  7087. STFD [C2 ] = f73, -SIZE
  7088. #endif
  7089. ;;
  7090. STFD [C3 ] = f80, SIZE
  7091. mov f80 = f0
  7092. ;;
  7093. #ifndef LN
  7094. STFD [C3 ] = f81, SIZE
  7095. #else
  7096. STFD [C3 ] = f81, - SIZE
  7097. #endif
  7098. ;;
  7099. STFD [C4 ] = f88, SIZE
  7100. mov f88 = f0
  7101. ;;
  7102. #ifndef LN
  7103. STFD [C4 ] = f89, SIZE
  7104. #else
  7105. STFD [C4 ] = f89, -SIZE
  7106. #endif
  7107. ;;
  7108. mov f96 = f0
  7109. ;;
  7110. mov f104 = f0
  7111. ;;
  /* Pointer bookkeeping: r2 = K << BASE_SHIFT, L = K - KK. */
  7112. shladd r2 = K, BASE_SHIFT, r0
  7113. ;;
  7114. sub L = K, KK
  7115. ;;
  /* RT: AORIG += 2 * r2. */
  7116. #ifdef RT
  7117. shladd AORIG = r2, 1, AORIG
  7118. #else
  7119. nop __LINE__
  7120. #endif
  7121. ;;
  7122. mov f112 = f0
  7123. ;;
  /* LT/RN: scale L by the element size, then AOFFSET += 2 * L and
     BOFFSET += 4 * L. */
  7124. { .mmi
  7125. #if defined(LT) || defined(RN)
  7126. shladd L = L, BASE_SHIFT, r0
  7127. #else
  7128. nop __LINE__
  7129. #endif
  7130. }
  7131. ;;
  7132. { .mmi
  7133. #if defined(LT) || defined(RN)
  7134. shladd AOFFSET = L, 1, AOFFSET
  7135. #else
  7136. nop __LINE__
  7137. #endif
  7138. }
  7139. ;;
  7140. { .mmi
  7141. #if defined(LT) || defined(RN)
  7142. shladd BOFFSET = L, 2, BOFFSET
  7143. #else
  7144. nop __LINE__
  7145. #endif
  7146. }
  7147. ;;
  7148. { .mmf
  7149. mov f120 = f0
  7150. }
  7151. ;;
  /* KK moves by the two rows just handled: +2 for LT, -2 for LN. */
  7152. { .mmi
  7153. #ifdef LT
  7154. adds KK = 2, KK
  7155. #elif defined LN
  7156. adds KK = -2, KK
  7157. #else
  7158. nop __LINE__
  7159. #endif
  7160. }
  7161. ;;
  7162. { .mmi
  7163. #if defined(LT) || defined(RN)
  7164. mov L = KK
  7165. #else
  7166. sub L = K, KK
  7167. #endif
  7168. }
  7169. ;;
  /*
   * .L080: set-up for the "M & 1" remainder, a 1-row by 4-column tile.
   * Skipped (branch to .L089) when bit 0 of M is clear.
   * On fall-through into .L082: p7 = (L != 0) has guarded the preloads of
   * f48-f51 (from BOFFSET) and f32 (from AOFFSET), p3 is true,
   * ar.lc = ((L + 1) >> 1) - 1, and p12 records that the k-count was odd.
   * A loop count of -1 bypasses the loop (.L088).
   */
  7170. .align 8
  7171. .L080:
  7172. tbit.z p6,p7 = M, 0
  7173. (p6) br.cond.dptk .L089
  /* k-count for this tile: KK for LT/RN, K - KK otherwise. */
  7174. { .mib
  7175. #if defined(LT) || defined(RN)
  7176. mov L = KK
  7177. #else
  7178. sub L = K, KK
  7179. #endif
  7180. }
  7181. ;;
  /* r2 = K << BASE_SHIFT; r3 = KK << BASE_SHIFT; BOFFSET = B. */
  7182. { .mmi
  7183. cmp.ne p7, p0 = r0, L
  7184. adds BOFFSET = 0 * SIZE, B
  7185. shl r2 = K, 0 + BASE_SHIFT
  7186. }
  7187. { .mmi
  7188. shladd r3 = KK, BASE_SHIFT, r0
  7189. nop __LINE__
  7190. nop __LINE__
  7191. }
  7192. ;;
  7193. #if defined(LT) || defined(RN)
  7194. { .mmf
  7195. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7196. }
  7197. ;;
  7198. #else
  /* LN/RT: BOFFSET = B + 4 * r3; LN additionally steps AORIG back by r2;
     AOFFSET = AORIG + r3. */
  7199. { .mfi
  7200. shladd BOFFSET = r3, 2, B
  7201. #ifdef LN
  7202. sub AORIG = AORIG, r2
  7203. #else
  7204. nop __LINE__
  7205. #endif
  7206. }
  7207. ;;
  7208. { .mfi
  7209. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7210. add AOFFSET = r3, AORIG
  7211. }
  7212. ;;
  7213. #endif
  /* p3 starts out true (r0 == r0). */
  7214. { .mmi
  7215. adds L = 1, L
  7216. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  7217. cmp.eq p3, p0 = r0, r0
  7218. }
  7219. ;;
  /* p12 is taken from bit 0 of (L + 1); L is then halved. */
  7220. { .mii
  7221. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  7222. tbit.z p12, p0 = L, 0
  7223. shr L = L, 1
  7224. }
  7225. ;;
  7226. { .mmi
  7227. adds L = -1, L
  7228. }
  7229. ;;
  7230. { .mmi
  7231. cmp.eq p6, p0 = -1, L
  7232. }
  7233. ;;
  7234. { .mib
  7235. (p7) LDFD f32 = [AOFFSET], 1 * SIZE
  7236. mov ar.lc = L
  7237. (p6) br.cond.dpnt .L088
  7238. }
  7239. ;;
  /*
   * .L082: counted loop (br.cloop on ar.lc) for the 1x4 tile; each pass
   * covers two k-steps.
   *   step 1 (always):  f32 (A1) times f48-f51 (B1-B4)
   *   step 2 (if p3):   f40 (A1) times f56-f59 (B1-B4)
   * Accumulators: f64, f72, f80, f88.
   * p3 is only rewritten when p12 is set and then turns false once L has
   * reached 0, so the last pass drops step 2 in that case.
   * p4 = (L != 0) guards the reloads of f32 and f48-f51 for the next pass.
   */
  7240. .L082:
  7241. { .mfb
  7242. cmp.ne p4, p5 = 0, L
  7243. FMA f64 = f32, f48, f64 // A1 * B1
  7244. nop __LINE__
  7245. }
  7246. { .mfi
  7247. (p12) cmp.ne p3, p0 = 0, L
  7248. FMA f72 = f32, f49, f72 // A1 * B2
  7249. nop __LINE__
  7250. }
  7251. ;;
  /* Operands for step 2. */
  7252. { .mfb
  7253. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  7254. FMA f80 = f32, f50, f80 // A1 * B3
  7255. nop __LINE__
  7256. }
  7257. { .mfb
  7258. (p3) LDFD f40 = [AOFFSET], 1 * SIZE
  7259. FMA f88 = f32, f51, f88 // A1 * B4
  7260. nop __LINE__
  7261. }
  7262. ;;
  7263. { .mfb
  7264. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  7265. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  7266. nop __LINE__
  7267. }
  7268. { .mfb
  7269. nop __LINE__
  7270. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  7271. nop __LINE__
  7272. }
  7273. ;;
  /* Reloads for the next pass, only when another pass follows (p4). */
  7274. { .mmf
  7275. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7276. (p4) LDFD f32 = [AOFFSET], 1 * SIZE
  7277. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  7278. }
  7279. { .mmf
  7280. nop __LINE__
  7281. nop __LINE__
  7282. (p3) FMA f88 = f40, f59, f88 // A1 * B4
  7283. }
  7284. ;;
  7285. { .mib
  7286. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  7287. nop __LINE__
  7288. nop __LINE__
  7289. }
  7290. { .mmb
  7291. nop __LINE__
  7292. adds L = -1, L
  7293. br.cloop.sptk.few .L082
  7294. }
  7295. ;;
  /*
   * .L088: finish the 1x4 tile. The four accumulators (f64, f72, f80, f88)
   * are subtracted from the values read back from the packed buffer, run
   * through the variant-specific solve (LN, LT, RN or RT), written back to
   * the packed buffer and to C1..C4, then the A/B pointers and KK are
   * advanced.
   * FSUB/FMPY/FNMA/LDFPD/LDFD/STFD are macros defined outside this chunk;
   * the notes below assume they wrap fsub/fmpy/fnma/ldfpd/ldfd/stfd
   * (a - b, a * b, c - a * b, pair load with the first destination taken
   * from the lower address). TODO confirm.
   */
  7296. .L088:
  7297. #if defined(LN) || defined(RT)
  /* Re-derive the packed pointers: r2 = (KK - 1) for LN or (KK - 4) for RT,
     scaled by the element size; AOFFSET = AORIG + r2, BOFFSET = B + 4 * r2. */
  7298. #ifdef LN
  7299. adds r2 = -1, KK
  7300. #else
  7301. adds r2 = -4, KK
  7302. #endif
  7303. ;;
  7304. shladd r2 = r2, BASE_SHIFT, r0
  7305. ;;
  7306. add AOFFSET = r2, AORIG
  7307. shladd BOFFSET = r2, 2, B
  7308. ;;
  7309. #endif
  7310. adds AOFFSET2 = 4 * SIZE, AOFFSET
  7311. adds BOFFSET2 = 4 * SIZE, BOFFSET
  7312. ;;
  7313. #if defined(LN) || defined(LT)
  /* LN/LT: four values from BOFFSET (pointer restored afterwards). */
  7314. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  7315. ;;
  7316. LDFPD f34, f35 = [BOFFSET]
  7317. adds BOFFSET = -2 * SIZE, BOFFSET
  7318. ;;
  7319. FSUB f64 = f32, f64
  7320. FSUB f72 = f33, f72
  7321. FSUB f80 = f34, f80
  7322. FSUB f88 = f35, f88
  7323. ;;
  7324. #else
  /* RN/RT: four values from AOFFSET (pointer restored afterwards). */
  7325. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  7326. ;;
  7327. LDFPD f34, f35 = [AOFFSET]
  7328. adds AOFFSET = -2 * SIZE, AOFFSET
  7329. ;;
  7330. FSUB f64 = f32, f64
  7331. FSUB f72 = f33, f72
  7332. FSUB f80 = f34, f80
  7333. FSUB f88 = f35, f88
  7334. ;;
  7335. #endif
  7336. #ifdef LN
  /* LN: a single coefficient from AOFFSET scales all four values. The C
     column pointers are stepped back by one element alongside the
     write-back to the packed B buffer. */
  7337. LDFD f32 = [AOFFSET]
  7338. ;;
  7339. FMPY f64 = f64, f32
  7340. FMPY f72 = f72, f32
  7341. FMPY f80 = f80, f32
  7342. FMPY f88 = f88, f32
  7343. ;;
  7344. { .mmi
  7345. STFD [BOFFSET] = f64, SIZE
  7346. adds C1 = -1 * SIZE, C1
  7347. }
  7348. ;;
  7349. { .mmi
  7350. STFD [BOFFSET] = f72, SIZE
  7351. adds C2 = -1 * SIZE, C2
  7352. }
  7353. ;;
  7354. { .mmi
  7355. STFD [BOFFSET] = f80, SIZE
  7356. nop __LINE__
  7357. }
  7358. ;;
  7359. { .mmi
  7360. STFD [BOFFSET] = f88, - 3 * SIZE
  7361. }
  7362. ;;
  7363. adds C3 = -1 * SIZE, C3
  7364. adds C4 = -1 * SIZE, C4
  7365. ;;
  7366. #endif
  7367. #ifdef LT
  /* LT: same single-coefficient scaling; the C pointers are not moved. */
  7368. LDFD f32 = [AOFFSET]
  7369. ;;
  7370. FMPY f64 = f64, f32
  7371. FMPY f72 = f72, f32
  7372. FMPY f80 = f80, f32
  7373. FMPY f88 = f88, f32
  7374. ;;
  7375. STFD [BOFFSET] = f64, SIZE
  7376. ;;
  7377. STFD [BOFFSET] = f72, SIZE
  7378. ;;
  7379. STFD [BOFFSET] = f80, SIZE
  7380. ;;
  7381. STFD [BOFFSET] = f88, -3 * SIZE
  7382. ;;
  7383. #endif
  7384. #ifdef RN
  /* RN: coefficients are read from BOFFSET at element offsets
     0..3 (f32-f35), 5 (f36), 6..7 (f37,f38), 10..11 (f39,f40) and 15 (f41);
     the -15 post-increment restores BOFFSET. Values are processed
     f64 -> f72 -> f80 -> f88: scale one, then fold it out of the later ones. */
  7385. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  7386. ;;
  7387. LDFPD f34, f35 = [BOFFSET]
  7388. adds BOFFSET = 3 * SIZE, BOFFSET
  7389. ;;
  7390. LDFD f36 = [BOFFSET], 1 * SIZE
  7391. ;;
  7392. LDFPD f37, f38 = [BOFFSET]
  7393. adds BOFFSET = 4 * SIZE, BOFFSET
  7394. ;;
  7395. LDFPD f39, f40 = [BOFFSET]
  7396. adds BOFFSET = 5 * SIZE, BOFFSET
  7397. ;;
  7398. LDFD f41 = [BOFFSET], -15 * SIZE
  7399. FMPY f64 = f64, f32
  7400. ;;
  7401. FNMA f72 = f64, f33, f72
  7402. ;;
  7403. FNMA f80 = f64, f34, f80
  7404. ;;
  7405. FNMA f88 = f64, f35, f88
  7406. ;;
  7407. FMPY f72 = f72, f36
  7408. ;;
  7409. FNMA f80 = f72, f37, f80
  7410. ;;
  7411. FNMA f88 = f72, f38, f88
  7412. ;;
  7413. FMPY f80 = f80, f39
  7414. ;;
  7415. FNMA f88 = f80, f40, f88
  7416. ;;
  7417. FMPY f88 = f88, f41
  7418. ;;
  7419. STFD [AOFFSET] = f64, SIZE
  7420. ;;
  7421. STFD [AOFFSET] = f72, SIZE
  7422. ;;
  7423. STFD [AOFFSET] = f80, SIZE
  7424. ;;
  7425. STFD [AOFFSET] = f88, -3 * SIZE
  7426. ;;
  7427. #endif
  7428. #ifdef RT
  /* RT: coefficients are read walking backwards from element offset 14:
     f33/f32 at 14/15, f35/f34 at 12/13, f36 at 10, f38/f37 at 8/9,
     f40/f39 at 4/5, f41 at 0; BOFFSET ends back at its start.
     Values are processed f88 -> f80 -> f72 -> f64. */
  7429. adds BOFFSET = 14 * SIZE, BOFFSET
  7430. ;;
  7431. LDFPD f33, f32 = [BOFFSET]
  7432. adds BOFFSET = - 2 * SIZE, BOFFSET
  7433. ;;
  7434. LDFPD f35, f34 = [BOFFSET]
  7435. adds BOFFSET = - 2 * SIZE, BOFFSET
  7436. ;;
  7437. LDFD f36 = [BOFFSET], - 2 * SIZE
  7438. ;;
  7439. LDFPD f38, f37 = [BOFFSET]
  7440. adds BOFFSET = - 4 * SIZE, BOFFSET
  7441. ;;
  7442. LDFPD f40, f39 = [BOFFSET]
  7443. adds BOFFSET = - 4 * SIZE, BOFFSET
  7444. ;;
  7445. LDFD f41 = [BOFFSET]
  7446. ;;
  7447. FMPY f88 = f88, f32
  7448. ;;
  7449. FNMA f80 = f88, f33, f80
  7450. ;;
  7451. FNMA f72 = f88, f34, f72
  7452. ;;
  7453. FNMA f64 = f88, f35, f64
  7454. ;;
  7455. FMPY f80 = f80, f36
  7456. ;;
  7457. FNMA f72 = f80, f37, f72
  7458. ;;
  7459. FNMA f64 = f80, f38, f64
  7460. ;;
  7461. FMPY f72 = f72, f39
  7462. ;;
  7463. FNMA f64 = f72, f40, f64
  7464. ;;
  7465. FMPY f64 = f64, f41
  7466. ;;
  7467. STFD [AOFFSET] = f64, SIZE
  7468. ;;
  7469. STFD [AOFFSET] = f72, SIZE
  7470. ;;
  7471. STFD [AOFFSET] = f80, SIZE
  7472. ;;
  7473. STFD [AOFFSET] = f88, - 3 * SIZE
  7474. ;;
  7475. #endif
  /* Store one element per C column. Non-LN builds post-increment each
     pointer by one element; LN stores without moving the pointer. */
  7476. #ifndef LN
  7477. STFD [C1 ] = f64, SIZE
  7478. #else
  7479. STFD [C1 ] = f64
  7480. #endif
  7481. #ifndef LN
  7482. STFD [C2 ] = f72, SIZE
  7483. #else
  7484. STFD [C2 ] = f72
  7485. #endif
  7486. #ifndef LN
  7487. STFD [C3 ] = f80, SIZE
  7488. #else
  7489. STFD [C3 ] = f80
  7490. #endif
  7491. #ifndef LN
  7492. STFD [C4 ] = f88, SIZE
  7493. #else
  7494. STFD [C4 ] = f88
  7495. #endif
  7496. ;;
  /* Reset the four accumulators to f0. */
  7497. mov f64 = f0
  7498. mov f72 = f0
  7499. mov f80 = f0
  7500. mov f88 = f0
  7501. ;;
  /* Pointer bookkeeping: r2 = K << BASE_SHIFT, L = K - KK. */
  7502. shladd r2 = K, BASE_SHIFT, r0
  7503. ;;
  7504. sub L = K, KK
  7505. ;;
  /* RT: AORIG += r2. */
  7506. #ifdef RT
  7507. add AORIG = r2, AORIG
  7508. #else
  7509. nop __LINE__
  7510. #endif
  7511. ;;
  /* LT/RN: scale L by the element size, then AOFFSET += L and
     BOFFSET += 4 * L. */
  7512. #if defined(LT) || defined(RN)
  7513. shladd L = L, BASE_SHIFT, r0
  7514. #else
  7515. nop __LINE__
  7516. #endif
  7517. ;;
  7518. #if defined(LT) || defined(RN)
  7519. add AOFFSET = L, AOFFSET
  7520. #else
  7521. nop __LINE__
  7522. #endif
  7523. ;;
  7524. #if defined(LT) || defined(RN)
  7525. shladd BOFFSET = L, 2, BOFFSET
  7526. #else
  7527. nop __LINE__
  7528. #endif
  7529. ;;
  /* KK moves by the single row just handled: +1 for LT, -1 for LN. */
  7530. #ifdef LT
  7531. adds KK = 1, KK
  7532. #elif defined LN
  7533. adds KK = -1, KK
  7534. #else
  7535. nop __LINE__
  7536. #endif
  7537. ;;
  7538. #if defined(LT) || defined(RN)
  7539. mov L = KK
  7540. #else
  7541. sub L = K, KK
  7542. #endif
  7543. ;;
  /*
   * .L089: bookkeeping after the last row tile of a 4-column panel.
   *   LN:     B += 4 * (K << BASE_SHIFT)   (KK8 holds K << BASE_SHIFT)
   *   LT/RN:  B  = BOFFSET
   *   RN:     KK += 4
   *   RT:     KK -= 4
   * AOFFSET is then reset to A.
   */
  7544. .align 8
  7545. .L089:
  7546. #ifdef LN
  7547. shladd KK8 = K, BASE_SHIFT, r0
  7548. ;;
  7549. shladd B = KK8, 2, B
  7550. #endif
  7551. #if defined(LT) || defined(RN)
  7552. mov B = BOFFSET
  7553. #endif
  7554. #ifdef RN
  7555. adds KK = 4, KK
  7556. #endif
  7557. #ifdef RT
  7558. adds KK = -4, KK
  7559. #endif
  7560. ;;
  7561. mov AOFFSET = A
  7562. ;;
  /*
   * .L000: entry test for the 8-column panels. J = N >> 3; when that is
   * zero or negative (0 >= J) there is nothing to do and control goes to
   * .L999.
   */
  7563. .align 16
  7564. .L000:
  7565. shr J = N, 3
  7566. ;;
  7567. cmp.ge p6, p0 = 0, J
  7568. (p6) br.cond.dpnt .L999
  7569. ;;
  /*
   * .L010: per-panel set-up for one 8-column panel.
   *   - RT only: B -= K << (3 + BASE_SHIFT) and C -= 8 * LDC before use.
   *   - J is decremented; I = M >> 3 (8-row tile count); p6 = (I == 0)
   *     sends control to .L020 instead of falling into .L011.
   *   - C1..C8 are set to C + 0..7 * LDC; non-RT builds then advance C by
   *     8 * LDC.
   *   - KK = M + OFFSET for LN, OFFSET for LT (left untouched otherwise).
   *   - LN/RT: AORIG = A; otherwise AOFFSET = A.
   *   - L = KK for LT/RN, K - KK otherwise.
   *   - f64, f72, f80, f88, f96, f104, f112 and f120 are set to f0.
   */
  7570. .align 8
  7571. .L010:
  7572. #ifdef RT
  7573. { .mmi
  7574. shladd r3 = LDC, 3, r0
  7575. nop __LINE__
  7576. shl r2 = K, 3 + BASE_SHIFT
  7577. }
  7578. ;;
  7579. { .mmi
  7580. sub B = B, r2
  7581. sub C = C, r3
  7582. nop __LINE__
  7583. }
  7584. #endif
  7585. ;;
  7586. { .mfi
  7587. adds J = -1, J
  7588. mov f64 = f0
  7589. shr I = M, 3
  7590. }
  7591. { .mfi
  7592. mov C1 = C // coffset1 = c + 0 * ldc
  7593. mov f72 = f0
  7594. #ifdef LN
  7595. add KK = M, OFFSET
  7596. #elif defined LT
  7597. mov KK = OFFSET
  7598. #else
  7599. nop __LINE__
  7600. #endif
  7601. }
  7602. ;;
  7603. { .mmf
  7604. cmp.eq p6, p7 = 0, I
  7605. #if defined(LN) || defined(RT)
  7606. mov AORIG = A
  7607. #else
  7608. mov AOFFSET = A
  7609. #endif
  7610. mov f80 = f0
  7611. }
  7612. { .mmf
  7613. add C2 = LDC, C // coffset2 = c + 1 * ldc
  7614. shladd C3 = LDC, 1, C // coffset3 = c + 2 * ldc
  7615. mov f88 = f0
  7616. }
  7617. ;;
  /* C5 reads the old C: it sits in the same instruction group as the
     update of C below. C4/C6 are derived from C2, C7 from C3, C8 from C4. */
  7618. { .mmf
  7619. shladd C5 = LDC, 2, C // coffset5 = c + 4 * ldc
  7620. #ifndef RT
  7621. shladd C = LDC, 3, C // coffset += 8 * ldc
  7622. #else
  7623. nop __LINE__
  7624. #endif
  7625. mov f96 = f0
  7626. }
  7627. { .mmf
  7628. shladd C4 = LDC, 1, C2
  7629. shladd C6 = LDC, 2, C2
  7630. mov f104 = f0
  7631. }
  7632. ;;
  7633. { .mfi
  7634. shladd C7 = LDC, 2, C3
  7635. mov f112 = f0
  7636. #if defined(LT) || defined(RN)
  7637. mov L = KK
  7638. #else
  7639. sub L = K, KK
  7640. #endif
  7641. }{ .mfb
  7642. shladd C8 = LDC, 2, C4
  7643. mov f120 = f0
  7644. (p6) br.cond.dpnt .L020
  7645. }
  7646. ;;
  /*
   * .L011: set-up for one 8-row by 8-column tile, interleaved with
   * clearing the accumulators.
   *   - L holds the k-count on entry (set in .L010 on the fall-through
   *     path); p7 = (L != 0) guards every preload below.
   *   - r2 = K << (3 + BASE_SHIFT), r3 = KK << BASE_SHIFT, BOFFSET = B.
   *   - LN/RT: BOFFSET = B + 8 * r3, AOFFSET = AORIG + 8 * r3, and LN first
   *     steps AORIG back by r2.
   *   - Preloads: f48-f55 from BOFFSET, f32-f39 from AOFFSET.
   *   - Registers f65 upwards are zeroed, either with "mov fN = f0" or
   *     with "setf.d fN = r0" (r0 reads as zero), which spreads the work
   *     over the F and M slots of each bundle pair.
   *   - PREC = C1 + CPREFETCHSIZE * SIZE; CPREFETCH is a macro defined
   *     outside this chunk and is issued eight times, seven of them
   *     advancing PREC by LDC (presumably one touch per C column - confirm).
   *   - PREA/PREB are derived from AOFFSET/BOFFSET after the preloads
   *     issued so far have post-incremented them.
   *   - ar.lc = ((L + 1) >> 1) - 1; p12 records that the k-count was odd;
   *     p3 starts out true; a loop count of -1 branches to .L018.
   */
  7647. .align 16
  7648. .L011:
  7649. { .mmi
  7650. cmp.ne p7, p0 = r0, L
  7651. adds BOFFSET = 0 * SIZE, B
  7652. shl r2 = K, 3 + BASE_SHIFT
  7653. }
  7654. { .mmi
  7655. shladd r3 = KK, BASE_SHIFT, r0
  7656. nop __LINE__
  7657. nop __LINE__
  7658. }
  7659. ;;
  7660. #if defined(LT) || defined(RN)
  7661. { .mfb
  7662. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7663. mov f65 = f0
  7664. nop __LINE__
  7665. }
  7666. { .mmf
  7667. nop __LINE__
  7668. nop __LINE__
  7669. mov f73 = f0
  7670. }
  7671. ;;
  7672. #else
  7673. { .mfi
  7674. shladd BOFFSET = r3, 3, B
  7675. mov f65 = f0
  7676. #ifdef LN
  7677. sub AORIG = AORIG, r2
  7678. #else
  7679. nop __LINE__
  7680. #endif
  7681. }
  7682. ;;
  7683. { .mfi
  7684. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  7685. mov f73 = f0
  7686. shladd AOFFSET = r3, 3, AORIG
  7687. }
  7688. ;;
  7689. #endif
  7690. { .mfb
  7691. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  7692. mov f81 = f0
  7693. nop __LINE__
  7694. }
  7695. { .mmf
  7696. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  7697. setf.d f119 = r0
  7698. mov f89 = f0
  7699. }
  7700. ;;
  7701. { .mmf
  7702. (p7) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  7703. setf.d f97 = r0
  7704. mov f105 = f0
  7705. }
  7706. { .mfb
  7707. setf.d f113 = r0
  7708. mov f121 = f0
  7709. nop __LINE__
  7710. }
  7711. ;;
  7712. { .mmf
  7713. (p7) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  7714. setf.d f66 = r0
  7715. mov f74 = f0
  7716. }
  7717. { .mfb
  7718. setf.d f82 = r0
  7719. mov f90 = f0
  7720. nop __LINE__
  7721. }
  7722. ;;
  7723. { .mmf
  7724. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  7725. setf.d f98 = r0
  7726. mov f106 = f0
  7727. }
  7728. { .mfb
  7729. setf.d f114 = r0
  7730. mov f122 = f0
  7731. nop __LINE__
  7732. }
  7733. ;;
  7734. { .mmf
  7735. (p7) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  7736. setf.d f67 = r0
  7737. mov f75 = f0
  7738. }
  /* p3 starts out true (r0 == r0). */
  7739. { .mfi
  7740. setf.d f83 = r0
  7741. mov f91 = f0
  7742. cmp.eq p3, p0 = r0, r0
  7743. }
  7744. ;;
  7745. { .mmf
  7746. (p7) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  7747. setf.d f99 = r0
  7748. mov f107 = f0
  7749. }
  7750. { .mfi
  7751. setf.d f115 = r0
  7752. mov f123 = f0
  7753. adds PREC = CPREFETCHSIZE * SIZE, C1
  7754. }
  7755. ;;
  7756. { .mmf
  7757. CPREFETCH [PREC], LDC
  7758. setf.d f68 = r0
  7759. mov f76 = f0
  7760. }
  7761. { .mfi
  7762. setf.d f84 = r0
  7763. mov f92 = f0
  7764. adds L = 1, L
  7765. }
  7766. ;;
  7767. { .mmf
  7768. CPREFETCH [PREC], LDC
  7769. setf.d f100 = r0
  7770. mov f108 = f0
  7771. }
  7772. { .mfi
  7773. setf.d f116 = r0
  7774. mov f124 = f0
  7775. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  7776. }
  7777. ;;
  7778. { .mmf
  7779. CPREFETCH [PREC], LDC
  7780. setf.d f69 = r0
  7781. mov f77 = f0
  7782. }
  7783. { .mfi
  7784. setf.d f85 = r0
  7785. mov f93 = f0
  7786. adds PREB = (PREFETCHSIZE - 8) * SIZE, BOFFSET
  7787. }
  7788. ;;
  7789. { .mmf
  7790. CPREFETCH [PREC], LDC
  7791. setf.d f101 = r0
  7792. mov f109 = f0
  7793. }
  /* p12 is taken from bit 0 of (L + 1), before the halving that follows. */
  7794. { .mfi
  7795. setf.d f117 = r0
  7796. mov f125 = f0
  7797. tbit.z p12, p0 = L, 0
  7798. }
  7799. ;;
  7800. { .mmf
  7801. CPREFETCH [PREC], LDC
  7802. setf.d f70 = r0
  7803. mov f78 = f0
  7804. }
  7805. { .mfi
  7806. setf.d f86 = r0
  7807. mov f94 = f0
  7808. shr L = L, 1
  7809. }
  7810. ;;
  7811. { .mmf
  7812. CPREFETCH [PREC], LDC
  7813. setf.d f102 = r0
  7814. mov f110 = f0
  7815. }
  7816. { .mfi
  7817. setf.d f118 = r0
  7818. mov f126 = f0
  7819. adds L = -1, L
  7820. }
  7821. ;;
  7822. { .mmf
  7823. CPREFETCH [PREC], LDC
  7824. setf.d f71 = r0
  7825. mov f79 = f0
  7826. }
  7827. { .mfi
  7828. setf.d f87 = r0
  7829. mov f95 = f0
  7830. mov ar.lc = L
  7831. }
  7832. ;;
  7833. { .mmf
  7834. CPREFETCH [PREC]
  7835. setf.d f103 = r0
  7836. mov f111 = f0
  7837. }
  /* L == -1 means there is nothing to accumulate: skip the loop. */
  7838. { .mfb
  7839. cmp.eq p6, p0 = -1, L
  7840. mov f127 = f0
  7841. (p6) br.cond.dpnt .L018
  7842. }
  7843. ;;
  /*
   * .L012 -- main accumulation loop for the 8x8 tile, two k-steps per pass.
   *
   * Accumulators: f(64 + i + 8*j) accumulates A(i+1) * B(j+1) for
   * i, j = 0..7 (f64..f127), as spelled out by the trailing "An * Bm" tags.
   * FMA is a macro defined outside this chunk (presumably d = a * b + c).
   *
   * Steps  1-32 : first k-step, operands f32-f39 (A) and f48-f55 (B),
   *               executed on every pass.
   * Steps 33-64 : second k-step, operands f40-f47 (A) and f56-f63 (B),
   *               every FMA predicated on p3.
   *
   * L is decremented once per pass (step 64) and ar.lc was loaded from L
   * just before the loop, so L == 0 identifies the final pass.
   *   p3  - "second k-step is present". It is only re-evaluated when p12 is
   *         set (p12 is computed ahead of the loop by tbit.z on L); in that
   *         case p3 = (L != 0), so the final pass does the first k-step only.
   *   p4  - (L != 0): another pass follows, so reload f32-f39 / f48-f55.
   *   p5  - complement of p4; not read inside this loop.
   *
   * A full pass issues 8 LDFPD from AOFFSET and 8 from BOFFSET (2 values
   * each), matching the 16 * SIZE stride applied to PREA and PREB.
   */
  7844. .align 16
  7845. .L012:
  7846. /* 1 */
  7847. { .mfi
  7848. lfetch.fault.nt1 [PREA], 16 * SIZE
  7849. FMA f64 = f32, f48, f64 // A1 * B1
  7850. nop __LINE__
  7851. }
  7852. { .mfi
  /* Only when p12 is set: drop p3 on the pass where L has reached 0. */
  7853. (p12) cmp.ne p3, p0 = 0, L
  7854. FMA f72 = f32, f49, f72 // A1 * B2
  7855. nop __LINE__
  7856. }
  7857. ;;
  7858. /* 2 */
  7859. { .mfb
  7860. lfetch.nt1 [PREB], 16 * SIZE
  7861. FMA f80 = f32, f50, f80 // A1 * B3
  7862. nop __LINE__
  7863. }
  7864. { .mfb
  /* p4 = another pass follows; gates the reloads in steps 30-39. */
  7865. cmp.ne p4, p5 = 0, L
  7866. FMA f88 = f32, f51, f88 // A1 * B4
  7867. nop __LINE__
  7868. }
  7869. ;;
  /*
   * Steps 3-10: under p3, load the second k-step operands (f56-f63 from
   * BOFFSET, f40-f47 from AOFFSET) while the first k-step is computed.
   * The second bundle of each step sets C9..C16 = C1..C8 + 4 * SIZE; these
   * are recomputed on every pass and are not read inside the loop.
   */
  7870. /* 3 */
  7871. { .mfb
  7872. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  7873. FMA f96 = f32, f52, f96 // A1 * B5
  7874. nop __LINE__
  7875. }
  7876. { .mfb
  7877. adds C9 = 4 * SIZE, C1
  7878. FMA f104 = f32, f53, f104 // A1 * B6
  7879. nop __LINE__
  7880. }
  7881. ;;
  7882. /* 4 */
  7883. { .mfb
  7884. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  7885. FMA f112 = f32, f54, f112 // A1 * B7
  7886. nop __LINE__
  7887. }
  7888. { .mfb
  7889. adds C10 = 4 * SIZE, C2
  7890. FMA f120 = f32, f55, f120 // A1 * B8
  7891. nop __LINE__
  7892. }
  7893. ;;
  7894. /* 5 */
  7895. { .mfb
  7896. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  7897. FMA f65 = f33, f48, f65 // A2 * B1
  7898. nop __LINE__
  7899. }
  7900. { .mfb
  7901. adds C11 = 4 * SIZE, C3
  7902. FMA f73 = f33, f49, f73 // A2 * B2
  7903. nop __LINE__
  7904. }
  7905. ;;
  7906. /* 6 */
  7907. { .mfb
  7908. (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  7909. FMA f81 = f33, f50, f81 // A2 * B3
  7910. nop __LINE__
  7911. }
  7912. { .mfb
  7913. adds C12 = 4 * SIZE, C4
  7914. FMA f89 = f33, f51, f89 // A2 * B4
  7915. nop __LINE__
  7916. }
  7917. ;;
  7918. /* 7 */
  7919. { .mfb
  7920. (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  7921. FMA f97 = f33, f52, f97 // A2 * B5
  7922. nop __LINE__
  7923. }
  7924. { .mfb
  7925. adds C13 = 4 * SIZE, C5
  7926. FMA f105 = f33, f53, f105 // A2 * B6
  7927. nop __LINE__
  7928. }
  7929. ;;
  7930. /* 8 */
  7931. { .mfb
  7932. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  7933. FMA f113 = f33, f54, f113 // A2 * B7
  7934. nop __LINE__
  7935. }
  7936. { .mfb
  7937. adds C14 = 4 * SIZE, C6
  7938. FMA f121 = f33, f55, f121 // A2 * B8
  7939. nop __LINE__
  7940. }
  7941. ;;
  7942. /* 9 */
  7943. { .mfb
  7944. (p3) LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  7945. FMA f66 = f34, f48, f66 // A3 * B1
  7946. nop __LINE__
  7947. }
  7948. { .mfb
  7949. adds C15 = 4 * SIZE, C7
  7950. FMA f74 = f34, f49, f74 // A3 * B2
  7951. nop __LINE__
  7952. }
  7953. ;;
  7954. /* 10 */
  7955. { .mfb
  7956. (p3) LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  7957. FMA f82 = f34, f50, f82 // A3 * B3
  7958. nop __LINE__
  7959. }
  7960. { .mfb
  7961. adds C16 = 4 * SIZE, C8
  7962. FMA f90 = f34, f51, f90 // A3 * B4
  7963. nop __LINE__
  7964. }
  7965. ;;
  /*
   * NOTE(review): the first bundle of steps 11, 12, 14, 15 and 16
   * (FMA + nop) and of step 13 (nop + FMA) lists only two instructions
   * under a three-slot .mfb template, unlike every other bundle in this
   * loop. Presumably the assembler pads the missing slot with a nop --
   * confirm against the assembler documentation.
   */
  7966. /* 11 */
  7967. { .mfb
  7968. FMA f98 = f34, f52, f98 // A3 * B5
  7969. nop __LINE__
  7970. }
  7971. { .mfb
  7972. nop __LINE__
  7973. FMA f106 = f34, f53, f106 // A3 * B6
  7974. nop __LINE__
  7975. }
  7976. ;;
  7977. /* 12 */
  7978. { .mfb
  7979. FMA f114 = f34, f54, f114 // A3 * B7
  7980. nop __LINE__
  7981. }
  7982. { .mfb
  7983. nop __LINE__
  7984. FMA f122 = f34, f55, f122 // A3 * B8
  7985. nop __LINE__
  7986. }
  7987. ;;
  7988. /* 13 */
  7989. { .mfb
  7990. nop __LINE__
  7991. FMA f67 = f35, f48, f67 // A4 * B1
  7992. }
  7993. { .mfb
  7994. nop __LINE__
  7995. FMA f75 = f35, f49, f75 // A4 * B2
  7996. nop __LINE__
  7997. }
  7998. ;;
  7999. /* 14 */
  8000. { .mfb
  8001. FMA f83 = f35, f50, f83 // A4 * B3
  8002. nop __LINE__
  8003. }
  8004. { .mfb
  8005. nop __LINE__
  8006. FMA f91 = f35, f51, f91 // A4 * B4
  8007. nop __LINE__
  8008. }
  8009. ;;
  8010. /* 15 */
  8011. { .mfb
  8012. FMA f99 = f35, f52, f99 // A4 * B5
  8013. nop __LINE__
  8014. }
  8015. { .mfb
  8016. nop __LINE__
  8017. FMA f107 = f35, f53, f107 // A4 * B6
  8018. nop __LINE__
  8019. }
  8020. ;;
  8021. /* 16 */
  8022. { .mfb
  8023. FMA f115 = f35, f54, f115 // A4 * B7
  8024. nop __LINE__
  8025. }
  8026. { .mfb
  8027. nop __LINE__
  8028. FMA f123 = f35, f55, f123 // A4 * B8
  8029. nop __LINE__
  8030. }
  8031. ;;
  8032. /* 17 */
  8033. { .mfb
  8034. nop __LINE__
  8035. FMA f68 = f36, f48, f68 // A5 * B1
  8036. nop __LINE__
  8037. }
  8038. { .mfb
  8039. nop __LINE__
  8040. FMA f76 = f36, f49, f76 // A5 * B2
  8041. nop __LINE__
  8042. }
  8043. ;;
  8044. /* 18 */
  8045. { .mfb
  8046. nop __LINE__
  8047. FMA f84 = f36, f50, f84 // A5 * B3
  8048. nop __LINE__
  8049. }
  8050. { .mfb
  8051. nop __LINE__
  8052. FMA f92 = f36, f51, f92 // A5 * B4
  8053. nop __LINE__
  8054. }
  8055. ;;
  8056. /* 19 */
  8057. { .mfb
  8058. nop __LINE__
  8059. FMA f100 = f36, f52, f100 // A5 * B5
  8060. nop __LINE__
  8061. }
  8062. { .mfb
  8063. nop __LINE__
  8064. FMA f108 = f36, f53, f108 // A5 * B6
  8065. nop __LINE__
  8066. }
  8067. ;;
  8068. /* 20 */
  8069. { .mfb
  8070. nop __LINE__
  8071. FMA f116 = f36, f54, f116 // A5 * B7
  8072. nop __LINE__
  8073. }
  8074. { .mfb
  8075. nop __LINE__
  8076. FMA f124 = f36, f55, f124 // A5 * B8
  8077. nop __LINE__
  8078. }
  8079. ;;
  8080. /* 21 */
  8081. { .mfb
  8082. nop __LINE__
  8083. FMA f69 = f37, f48, f69 // A6 * B1
  8084. nop __LINE__
  8085. }
  8086. { .mfb
  8087. nop __LINE__
  8088. FMA f77 = f37, f49, f77 // A6 * B2
  8089. nop __LINE__
  8090. }
  8091. ;;
  8092. /* 22 */
  8093. { .mfb
  8094. nop __LINE__
  8095. FMA f85 = f37, f50, f85 // A6 * B3
  8096. nop __LINE__
  8097. }
  8098. { .mfb
  8099. nop __LINE__
  8100. FMA f93 = f37, f51, f93 // A6 * B4
  8101. nop __LINE__
  8102. }
  8103. ;;
  8104. /* 23 */
  8105. { .mfb
  8106. nop __LINE__
  8107. FMA f101 = f37, f52, f101 // A6 * B5
  8108. nop __LINE__
  8109. }
  8110. { .mfb
  8111. nop __LINE__
  8112. FMA f109 = f37, f53, f109 // A6 * B6
  8113. nop __LINE__
  8114. }
  8115. ;;
  8116. /* 24 */
  8117. { .mfb
  8118. nop __LINE__
  8119. FMA f117 = f37, f54, f117 // A6 * B7
  8120. nop __LINE__
  8121. }
  8122. { .mfb
  8123. nop __LINE__
  8124. FMA f125 = f37, f55, f125 // A6 * B8
  8125. nop __LINE__
  8126. }
  8127. ;;
  8128. /* 25 */
  8129. { .mfb
  8130. nop __LINE__
  8131. FMA f70 = f38, f48, f70 // A7 * B1
  8132. nop __LINE__
  8133. }
  8134. { .mfb
  8135. nop __LINE__
  8136. FMA f78 = f38, f49, f78 // A7 * B2
  8137. nop __LINE__
  8138. }
  8139. ;;
  8140. /* 26 */
  8141. { .mfb
  8142. nop __LINE__
  8143. FMA f86 = f38, f50, f86 // A7 * B3
  8144. nop __LINE__
  8145. }
  8146. { .mfb
  8147. nop __LINE__
  8148. FMA f94 = f38, f51, f94 // A7 * B4
  8149. nop __LINE__
  8150. }
  8151. ;;
  8152. /* 27 */
  8153. { .mfb
  8154. nop __LINE__
  8155. FMA f102 = f38, f52, f102 // A7 * B5
  8156. nop __LINE__
  8157. }
  8158. { .mfb
  8159. nop __LINE__
  8160. FMA f110 = f38, f53, f110 // A7 * B6
  8161. nop __LINE__
  8162. }
  8163. ;;
  8164. /* 28 */
  8165. { .mfb
  8166. nop __LINE__
  8167. FMA f118 = f38, f54, f118 // A7 * B7
  8168. nop __LINE__
  8169. }
  8170. { .mfb
  8171. nop __LINE__
  8172. FMA f126 = f38, f55, f126 // A7 * B8
  8173. nop __LINE__
  8174. }
  8175. ;;
  8176. /* 29 */
  8177. { .mfb
  8178. nop __LINE__
  8179. FMA f71 = f39, f48, f71 // A8 * B1
  8180. nop __LINE__
  8181. }
  8182. { .mfb
  8183. nop __LINE__
  8184. FMA f79 = f39, f49, f79 // A8 * B2
  8185. nop __LINE__
  8186. }
  8187. ;;
  /*
   * Steps 30-39: under p4, reload the first k-step operands for the next
   * pass (f32-f39 from AOFFSET, f48-f55 from BOFFSET). Each reload sits
   * after the last read of its target registers in the current pass.
   */
  8188. /* 30 */
  8189. { .mfb
  8190. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  8191. FMA f87 = f39, f50, f87 // A8 * B3
  8192. nop __LINE__
  8193. }
  8194. { .mfb
  8195. nop __LINE__
  8196. FMA f95 = f39, f51, f95 // A8 * B4
  8197. nop __LINE__
  8198. }
  8199. ;;
  8200. /* 31 */
  8201. { .mfb
  8202. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  8203. FMA f103 = f39, f52, f103 // A8 * B5
  8204. nop __LINE__
  8205. }
  8206. { .mfb
  8207. nop __LINE__
  8208. FMA f111 = f39, f53, f111 // A8 * B6
  8209. nop __LINE__
  8210. }
  8211. ;;
  8212. /* 32 */
  8213. { .mfb
  8214. nop __LINE__
  8215. FMA f119 = f39, f54, f119 // A8 * B7
  8216. nop __LINE__
  8217. }
  8218. { .mfb
  8219. nop __LINE__
  8220. FMA f127 = f39, f55, f127 // A8 * B8
  8221. nop __LINE__
  8222. }
  8223. ;;
  /*
   * Second k-step (steps 33-64): same 8x8 update using f40-f47 and
   * f56-f63; every FMA is predicated on p3.
   */
  8224. /* 33 */
  8225. { .mfb
  8226. nop __LINE__
  8227. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  8228. nop __LINE__
  8229. }
  8230. { .mfb
  8231. nop __LINE__
  8232. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  8233. nop __LINE__
  8234. }
  8235. ;;
  8236. /* 34 */
  8237. { .mfb
  8238. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  8239. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  8240. nop __LINE__
  8241. }
  8242. { .mfb
  8243. nop __LINE__
  8244. (p3) FMA f88 = f40, f59, f88 // A1 * B4
  8245. nop __LINE__
  8246. }
  8247. ;;
  8248. /* 35 */
  8249. { .mfb
  8250. (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  8251. (p3) FMA f96 = f40, f60, f96 // A1 * B5
  8252. nop __LINE__
  8253. }
  8254. { .mfb
  8255. nop __LINE__
  8256. (p3) FMA f104 = f40, f61, f104 // A1 * B6
  8257. nop __LINE__
  8258. }
  8259. ;;
  8260. /* 36 */
  8261. { .mfb
  8262. (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  8263. (p3) FMA f112 = f40, f62, f112 // A1 * B7
  8264. nop __LINE__
  8265. }
  8266. { .mfb
  8267. nop __LINE__
  8268. (p3) FMA f120 = f40, f63, f120 // A1 * B8
  8269. nop __LINE__
  8270. }
  8271. ;;
  8272. /* 37 */
  8273. { .mfb
  8274. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  8275. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  8276. nop __LINE__
  8277. }
  8278. { .mfb
  8279. nop __LINE__
  8280. (p3) FMA f73 = f41, f57, f73 // A2 * B2
  8281. nop __LINE__
  8282. }
  8283. ;;
  8284. /* 38 */
  8285. { .mfb
  8286. (p4) LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  8287. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  8288. nop __LINE__
  8289. }
  8290. { .mfb
  8291. nop __LINE__
  8292. (p3) FMA f89 = f41, f59, f89 // A2 * B4
  8293. nop __LINE__
  8294. }
  8295. ;;
  8296. /* 39 */
  8297. { .mfb
  8298. (p4) LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  8299. (p3) FMA f97 = f41, f60, f97 // A2 * B5
  8300. nop __LINE__
  8301. }
  8302. { .mfb
  8303. nop __LINE__
  8304. (p3) FMA f105 = f41, f61, f105 // A2 * B6
  8305. nop __LINE__
  8306. }
  8307. ;;
  8308. /* 40 */
  8309. { .mfb
  8310. nop __LINE__
  8311. (p3) FMA f113 = f41, f62, f113 // A2 * B7
  8312. nop __LINE__
  8313. }
  8314. { .mfb
  8315. nop __LINE__
  8316. (p3) FMA f121 = f41, f63, f121 // A2 * B8
  8317. nop __LINE__
  8318. }
  8319. ;;
  8320. /* 41 */
  8321. { .mfb
  8322. nop __LINE__
  8323. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  8324. nop __LINE__
  8325. }
  8326. { .mfb
  8327. nop __LINE__
  8328. (p3) FMA f74 = f42, f57, f74 // A3 * B2
  8329. nop __LINE__
  8330. }
  8331. ;;
  8332. /* 42 */
  8333. { .mfb
  8334. nop __LINE__
  8335. (p3) FMA f82 = f42, f58, f82 // A3 * B3
  8336. nop __LINE__
  8337. }
  8338. { .mfb
  8339. nop __LINE__
  8340. (p3) FMA f90 = f42, f59, f90 // A3 * B4
  8341. nop __LINE__
  8342. }
  8343. ;;
  8344. /* 43 */
  8345. { .mfb
  8346. nop __LINE__
  8347. (p3) FMA f98 = f42, f60, f98 // A3 * B5
  8348. nop __LINE__
  8349. }
  8350. { .mfb
  8351. nop __LINE__
  8352. (p3) FMA f106 = f42, f61, f106 // A3 * B6
  8353. nop __LINE__
  8354. }
  8355. ;;
  8356. /* 44 */
  8357. { .mfb
  8358. nop __LINE__
  8359. (p3) FMA f114 = f42, f62, f114 // A3 * B7
  8360. nop __LINE__
  8361. }
  8362. { .mfb
  8363. nop __LINE__
  8364. (p3) FMA f122 = f42, f63, f122 // A3 * B8
  8365. nop __LINE__
  8366. }
  8367. ;;
  8368. /* 45 */
  8369. { .mfb
  8370. nop __LINE__
  8371. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  8372. nop __LINE__
  8373. }
  8374. { .mfb
  8375. nop __LINE__
  8376. (p3) FMA f75 = f43, f57, f75 // A4 * B2
  8377. nop __LINE__
  8378. }
  8379. ;;
  8380. /* 46 */
  8381. { .mfb
  8382. nop __LINE__
  8383. (p3) FMA f83 = f43, f58, f83 // A4 * B3
  8384. nop __LINE__
  8385. }
  8386. { .mfb
  8387. nop __LINE__
  8388. (p3) FMA f91 = f43, f59, f91 // A4 * B4
  8389. nop __LINE__
  8390. }
  8391. ;;
  8392. /* 47 */
  8393. { .mfb
  8394. nop __LINE__
  8395. (p3) FMA f99 = f43, f60, f99 // A4 * B5
  8396. nop __LINE__
  8397. }
  8398. { .mfb
  8399. nop __LINE__
  8400. (p3) FMA f107 = f43, f61, f107 // A4 * B6
  8401. nop __LINE__
  8402. }
  8403. ;;
  8404. /* 48 */
  8405. { .mfb
  8406. nop __LINE__
  8407. (p3) FMA f115 = f43, f62, f115 // A4 * B7
  8408. nop __LINE__
  8409. }
  8410. { .mfb
  8411. nop __LINE__
  8412. (p3) FMA f123 = f43, f63, f123 // A4 * B8
  8413. nop __LINE__
  8414. }
  8415. ;;
  8416. /* 49 */
  8417. { .mfb
  8418. nop __LINE__
  8419. (p3) FMA f68 = f44, f56, f68 // A5 * B1
  8420. nop __LINE__
  8421. }
  8422. { .mfb
  8423. nop __LINE__
  8424. (p3) FMA f76 = f44, f57, f76 // A5 * B2
  8425. nop __LINE__
  8426. }
  8427. ;;
  8428. /* 50 */
  8429. { .mfb
  8430. nop __LINE__
  8431. (p3) FMA f84 = f44, f58, f84 // A5 * B3
  8432. nop __LINE__
  8433. }
  8434. { .mfb
  8435. nop __LINE__
  8436. (p3) FMA f92 = f44, f59, f92 // A5 * B4
  8437. nop __LINE__
  8438. }
  8439. ;;
  8440. /* 51 */
  8441. { .mfb
  8442. nop __LINE__
  8443. (p3) FMA f100 = f44, f60, f100 // A5 * B5
  8444. nop __LINE__
  8445. }
  8446. { .mfb
  8447. nop __LINE__
  8448. (p3) FMA f108 = f44, f61, f108 // A5 * B6
  8449. nop __LINE__
  8450. }
  8451. ;;
  8452. /* 52 */
  8453. { .mfb
  8454. nop __LINE__
  8455. (p3) FMA f116 = f44, f62, f116 // A5 * B7
  8456. nop __LINE__
  8457. }
  8458. { .mfb
  8459. nop __LINE__
  8460. (p3) FMA f124 = f44, f63, f124 // A5 * B8
  8461. nop __LINE__
  8462. }
  8463. ;;
  8464. /* 53 */
  8465. { .mfb
  8466. nop __LINE__
  8467. (p3) FMA f69 = f45, f56, f69 // A6 * B1
  8468. nop __LINE__
  8469. }
  8470. { .mfb
  8471. nop __LINE__
  8472. (p3) FMA f77 = f45, f57, f77 // A6 * B2
  8473. nop __LINE__
  8474. }
  8475. ;;
  8476. /* 54 */
  8477. { .mfb
  8478. nop __LINE__
  8479. (p3) FMA f85 = f45, f58, f85 // A6 * B3
  8480. nop __LINE__
  8481. }
  8482. { .mfb
  8483. nop __LINE__
  8484. (p3) FMA f93 = f45, f59, f93 // A6 * B4
  8485. nop __LINE__
  8486. }
  8487. ;;
  8488. /* 55 */
  8489. { .mfb
  8490. nop __LINE__
  8491. (p3) FMA f101 = f45, f60, f101 // A6 * B5
  8492. nop __LINE__
  8493. }
  8494. { .mfb
  8495. nop __LINE__
  8496. (p3) FMA f109 = f45, f61, f109 // A6 * B6
  8497. nop __LINE__
  8498. }
  8499. ;;
  8500. /* 56 */
  8501. { .mfb
  8502. nop __LINE__
  8503. (p3) FMA f117 = f45, f62, f117 // A6 * B7
  8504. nop __LINE__
  8505. }
  8506. { .mfb
  8507. nop __LINE__
  8508. (p3) FMA f125 = f45, f63, f125 // A6 * B8
  8509. nop __LINE__
  8510. }
  8511. ;;
  8512. /* 57 */
  8513. { .mfb
  8514. nop __LINE__
  8515. (p3) FMA f70 = f46, f56, f70 // A7 * B1
  8516. nop __LINE__
  8517. }
  8518. { .mfb
  8519. nop __LINE__
  8520. (p3) FMA f78 = f46, f57, f78 // A7 * B2
  8521. nop __LINE__
  8522. }
  8523. ;;
  8524. /* 58 */
  8525. { .mfb
  8526. nop __LINE__
  8527. (p3) FMA f86 = f46, f58, f86 // A7 * B3
  8528. nop __LINE__
  8529. }
  8530. { .mfb
  8531. nop __LINE__
  8532. (p3) FMA f94 = f46, f59, f94 // A7 * B4
  8533. nop __LINE__
  8534. }
  8535. ;;
  8536. /* 59 */
  8537. { .mfb
  8538. nop __LINE__
  8539. (p3) FMA f102 = f46, f60, f102 // A7 * B5
  8540. nop __LINE__
  8541. }
  8542. { .mfb
  8543. nop __LINE__
  8544. (p3) FMA f110 = f46, f61, f110 // A7 * B6
  8545. nop __LINE__
  8546. }
  8547. ;;
  8548. /* 60 */
  8549. { .mfb
  8550. nop __LINE__
  8551. (p3) FMA f118 = f46, f62, f118 // A7 * B7
  8552. nop __LINE__
  8553. }
  8554. { .mfb
  8555. nop __LINE__
  8556. (p3) FMA f126 = f46, f63, f126 // A7 * B8
  8557. nop __LINE__
  8558. }
  8559. ;;
  8560. /* 61 */
  8561. { .mfb
  8562. nop __LINE__
  8563. (p3) FMA f71 = f47, f56, f71 // A8 * B1
  8564. nop __LINE__
  8565. }
  8566. { .mfb
  8567. nop __LINE__
  8568. (p3) FMA f79 = f47, f57, f79 // A8 * B2
  8569. nop __LINE__
  8570. }
  8571. ;;
  8572. /* 62 */
  8573. { .mfb
  8574. nop __LINE__
  8575. (p3) FMA f87 = f47, f58, f87 // A8 * B3
  8576. nop __LINE__
  8577. }
  8578. { .mfb
  8579. nop __LINE__
  8580. (p3) FMA f95 = f47, f59, f95 // A8 * B4
  8581. nop __LINE__
  8582. }
  8583. ;;
  8584. /* 63 */
  8585. { .mfb
  8586. nop __LINE__
  8587. (p3) FMA f103 = f47, f60, f103 // A8 * B5
  8588. nop __LINE__
  8589. }
  8590. { .mfb
  8591. nop __LINE__
  8592. (p3) FMA f111 = f47, f61, f111 // A8 * B6
  8593. nop __LINE__
  8594. }
  8595. ;;
  /*
   * Step 64 closes the pass: L is decremented so the p3/p4 tests at the
   * top of the next pass see the remaining count, and br.cloop branches
   * back to .L012 based on ar.lc.
   */
  8596. /* 64 */
  8597. { .mfi
  8598. nop __LINE__
  8599. (p3) FMA f119 = f47, f62, f119 // A8 * B7
  8600. adds L = -1, L
  8601. }
  8602. { .mfb
  8603. nop __LINE__
  8604. (p3) FMA f127 = f47, f63, f127 // A8 * B8
  8605. br.cloop.sptk.few .L012
  8606. }
  8607. ;;
  8608. .L018:
  8609. #if defined(LN) || defined(RT)
  8610. #ifdef LN
  8611. adds r2 = -8, KK
  8612. #else
  8613. adds r2 = -8, KK
  8614. #endif
  8615. ;;
  8616. shladd r2 = r2, BASE_SHIFT, r0
  8617. ;;
  8618. shladd AOFFSET = r2, 3, AORIG
  8619. shladd BOFFSET = r2, 3, B
  8620. ;;
  8621. #endif
  8622. #if defined(LN) || defined(LT)
  8623. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  8624. ;;
  8625. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  8626. ;;
  8627. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  8628. ;;
  8629. LDFPD f38, f39 = [BOFFSET], 2 * SIZE
  8630. ;;
  8631. LDFPD f40, f41 = [BOFFSET], 2 * SIZE
  8632. ;;
  8633. LDFPD f42, f43 = [BOFFSET], 2 * SIZE
  8634. ;;
  8635. LDFPD f44, f45 = [BOFFSET], 2 * SIZE
  8636. ;;
  8637. LDFPD f46, f47 = [BOFFSET], 2 * SIZE
  8638. ;;
  8639. { .mfi
  8640. LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  8641. FSUB f64 = f32, f64
  8642. nop __LINE__
  8643. }
  8644. { .mfi
  8645. nop __LINE__
  8646. FSUB f72 = f33, f72
  8647. nop __LINE__
  8648. }
  8649. ;;
  8650. { .mfi
  8651. LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  8652. FSUB f80 = f34, f80
  8653. nop __LINE__
  8654. }
  8655. { .mfi
  8656. nop __LINE__
  8657. FSUB f88 = f35, f88
  8658. nop __LINE__
  8659. }
  8660. ;;
  8661. { .mfi
  8662. LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  8663. FSUB f96 = f36, f96
  8664. nop __LINE__
  8665. }
  8666. { .mfi
  8667. nop __LINE__
  8668. FSUB f104 = f37, f104
  8669. nop __LINE__
  8670. }
  8671. ;;
  8672. { .mfi
  8673. LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  8674. FSUB f112 = f38, f112
  8675. nop __LINE__
  8676. }
  8677. { .mfi
  8678. nop __LINE__
  8679. FSUB f120 = f39, f120
  8680. nop __LINE__
  8681. }
  8682. ;;
  8683. { .mfi
  8684. LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  8685. FSUB f65 = f40, f65
  8686. nop __LINE__
  8687. }
  8688. { .mfi
  8689. nop __LINE__
  8690. FSUB f73 = f41, f73
  8691. nop __LINE__
  8692. }
  8693. ;;
  8694. { .mfi
  8695. LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  8696. FSUB f81 = f42, f81
  8697. nop __LINE__
  8698. }
  8699. { .mfi
  8700. nop __LINE__
  8701. FSUB f89 = f43, f89
  8702. nop __LINE__
  8703. }
  8704. ;;
  8705. { .mfi
  8706. LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  8707. FSUB f97 = f44, f97
  8708. nop __LINE__
  8709. }
  8710. { .mfi
  8711. nop __LINE__
  8712. FSUB f105 = f45, f105
  8713. nop __LINE__
  8714. }
  8715. ;;
  8716. { .mfi
  8717. LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  8718. FSUB f113 = f46, f113
  8719. nop __LINE__
  8720. }
  8721. { .mfi
  8722. nop __LINE__
  8723. FSUB f121 = f47, f121
  8724. nop __LINE__
  8725. }
  8726. ;;
  8727. { .mfi
  8728. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  8729. FSUB f66 = f48, f66
  8730. nop __LINE__
  8731. }
  8732. { .mfi
  8733. nop __LINE__
  8734. FSUB f74 = f49, f74
  8735. nop __LINE__
  8736. }
  8737. ;;
  8738. { .mfi
  8739. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  8740. FSUB f82 = f50, f82
  8741. nop __LINE__
  8742. }
  8743. { .mfi
  8744. nop __LINE__
  8745. FSUB f90 = f51, f90
  8746. nop __LINE__
  8747. }
  8748. ;;
  8749. { .mfi
  8750. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  8751. FSUB f98 = f52, f98
  8752. nop __LINE__
  8753. }
  8754. { .mfi
  8755. nop __LINE__
  8756. FSUB f106 = f53, f106
  8757. nop __LINE__
  8758. }
  8759. ;;
  8760. { .mfi
  8761. LDFPD f38, f39 = [BOFFSET], 2 * SIZE
  8762. FSUB f114 = f54, f114
  8763. nop __LINE__
  8764. }
  8765. { .mfi
  8766. nop __LINE__
  8767. FSUB f122 = f55, f122
  8768. nop __LINE__
  8769. }
  8770. ;;
  8771. { .mfi
  8772. LDFPD f40, f41 = [BOFFSET], 2 * SIZE
  8773. FSUB f67 = f56, f67
  8774. nop __LINE__
  8775. }
  8776. { .mfi
  8777. nop __LINE__
  8778. FSUB f75 = f57, f75
  8779. nop __LINE__
  8780. }
  8781. ;;
  8782. { .mfi
  8783. LDFPD f42, f43 = [BOFFSET], 2 * SIZE
  8784. FSUB f83 = f58, f83
  8785. nop __LINE__
  8786. }
  8787. { .mfi
  8788. nop __LINE__
  8789. FSUB f91 = f59, f91
  8790. nop __LINE__
  8791. }
  8792. ;;
  8793. { .mfi
  8794. LDFPD f44, f45 = [BOFFSET], 2 * SIZE
  8795. FSUB f99 = f60, f99
  8796. nop __LINE__
  8797. }
  8798. { .mfi
  8799. nop __LINE__
  8800. FSUB f107 = f61, f107
  8801. nop __LINE__
  8802. }
  8803. ;;
  8804. { .mfi
  8805. LDFPD f46, f47 = [BOFFSET], 2 * SIZE
  8806. FSUB f115 = f62, f115
  8807. nop __LINE__
  8808. }
  8809. { .mfi
  8810. nop __LINE__
  8811. FSUB f123 = f63, f123
  8812. nop __LINE__
  8813. }
  8814. ;;
  8815. { .mfi
  8816. LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  8817. FSUB f68 = f32, f68
  8818. nop __LINE__
  8819. }
  8820. { .mfi
  8821. nop __LINE__
  8822. FSUB f76 = f33, f76
  8823. nop __LINE__
  8824. }
  8825. ;;
  8826. { .mfi
  8827. LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  8828. FSUB f84 = f34, f84
  8829. nop __LINE__
  8830. }
  8831. { .mfi
  8832. nop __LINE__
  8833. FSUB f92 = f35, f92
  8834. nop __LINE__
  8835. }
  8836. ;;
  8837. { .mfi
  8838. LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  8839. FSUB f100 = f36, f100
  8840. nop __LINE__
  8841. }
  8842. { .mfi
  8843. nop __LINE__
  8844. FSUB f108 = f37, f108
  8845. nop __LINE__
  8846. }
  8847. ;;
  8848. { .mfi
  8849. LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  8850. FSUB f116 = f38, f116
  8851. nop __LINE__
  8852. }
  8853. { .mfi
  8854. nop __LINE__
  8855. FSUB f124 = f39, f124
  8856. nop __LINE__
  8857. }
  8858. ;;
  8859. { .mfi
  8860. LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  8861. FSUB f69 = f40, f69
  8862. nop __LINE__
  8863. }
  8864. { .mfi
  8865. nop __LINE__
  8866. FSUB f77 = f41, f77
  8867. nop __LINE__
  8868. }
  8869. ;;
  8870. { .mfi
  8871. LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  8872. FSUB f85 = f42, f85
  8873. nop __LINE__
  8874. }
  8875. { .mfi
  8876. nop __LINE__
  8877. FSUB f93 = f43, f93
  8878. nop __LINE__
  8879. }
  8880. ;;
  8881. { .mfi
  8882. LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  8883. FSUB f101 = f44, f101
  8884. nop __LINE__
  8885. }
  8886. { .mfi
  8887. nop __LINE__
  8888. FSUB f109 = f45, f109
  8889. nop __LINE__
  8890. }
  8891. ;;
  8892. { .mfi
  8893. LDFPD f62, f63 = [BOFFSET]
  8894. FSUB f117 = f46, f117
  8895. adds BOFFSET = -62 * SIZE, BOFFSET
  8896. }
  8897. { .mfi
  8898. nop __LINE__
  8899. FSUB f125 = f47, f125
  8900. nop __LINE__
  8901. }
  8902. ;;
  8903. { .mfi
  8904. nop __LINE__
  8905. FSUB f70 = f48, f70
  8906. #ifdef LN
  8907. adds AOFFSET = 62 * SIZE, AOFFSET
  8908. #else
  8909. nop __LINE__
  8910. #endif
  8911. }
  8912. { .mfi
  8913. nop __LINE__
  8914. FSUB f78 = f49, f78
  8915. nop __LINE__
  8916. }
  8917. { .mfi
  8918. nop __LINE__
  8919. FSUB f86 = f50, f86
  8920. nop __LINE__
  8921. }
  8922. { .mfi
  8923. nop __LINE__
  8924. FSUB f94 = f51, f94
  8925. nop __LINE__
  8926. }
  8927. ;;
  8928. { .mfi
  8929. #ifdef LN
  8930. LDFPD f33, f32 = [AOFFSET]
  8931. #else
  8932. LDFPD f32, f33 = [AOFFSET]
  8933. #endif
  8934. FSUB f102 = f52, f102
  8935. nop __LINE__
  8936. }
  8937. { .mfi
  8938. nop __LINE__
  8939. FSUB f110 = f53, f110
  8940. nop __LINE__
  8941. }
  8942. { .mfi
  8943. nop __LINE__
  8944. FSUB f118 = f54, f118
  8945. nop __LINE__
  8946. }
  8947. { .mfi
  8948. nop __LINE__
  8949. FSUB f126 = f55, f126
  8950. #ifdef LN
  8951. adds AOFFSET = - 2 * SIZE, AOFFSET
  8952. #else
  8953. adds AOFFSET = 2 * SIZE, AOFFSET
  8954. #endif
  8955. }
  8956. ;;
  8957. { .mfi
  8958. nop __LINE__
  8959. FSUB f71 = f56, f71
  8960. nop __LINE__
  8961. }
  8962. { .mfi
  8963. nop __LINE__
  8964. FSUB f79 = f57, f79
  8965. nop __LINE__
  8966. }
  8967. { .mfi
  8968. nop __LINE__
  8969. FSUB f87 = f58, f87
  8970. nop __LINE__
  8971. }
  8972. { .mfi
  8973. nop __LINE__
  8974. FSUB f95 = f59, f95
  8975. nop __LINE__
  8976. }
  8977. { .mfi
  8978. nop __LINE__
  8979. FSUB f103 = f60, f103
  8980. nop __LINE__
  8981. }
  8982. { .mfi
  8983. nop __LINE__
  8984. FSUB f111 = f61, f111
  8985. nop __LINE__
  8986. }
  8987. { .mfi
  8988. nop __LINE__
  8989. FSUB f119 = f62, f119
  8990. nop __LINE__
  8991. }
  8992. { .mfi
  8993. nop __LINE__
  8994. FSUB f127 = f63, f127
  8995. nop __LINE__
  8996. }
  8997. ;;
  8998. #else
  8999. adds AOFFSET2 = 4 * SIZE, AOFFSET
  9000. ;;
  9001. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  9002. ;;
  9003. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  9004. ;;
  9005. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  9006. ;;
  9007. LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  9008. ;;
  9009. LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  9010. ;;
  9011. LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  9012. ;;
  9013. LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  9014. ;;
  9015. LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  9016. ;;
  9017. { .mfi
  9018. LDFPD f48, f49 = [AOFFSET], 2 * SIZE
  9019. FSUB f64 = f32, f64
  9020. }
  9021. { .mfi
  9022. FSUB f65 = f33, f65
  9023. }
  9024. ;;
  9025. { .mfi
  9026. LDFPD f50, f51 = [AOFFSET], 2 * SIZE
  9027. FSUB f66 = f34, f66
  9028. }
  9029. { .mfi
  9030. FSUB f67 = f35, f67
  9031. }
  9032. ;;
  9033. { .mfi
  9034. LDFPD f52, f53 = [AOFFSET], 2 * SIZE
  9035. FSUB f68 = f36, f68
  9036. }
  9037. { .mfi
  9038. FSUB f69 = f37, f69
  9039. }
  9040. ;;
  9041. { .mfi
  9042. LDFPD f54, f55 = [AOFFSET], 2 * SIZE
  9043. FSUB f70 = f38, f70
  9044. }
  9045. { .mfi
  9046. FSUB f71 = f39, f71
  9047. }
  9048. ;;
  9049. { .mfi
  9050. LDFPD f56, f57 = [AOFFSET], 2 * SIZE
  9051. FSUB f72 = f40, f72
  9052. }
  9053. { .mfi
  9054. FSUB f73 = f41, f73
  9055. }
  9056. ;;
  9057. { .mfi
  9058. LDFPD f58, f59 = [AOFFSET], 2 * SIZE
  9059. FSUB f74 = f42, f74
  9060. }
  9061. { .mfi
  9062. FSUB f75 = f43, f75
  9063. }
  9064. ;;
  9065. { .mfi
  9066. LDFPD f60, f61 = [AOFFSET], 2 * SIZE
  9067. FSUB f76 = f44, f76
  9068. }
  9069. { .mfi
  9070. FSUB f77 = f45, f77
  9071. }
  9072. ;;
  9073. { .mfi
  9074. LDFPD f62, f63 = [AOFFSET], 2 * SIZE
  9075. FSUB f78 = f46, f78
  9076. }
  9077. { .mfi
  9078. FSUB f79 = f47, f79
  9079. }
  9080. ;;
  9081. { .mfi
  9082. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  9083. FSUB f80 = f48, f80
  9084. nop __LINE__
  9085. }
  9086. { .mfi
  9087. nop __LINE__
  9088. FSUB f81 = f49, f81
  9089. nop __LINE__
  9090. }
  9091. ;;
  9092. { .mfi
  9093. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  9094. FSUB f82 = f50, f82
  9095. nop __LINE__
  9096. }
  9097. { .mfi
  9098. nop __LINE__
  9099. FSUB f83 = f51, f83
  9100. nop __LINE__
  9101. }
  9102. ;;
  9103. { .mfi
  9104. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  9105. FSUB f84 = f52, f84
  9106. nop __LINE__
  9107. }
  9108. { .mfi
  9109. nop __LINE__
  9110. FSUB f85 = f53, f85
  9111. nop __LINE__
  9112. }
  9113. ;;
  9114. { .mfi
  9115. LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  9116. FSUB f86 = f54, f86
  9117. nop __LINE__
  9118. }
  9119. { .mfi
  9120. nop __LINE__
  9121. FSUB f87 = f55, f87
  9122. nop __LINE__
  9123. }
  9124. ;;
  9125. { .mfi
  9126. LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  9127. FSUB f88 = f56, f88
  9128. nop __LINE__
  9129. }
  9130. { .mfi
  9131. nop __LINE__
  9132. FSUB f89 = f57, f89
  9133. nop __LINE__
  9134. }
  9135. ;;
  9136. { .mfi
  9137. LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  9138. FSUB f90 = f58, f90
  9139. nop __LINE__
  9140. }
  9141. { .mfi
  9142. nop __LINE__
  9143. FSUB f91 = f59, f91
  9144. nop __LINE__
  9145. }
  9146. ;;
  9147. { .mfi
  9148. LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  9149. FSUB f92 = f60, f92
  9150. nop __LINE__
  9151. }
  9152. { .mfi
  9153. nop __LINE__
  9154. FSUB f93 = f61, f93
  9155. nop __LINE__
  9156. }
  9157. ;;
  9158. { .mfi
  9159. LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  9160. FSUB f94 = f62, f94
  9161. nop __LINE__
  9162. }
  9163. { .mfi
  9164. nop __LINE__
  9165. FSUB f95 = f63, f95
  9166. nop __LINE__
  9167. }
  9168. ;;
  9169. { .mfi
  9170. LDFPD f48, f49 = [AOFFSET], 2 * SIZE
  9171. FSUB f96 = f32, f96
  9172. nop __LINE__
  9173. }
  9174. { .mfi
  9175. nop __LINE__
  9176. FSUB f97 = f33, f97
  9177. nop __LINE__
  9178. }
  9179. ;;
  9180. { .mfi
  9181. LDFPD f50, f51 = [AOFFSET], 2 * SIZE
  9182. FSUB f98 = f34, f98
  9183. nop __LINE__
  9184. }
  9185. { .mfi
  9186. nop __LINE__
  9187. FSUB f99 = f35, f99
  9188. nop __LINE__
  9189. }
  9190. ;;
  9191. { .mfi
  9192. LDFPD f52, f53 = [AOFFSET], 2 * SIZE
  9193. FSUB f100 = f36, f100
  9194. nop __LINE__
  9195. }
  9196. { .mfi
  9197. nop __LINE__
  9198. FSUB f101 = f37, f101
  9199. nop __LINE__
  9200. }
  9201. ;;
  9202. { .mfi
  9203. LDFPD f54, f55 = [AOFFSET], 2 * SIZE
  9204. FSUB f102 = f38, f102
  9205. nop __LINE__
  9206. }
  9207. { .mfi
  9208. nop __LINE__
  9209. FSUB f103 = f39, f103
  9210. nop __LINE__
  9211. }
  9212. ;;
  9213. { .mfi
  9214. LDFPD f56, f57 = [AOFFSET], 2 * SIZE
  9215. FSUB f104 = f40, f104
  9216. nop __LINE__
  9217. }
  9218. { .mfi
  9219. nop __LINE__
  9220. FSUB f105 = f41, f105
  9221. nop __LINE__
  9222. }
  9223. ;;
  9224. { .mfi
  9225. LDFPD f58, f59 = [AOFFSET], 2 * SIZE
  9226. FSUB f106 = f42, f106
  9227. nop __LINE__
  9228. }
  9229. { .mfi
  9230. nop __LINE__
  9231. FSUB f107 = f43, f107
  9232. nop __LINE__
  9233. }
  9234. ;;
  9235. { .mfi
  9236. LDFPD f60, f61 = [AOFFSET], 2 * SIZE
  9237. FSUB f108 = f44, f108
  9238. nop __LINE__
  9239. }
  9240. { .mfi
  9241. nop __LINE__
  9242. FSUB f109 = f45, f109
  9243. nop __LINE__
  9244. }
  9245. ;;
  9246. { .mfi
  9247. LDFPD f62, f63 = [AOFFSET]
  9248. FSUB f110 = f46, f110
  9249. adds AOFFSET = -62 * SIZE, AOFFSET
  9250. }
  9251. { .mfi
  9252. nop __LINE__
  9253. FSUB f111 = f47, f111
  9254. nop __LINE__
  9255. }
  9256. ;;
  9257. { .mfi
  9258. nop __LINE__
  9259. FSUB f112 = f48, f112
  9260. nop __LINE__
  9261. }
  9262. { .mfi
  9263. nop __LINE__
  9264. FSUB f113 = f49, f113
  9265. nop __LINE__
  9266. }
  9267. { .mfi
  9268. nop __LINE__
  9269. FSUB f114 = f50, f114
  9270. nop __LINE__
  9271. }
  9272. { .mfi
  9273. nop __LINE__
  9274. FSUB f115 = f51, f115
  9275. nop __LINE__
  9276. }
  9277. { .mfi
  9278. nop __LINE__
  9279. FSUB f116 = f52, f116
  9280. nop __LINE__
  9281. }
  9282. { .mfi
  9283. nop __LINE__
  9284. FSUB f117 = f53, f117
  9285. nop __LINE__
  9286. }
  9287. { .mfi
  9288. nop __LINE__
  9289. FSUB f118 = f54, f118
  9290. nop __LINE__
  9291. }
  9292. { .mfi
  9293. nop __LINE__
  9294. FSUB f119 = f55, f119
  9295. nop __LINE__
  9296. }
  9297. { .mfi
  9298. nop __LINE__
  9299. FSUB f120 = f56, f120
  9300. nop __LINE__
  9301. }
  9302. { .mfi
  9303. nop __LINE__
  9304. FSUB f121 = f57, f121
  9305. nop __LINE__
  9306. }
  9307. { .mfi
  9308. nop __LINE__
  9309. FSUB f122 = f58, f122
  9310. nop __LINE__
  9311. }
  9312. { .mfi
  9313. nop __LINE__
  9314. FSUB f123 = f59, f123
  9315. nop __LINE__
  9316. }
  9317. { .mfi
  9318. nop __LINE__
  9319. FSUB f124 = f60, f124
  9320. nop __LINE__
  9321. }
  9322. { .mfi
  9323. nop __LINE__
  9324. FSUB f125 = f61, f125
  9325. nop __LINE__
  9326. }
  9327. { .mfi
  9328. nop __LINE__
  9329. FSUB f126 = f62, f126
  9330. nop __LINE__
  9331. }
  9332. { .mfi
  9333. nop __LINE__
  9334. FSUB f127 = f63, f127
  9335. nop __LINE__
  9336. }
  9337. ;;
  9338. #endif
  9339. #ifdef LN
  /*
   * LN variant of the solve step for the 8x8 register tile held in f64-f127.
   *
   * Registers f(64+i+8k) and f(96+i+8k), k = 0..3, hold the eight values that
   * belong to solve index i (0..7).  Indices are processed from 7 down to 0:
   *   1. scale group i by one factor loaded through AOFFSET (FMPY),
   *   2. subtract group i times further loaded factors from every group
   *      j < i (FNMA; assumed to compute c - a*b for "FNMA d = a, b, c" --
   *      the macro is defined outside this chunk, confirm there),
   *   3. store group i as eight consecutive elements starting at
   *      (entry BOFFSET) + 8*i*SIZE, four through BOFFSET and four through
   *      BOFFSET2 (= BOFFSET + 4*SIZE).
   *
   * The scale factors are multiplied in rather than divided by -- presumably
   * the diagonal entries were inverted when the panel was packed; verify
   * against the packing code.
   *
   * f32 and f33 are used before anything in this block loads them, so they
   * must already be valid on entry (their load is not visible here).
   *
   * Pointer effects of the whole block: AOFFSET ends 60*SIZE below its entry
   * value, BOFFSET ends at its entry value, C1 and C2 are moved back by
   * 8*SIZE, and C9 is set to the updated C1 + 4*SIZE.
   */
  /* Index 7: scale by f32 while factors f34-f40 are fetched walking AOFFSET
     downward.  BOFFSET2 is derived from BOFFSET for the paired stores below. */
  9340. { .mfi
  9341. LDFPD f35, f34 = [AOFFSET]
  9342. FMPY f71 = f71, f32
  9343. adds AOFFSET = - 2 * SIZE, AOFFSET
  9344. }
  9345. { .mfi
  9346. nop __LINE__
  9347. FMPY f103 = f103, f32
  9348. adds BOFFSET2 = 4 * SIZE, BOFFSET
  9349. }
  9350. ;;
  9351. { .mfi
  9352. LDFPD f37, f36 = [AOFFSET]
  9353. FMPY f79 = f79, f32
  9354. adds AOFFSET = - 2 * SIZE, AOFFSET
  9355. }
  9356. { .mfi
  9357. nop __LINE__
  9358. FMPY f111 = f111, f32
  9359. nop __LINE__
  9360. }
  9361. ;;
  9362. { .mfi
  9363. LDFPD f39, f38 = [AOFFSET]
  9364. FMPY f87 = f87, f32
  9365. adds AOFFSET = - 2 * SIZE, AOFFSET
  9366. }
  9367. { .mfi
  9368. nop __LINE__
  9369. FMPY f119 = f119, f32
  9370. nop __LINE__
  9371. }
  9372. ;;
  9373. { .mfi
  9374. LDFD f40 = [AOFFSET], -2 * SIZE
  9375. FMPY f95 = f95, f32
  9376. nop __LINE__
  9377. }
  9378. { .mfi
  9379. nop __LINE__
  9380. FMPY f127 = f127, f32
  9381. nop __LINE__
  9382. }
  9383. ;;
  /* Eliminate index 7 from indices 6..0 using f33..f39 (one factor per target
     index), overlapped with the loads of all remaining factors
     (f41-f61, then f16-f21). */
  9384. { .mfi
  9385. LDFPD f42, f41 = [AOFFSET]
  9386. FNMA f70 = f71, f33, f70
  9387. adds AOFFSET = - 2 * SIZE, AOFFSET
  9388. }
  9389. { .mfi
  9390. nop __LINE__
  9391. FNMA f102 = f103, f33, f102
  9392. nop __LINE__
  9393. }
  9394. ;;
  9395. { .mfi
  9396. LDFPD f44, f43 = [AOFFSET]
  9397. FNMA f78 = f79, f33, f78
  9398. adds AOFFSET = - 2 * SIZE, AOFFSET
  9399. }
  9400. { .mfi
  9401. nop __LINE__
  9402. FNMA f110 = f111, f33, f110
  9403. nop __LINE__
  9404. }
  9405. ;;
  9406. { .mfi
  9407. LDFPD f46, f45 = [AOFFSET]
  9408. FNMA f86 = f87, f33, f86
  9409. adds AOFFSET = - 4 * SIZE, AOFFSET
  9410. }
  9411. { .mfi
  9412. nop __LINE__
  9413. FNMA f118 = f119, f33, f118
  9414. nop __LINE__
  9415. }
  9416. ;;
  9417. { .mfi
  9418. LDFPD f48, f47 = [AOFFSET]
  9419. FNMA f94 = f95, f33, f94
  9420. adds AOFFSET = - 2 * SIZE, AOFFSET
  9421. }
  9422. { .mfi
  9423. nop __LINE__
  9424. FNMA f126 = f127, f33, f126
  9425. nop __LINE__
  9426. }
  9427. ;;
  9428. { .mfi
  9429. LDFPD f50, f49 = [AOFFSET]
  9430. FNMA f69 = f71, f34, f69
  9431. adds AOFFSET = - 2 * SIZE, AOFFSET
  9432. }
  9433. { .mfi
  9434. nop __LINE__
  9435. FNMA f101 = f103, f34, f101
  9436. nop __LINE__
  9437. }
  9438. ;;
  9439. { .mfi
  9440. LDFPD f52, f51 = [AOFFSET]
  9441. FNMA f77 = f79, f34, f77
  9442. adds AOFFSET = - 4 * SIZE, AOFFSET
  9443. }
  9444. { .mfi
  9445. nop __LINE__
  9446. FNMA f109 = f111, f34, f109
  9447. nop __LINE__
  9448. }
  9449. ;;
  9450. { .mfi
  9451. LDFD f53 = [AOFFSET], -2 * SIZE
  9452. FNMA f85 = f87, f34, f85
  9453. nop __LINE__
  9454. }
  9455. { .mfi
  9456. nop __LINE__
  9457. FNMA f117 = f119, f34, f117
  9458. nop __LINE__
  9459. }
  9460. ;;
  9461. { .mfi
  9462. LDFPD f55, f54 = [AOFFSET]
  9463. FNMA f93 = f95, f34, f93
  9464. adds AOFFSET = - 2 * SIZE, AOFFSET
  9465. }
  9466. { .mfi
  9467. nop __LINE__
  9468. FNMA f125 = f127, f34, f125
  9469. nop __LINE__
  9470. }
  9471. ;;
  9472. { .mfi
  9473. LDFPD f57, f56 = [AOFFSET]
  9474. FNMA f68 = f71, f35, f68
  9475. adds AOFFSET = - 6 * SIZE, AOFFSET
  9476. }
  9477. { .mfi
  9478. nop __LINE__
  9479. FNMA f100 = f103, f35, f100
  9480. nop __LINE__
  9481. }
  9482. ;;
  9483. { .mfi
  9484. LDFPD f59, f58 = [AOFFSET]
  9485. FNMA f76 = f79, f35, f76
  9486. adds AOFFSET = - 2 * SIZE, AOFFSET
  9487. }
  9488. { .mfi
  9489. nop __LINE__
  9490. FNMA f108 = f111, f35, f108
  9491. nop __LINE__
  9492. }
  9493. ;;
  9494. { .mfi
  9495. LDFPD f61, f60 = [AOFFSET]
  9496. FNMA f84 = f87, f35, f84
  9497. adds AOFFSET = - 6 * SIZE, AOFFSET
  9498. }
  9499. { .mfi
  9500. nop __LINE__
  9501. FNMA f116 = f119, f35, f116
  9502. nop __LINE__
  9503. }
  9504. ;;
  9505. { .mfi
  9506. LDFD f16 = [AOFFSET], -2 * SIZE
  9507. FNMA f92 = f95, f35, f92
  9508. nop __LINE__
  9509. }
  9510. { .mfi
  9511. nop __LINE__
  9512. FNMA f124 = f127, f35, f124
  9513. nop __LINE__
  9514. }
  9515. ;;
  9516. { .mfi
  9517. LDFPD f18, f17 = [AOFFSET]
  9518. FNMA f67 = f71, f36, f67
  9519. adds AOFFSET = - 8 * SIZE, AOFFSET
  9520. }
  9521. { .mfi
  9522. nop __LINE__
  9523. FNMA f99 = f103, f36, f99
  9524. nop __LINE__
  9525. }
  9526. ;;
  9527. { .mfi
  9528. LDFPD f20, f19 = [AOFFSET]
  9529. FNMA f75 = f79, f36, f75
  9530. adds AOFFSET = - 8 * SIZE, AOFFSET
  9531. }
  9532. { .mfi
  9533. nop __LINE__
  9534. FNMA f107 = f111, f36, f107
  9535. nop __LINE__
  9536. }
  9537. ;;
  /* Last factor load (f21); AOFFSET is not moved afterwards.  Both store
     pointers are advanced by 56*SIZE so the first stores land in the slot of
     index 7. */
  9538. { .mfi
  9539. LDFD f21 = [AOFFSET]
  9540. FNMA f83 = f87, f36, f83
  9541. adds BOFFSET = 56 * SIZE, BOFFSET
  9542. }
  9543. { .mfi
  9544. FNMA f115 = f119, f36, f115
  9545. adds BOFFSET2 = 56 * SIZE, BOFFSET2
  9546. }
  9547. ;;
  9548. FNMA f91 = f95, f36, f91
  9549. FNMA f123 = f127, f36, f123
  9550. ;;
  9551. FNMA f66 = f71, f37, f66
  9552. FNMA f98 = f103, f37, f98
  9553. FNMA f74 = f79, f37, f74
  9554. FNMA f106 = f111, f37, f106
  9555. FNMA f82 = f87, f37, f82
  9556. FNMA f114 = f119, f37, f114
  9557. FNMA f90 = f95, f37, f90
  9558. FNMA f122 = f127, f37, f122
  9559. ;;
  9560. FNMA f65 = f71, f38, f65
  9561. FNMA f97 = f103, f38, f97
  9562. FNMA f73 = f79, f38, f73
  9563. FNMA f105 = f111, f38, f105
  9564. FNMA f81 = f87, f38, f81
  9565. FNMA f113 = f119, f38, f113
  9566. FNMA f89 = f95, f38, f89
  9567. FNMA f121 = f127, f38, f121
  9568. ;;
  9569. FNMA f64 = f71, f39, f64
  9570. FNMA f96 = f103, f39, f96
  9571. FNMA f72 = f79, f39, f72
  9572. FNMA f104 = f111, f39, f104
  9573. FNMA f80 = f87, f39, f80
  9574. FNMA f112 = f119, f39, f112
  9575. FNMA f88 = f95, f39, f88
  9576. FNMA f120 = f127, f39, f120
  9577. ;;
  /* Index 6: scale by f40, then eliminate it from indices 5..0 (f41..f46). */
  9578. FMPY f70 = f70, f40
  9579. FMPY f102 = f102, f40
  9580. FMPY f78 = f78, f40
  9581. FMPY f110 = f110, f40
  9582. FMPY f86 = f86, f40
  9583. FMPY f118 = f118, f40
  9584. FMPY f94 = f94, f40
  9585. FMPY f126 = f126, f40
  9586. ;;
  9587. FNMA f69 = f70, f41, f69
  9588. FNMA f101 = f102, f41, f101
  9589. FNMA f77 = f78, f41, f77
  9590. FNMA f109 = f110, f41, f109
  9591. FNMA f85 = f86, f41, f85
  9592. FNMA f117 = f118, f41, f117
  9593. FNMA f93 = f94, f41, f93
  9594. FNMA f125 = f126, f41, f125
  9595. ;;
  9596. FNMA f68 = f70, f42, f68
  9597. FNMA f100 = f102, f42, f100
  9598. FNMA f76 = f78, f42, f76
  9599. FNMA f108 = f110, f42, f108
  9600. FNMA f84 = f86, f42, f84
  9601. FNMA f116 = f118, f42, f116
  9602. FNMA f92 = f94, f42, f92
  9603. FNMA f124 = f126, f42, f124
  9604. ;;
  9605. FNMA f67 = f70, f43, f67
  9606. FNMA f99 = f102, f43, f99
  9607. FNMA f75 = f78, f43, f75
  9608. FNMA f107 = f110, f43, f107
  9609. FNMA f83 = f86, f43, f83
  9610. FNMA f115 = f118, f43, f115
  9611. FNMA f91 = f94, f43, f91
  9612. FNMA f123 = f126, f43, f123
  9613. ;;
  9614. FNMA f66 = f70, f44, f66
  9615. FNMA f98 = f102, f44, f98
  9616. FNMA f74 = f78, f44, f74
  9617. FNMA f106 = f110, f44, f106
  9618. FNMA f82 = f86, f44, f82
  9619. FNMA f114 = f118, f44, f114
  9620. FNMA f90 = f94, f44, f90
  9621. FNMA f122 = f126, f44, f122
  9622. ;;
  9623. FNMA f65 = f70, f45, f65
  9624. FNMA f97 = f102, f45, f97
  9625. FNMA f73 = f78, f45, f73
  9626. FNMA f105 = f110, f45, f105
  9627. FNMA f81 = f86, f45, f81
  9628. FNMA f113 = f118, f45, f113
  9629. FNMA f89 = f94, f45, f89
  9630. FNMA f121 = f126, f45, f121
  9631. ;;
  9632. FNMA f64 = f70, f46, f64
  9633. FNMA f96 = f102, f46, f96
  9634. FNMA f72 = f78, f46, f72
  9635. FNMA f104 = f110, f46, f104
  9636. FNMA f80 = f86, f46, f80
  9637. FNMA f112 = f118, f46, f112
  9638. FNMA f88 = f94, f46, f88
  9639. FNMA f120 = f126, f46, f120
  9640. ;;
  /* Index 5: scale by f47, then eliminate it from indices 4..0 (f48..f52). */
  9641. FMPY f69 = f69, f47
  9642. FMPY f101 = f101, f47
  9643. FMPY f77 = f77, f47
  9644. FMPY f109 = f109, f47
  9645. FMPY f85 = f85, f47
  9646. FMPY f117 = f117, f47
  9647. FMPY f93 = f93, f47
  9648. FMPY f125 = f125, f47
  9649. ;;
  9650. FNMA f68 = f69, f48, f68
  9651. FNMA f100 = f101, f48, f100
  9652. FNMA f76 = f77, f48, f76
  9653. FNMA f108 = f109, f48, f108
  9654. FNMA f84 = f85, f48, f84
  9655. FNMA f116 = f117, f48, f116
  9656. FNMA f92 = f93, f48, f92
  9657. FNMA f124 = f125, f48, f124
  9658. ;;
  9659. FNMA f67 = f69, f49, f67
  9660. FNMA f99 = f101, f49, f99
  9661. FNMA f75 = f77, f49, f75
  9662. FNMA f107 = f109, f49, f107
  9663. FNMA f83 = f85, f49, f83
  9664. FNMA f115 = f117, f49, f115
  9665. FNMA f91 = f93, f49, f91
  9666. FNMA f123 = f125, f49, f123
  9667. ;;
  9668. FNMA f66 = f69, f50, f66
  9669. FNMA f98 = f101, f50, f98
  9670. FNMA f74 = f77, f50, f74
  9671. FNMA f106 = f109, f50, f106
  9672. FNMA f82 = f85, f50, f82
  9673. FNMA f114 = f117, f50, f114
  9674. FNMA f90 = f93, f50, f90
  9675. FNMA f122 = f125, f50, f122
  9676. ;;
  9677. FNMA f65 = f69, f51, f65
  9678. FNMA f97 = f101, f51, f97
  9679. FNMA f73 = f77, f51, f73
  9680. FNMA f105 = f109, f51, f105
  9681. FNMA f81 = f85, f51, f81
  9682. FNMA f113 = f117, f51, f113
  9683. FNMA f89 = f93, f51, f89
  9684. FNMA f121 = f125, f51, f121
  9685. ;;
  9686. FNMA f64 = f69, f52, f64
  9687. FNMA f96 = f101, f52, f96
  9688. FNMA f72 = f77, f52, f72
  9689. FNMA f104 = f109, f52, f104
  9690. FNMA f80 = f85, f52, f80
  9691. FNMA f112 = f117, f52, f112
  9692. FNMA f88 = f93, f52, f88
  9693. FNMA f120 = f125, f52, f120
  9694. ;;
  /* Index 4: scale by f53, then eliminate it from indices 3..0 (f54..f57). */
  9695. FMPY f68 = f68, f53
  9696. FMPY f100 = f100, f53
  9697. FMPY f76 = f76, f53
  9698. FMPY f108 = f108, f53
  9699. FMPY f84 = f84, f53
  9700. FMPY f116 = f116, f53
  9701. FMPY f92 = f92, f53
  9702. FMPY f124 = f124, f53
  9703. ;;
  9704. FNMA f67 = f68, f54, f67
  9705. FNMA f99 = f100, f54, f99
  9706. FNMA f75 = f76, f54, f75
  9707. FNMA f107 = f108, f54, f107
  9708. FNMA f83 = f84, f54, f83
  9709. FNMA f115 = f116, f54, f115
  9710. FNMA f91 = f92, f54, f91
  9711. FNMA f123 = f124, f54, f123
  9712. ;;
  9713. FNMA f66 = f68, f55, f66
  9714. FNMA f98 = f100, f55, f98
  9715. FNMA f74 = f76, f55, f74
  9716. FNMA f106 = f108, f55, f106
  9717. FNMA f82 = f84, f55, f82
  9718. FNMA f114 = f116, f55, f114
  9719. FNMA f90 = f92, f55, f90
  9720. FNMA f122 = f124, f55, f122
  9721. ;;
  9722. FNMA f65 = f68, f56, f65
  9723. FNMA f97 = f100, f56, f97
  9724. FNMA f73 = f76, f56, f73
  9725. FNMA f105 = f108, f56, f105
  9726. FNMA f81 = f84, f56, f81
  9727. FNMA f113 = f116, f56, f113
  9728. FNMA f89 = f92, f56, f89
  9729. FNMA f121 = f124, f56, f121
  9730. ;;
  9731. FNMA f64 = f68, f57, f64
  9732. FNMA f96 = f100, f57, f96
  9733. FNMA f72 = f76, f57, f72
  9734. FNMA f104 = f108, f57, f104
  9735. FNMA f80 = f84, f57, f80
  9736. FNMA f112 = f116, f57, f112
  9737. FNMA f88 = f92, f57, f88
  9738. FNMA f120 = f124, f57, f120
  9739. ;;
  /* Index 3: scale by f58, then eliminate it from indices 2..0 (f59..f61). */
  9740. FMPY f67 = f67, f58
  9741. FMPY f99 = f99, f58
  9742. FMPY f75 = f75, f58
  9743. FMPY f107 = f107, f58
  9744. FMPY f83 = f83, f58
  9745. FMPY f115 = f115, f58
  9746. FMPY f91 = f91, f58
  9747. FMPY f123 = f123, f58
  9748. ;;
  9749. FNMA f66 = f67, f59, f66
  9750. FNMA f98 = f99, f59, f98
  9751. FNMA f74 = f75, f59, f74
  9752. FNMA f106 = f107, f59, f106
  9753. FNMA f82 = f83, f59, f82
  9754. FNMA f114 = f115, f59, f114
  9755. FNMA f90 = f91, f59, f90
  9756. FNMA f122 = f123, f59, f122
  9757. ;;
  9758. FNMA f65 = f67, f60, f65
  9759. FNMA f97 = f99, f60, f97
  9760. FNMA f73 = f75, f60, f73
  9761. FNMA f105 = f107, f60, f105
  9762. FNMA f81 = f83, f60, f81
  9763. FNMA f113 = f115, f60, f113
  9764. FNMA f89 = f91, f60, f89
  9765. FNMA f121 = f123, f60, f121
  9766. ;;
  /* From here on, finished groups are stored while the remaining arithmetic
     continues.  Each group is written as 4 elements via BOFFSET and 4 via
     BOFFSET2; the trailing -11*SIZE post-increment moves each pointer to the
     start of the previous 8-element slot.
     Store index 7; finish eliminating index 3 from index 0 (f61). */
  9767. { .mfi
  9768. STFD [BOFFSET] = f71, SIZE
  9769. FNMA f64 = f67, f61, f64
  9770. }
  9771. { .mfi
  9772. STFD [BOFFSET2] = f103, SIZE
  9773. FNMA f96 = f99, f61, f96
  9774. }
  9775. ;;
  9776. { .mfi
  9777. STFD [BOFFSET] = f79, SIZE
  9778. FNMA f72 = f75, f61, f72
  9779. }
  9780. { .mfi
  9781. STFD [BOFFSET2] = f111, SIZE
  9782. FNMA f104 = f107, f61, f104
  9783. }
  9784. ;;
  9785. { .mfi
  9786. STFD [BOFFSET] = f87, SIZE
  9787. FNMA f80 = f83, f61, f80
  9788. }
  9789. { .mfi
  9790. STFD [BOFFSET2] = f119, SIZE
  9791. FNMA f112 = f115, f61, f112
  9792. }
  9793. ;;
  9794. { .mfi
  9795. STFD [BOFFSET] = f95, - 11 * SIZE
  9796. FNMA f88 = f91, f61, f88
  9797. }
  9798. { .mfi
  9799. STFD [BOFFSET2] = f127, - 11 * SIZE
  9800. FNMA f120 = f123, f61, f120
  9801. }
  9802. ;;
  /* Store index 6; index 2: scale by f16. */
  9803. { .mfi
  9804. STFD [BOFFSET] = f70, SIZE
  9805. FMPY f66 = f66, f16
  9806. }
  9807. { .mfi
  9808. STFD [BOFFSET2] = f102, SIZE
  9809. FMPY f98 = f98, f16
  9810. }
  9811. ;;
  9812. { .mfi
  9813. STFD [BOFFSET] = f78, SIZE
  9814. FMPY f74 = f74, f16
  9815. }
  9816. { .mfi
  9817. STFD [BOFFSET2] = f110, SIZE
  9818. FMPY f106 = f106, f16
  9819. }
  9820. ;;
  9821. { .mfi
  9822. STFD [BOFFSET] = f86, SIZE
  9823. FMPY f82 = f82, f16
  9824. }
  9825. { .mfi
  9826. STFD [BOFFSET2] = f118, SIZE
  9827. FMPY f114 = f114, f16
  9828. }
  9829. ;;
  9830. { .mfi
  9831. STFD [BOFFSET] = f94, - 11 * SIZE
  9832. FMPY f90 = f90, f16
  9833. }
  9834. { .mfi
  9835. STFD [BOFFSET2] = f126, - 11 * SIZE
  9836. FMPY f122 = f122, f16
  9837. }
  9838. ;;
  /* Store index 5; eliminate index 2 from index 1 (f17). */
  9839. { .mfi
  9840. STFD [BOFFSET] = f69, SIZE
  9841. FNMA f65 = f66, f17, f65
  9842. }
  9843. { .mfi
  9844. STFD [BOFFSET2] = f101, SIZE
  9845. FNMA f97 = f98, f17, f97
  9846. }
  9847. ;;
  9848. { .mfi
  9849. STFD [BOFFSET] = f77, SIZE
  9850. FNMA f73 = f74, f17, f73
  9851. }
  9852. { .mfi
  9853. STFD [BOFFSET2] = f109, SIZE
  9854. FNMA f105 = f106, f17, f105
  9855. }
  9856. ;;
  9857. { .mfi
  9858. STFD [BOFFSET] = f85, SIZE
  9859. FNMA f81 = f82, f17, f81
  9860. }
  9861. { .mfi
  9862. STFD [BOFFSET2] = f117, SIZE
  9863. FNMA f113 = f114, f17, f113
  9864. }
  9865. ;;
  9866. { .mfi
  9867. STFD [BOFFSET] = f93, - 11 * SIZE
  9868. FNMA f89 = f90, f17, f89
  9869. }
  9870. { .mfi
  9871. STFD [BOFFSET2] = f125, - 11 * SIZE
  9872. FNMA f121 = f122, f17, f121
  9873. }
  9874. ;;
  /* Store index 4; eliminate index 2 from index 0 (f18). */
  9875. { .mfi
  9876. STFD [BOFFSET] = f68, SIZE
  9877. FNMA f64 = f66, f18, f64
  9878. }
  9879. { .mfi
  9880. STFD [BOFFSET2] = f100, SIZE
  9881. FNMA f96 = f98, f18, f96
  9882. }
  9883. ;;
  9884. { .mfi
  9885. STFD [BOFFSET] = f76, SIZE
  9886. FNMA f72 = f74, f18, f72
  9887. }
  9888. { .mfi
  9889. STFD [BOFFSET2] = f108, SIZE
  9890. FNMA f104 = f106, f18, f104
  9891. }
  9892. ;;
  9893. { .mfi
  9894. STFD [BOFFSET] = f84, SIZE
  9895. FNMA f80 = f82, f18, f80
  9896. }
  9897. { .mfi
  9898. STFD [BOFFSET2] = f116, SIZE
  9899. FNMA f112 = f114, f18, f112
  9900. }
  9901. ;;
  9902. { .mfi
  9903. STFD [BOFFSET] = f92, - 11 * SIZE
  9904. FNMA f88 = f90, f18, f88
  9905. }
  9906. { .mfi
  9907. STFD [BOFFSET2] = f124, - 11 * SIZE
  9908. FNMA f120 = f122, f18, f120
  9909. }
  9910. ;;
  /* Store index 3; index 1: scale by f19. */
  9911. { .mfi
  9912. STFD [BOFFSET] = f67, SIZE
  9913. FMPY f65 = f65, f19
  9914. }
  9915. { .mfi
  9916. STFD [BOFFSET2] = f99, SIZE
  9917. FMPY f97 = f97, f19
  9918. }
  9919. ;;
  9920. { .mfi
  9921. STFD [BOFFSET] = f75, SIZE
  9922. FMPY f73 = f73, f19
  9923. }
  9924. { .mfi
  9925. STFD [BOFFSET2] = f107, SIZE
  9926. FMPY f105 = f105, f19
  9927. }
  9928. ;;
  9929. { .mfi
  9930. STFD [BOFFSET] = f83, SIZE
  9931. FMPY f81 = f81, f19
  9932. }
  9933. { .mfi
  9934. STFD [BOFFSET2] = f115, SIZE
  9935. FMPY f113 = f113, f19
  9936. }
  9937. ;;
  9938. { .mfi
  9939. STFD [BOFFSET] = f91, - 11 * SIZE
  9940. FMPY f89 = f89, f19
  9941. }
  9942. { .mfi
  9943. STFD [BOFFSET2] = f123, - 11 * SIZE
  9944. FMPY f121 = f121, f19
  9945. }
  9946. ;;
  /* Store index 2; eliminate index 1 from index 0 (f20). */
  9947. { .mfi
  9948. STFD [BOFFSET] = f66, SIZE
  9949. FNMA f64 = f65, f20, f64
  9950. }
  9951. { .mfi
  9952. STFD [BOFFSET2] = f98, SIZE
  9953. FNMA f96 = f97, f20, f96
  9954. }
  9955. ;;
  9956. { .mfi
  9957. STFD [BOFFSET] = f74, SIZE
  9958. FNMA f72 = f73, f20, f72
  9959. }
  9960. { .mfi
  9961. STFD [BOFFSET2] = f106, SIZE
  9962. FNMA f104 = f105, f20, f104
  9963. }
  9964. ;;
  9965. { .mfi
  9966. STFD [BOFFSET] = f82, SIZE
  9967. FNMA f80 = f81, f20, f80
  9968. }
  9969. { .mfi
  9970. STFD [BOFFSET2] = f114, SIZE
  9971. FNMA f112 = f113, f20, f112
  9972. }
  9973. ;;
  9974. { .mfi
  9975. STFD [BOFFSET] = f90, -11 * SIZE
  9976. FNMA f88 = f89, f20, f88
  9977. }
  9978. { .mfi
  9979. STFD [BOFFSET2] = f122, -11 * SIZE
  9980. FNMA f120 = f121, f20, f120
  9981. }
  9982. ;;
  /* Store index 1; index 0: scale by f21. */
  9983. { .mfi
  9984. STFD [BOFFSET] = f65, SIZE
  9985. FMPY f64 = f64, f21
  9986. }
  9987. { .mfi
  9988. STFD [BOFFSET2] = f97, SIZE
  9989. FMPY f96 = f96, f21
  9990. }
  9991. ;;
  9992. { .mfi
  9993. STFD [BOFFSET] = f73, SIZE
  9994. FMPY f72 = f72, f21
  9995. }
  9996. { .mfi
  9997. STFD [BOFFSET2] = f105, SIZE
  9998. FMPY f104 = f104, f21
  9999. }
  10000. ;;
  10001. { .mfi
  10002. STFD [BOFFSET] = f81, SIZE
  10003. FMPY f80 = f80, f21
  10004. }
  10005. { .mfi
  10006. STFD [BOFFSET2] = f113, SIZE
  10007. FMPY f112 = f112, f21
  10008. }
  10009. ;;
  10010. { .mfi
  10011. STFD [BOFFSET] = f89, - 11 * SIZE
  10012. FMPY f88 = f88, f21
  10013. }
  10014. { .mfi
  10015. STFD [BOFFSET2] = f121, - 11 * SIZE
  10016. FMPY f120 = f120, f21
  10017. }
  10018. ;;
  /* Store index 0.  The final -3*SIZE returns BOFFSET/BOFFSET2 to the start of
     their last 4-element run.  C1 and C2 are stepped back by 8*SIZE, and C9 is
     derived from the already-updated C1. */
  10019. { .mmi
  10020. STFD [BOFFSET] = f64, SIZE
  10021. STFD [BOFFSET2] = f96, SIZE
  10022. adds C1 = -8 * SIZE, C1
  10023. }
  10024. ;;
  10025. { .mmi
  10026. STFD [BOFFSET] = f72, SIZE
  10027. STFD [BOFFSET2] = f104, SIZE
  10028. adds C2 = -8 * SIZE, C2
  10029. }
  10030. ;;
  10031. { .mmi
  10032. STFD [BOFFSET] = f80, SIZE
  10033. STFD [BOFFSET2] = f112, SIZE
  10034. nop __LINE__
  10035. }
  10036. ;;
  10037. { .mmi
  10038. STFD [BOFFSET] = f88, - 3 * SIZE
  10039. STFD [BOFFSET2] = f120, - 3 * SIZE
  10040. adds C9 = 4 * SIZE, C1
  10041. }
  10042. ;;
  10043. #endif
  10044. #ifdef LT
  10045. { .mfi
  10046. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  10047. FMPY f64 = f64, f32
  10048. nop __LINE__
  10049. }
  10050. { .mfi
  10051. nop __LINE__
  10052. FMPY f96 = f96, f32
  10053. adds BOFFSET2 = 4 * SIZE, BOFFSET
  10054. }
  10055. ;;
  10056. { .mfi
  10057. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  10058. FMPY f72 = f72, f32
  10059. nop __LINE__
  10060. }
  10061. { .mfi
  10062. nop __LINE__
  10063. FMPY f104 = f104, f32
  10064. nop __LINE__
  10065. }
  10066. ;;
  10067. { .mfi
  10068. LDFPD f38, f39 = [AOFFSET]
  10069. FMPY f80 = f80, f32
  10070. adds AOFFSET = 3 * SIZE, AOFFSET
  10071. }
  10072. { .mfi
  10073. nop __LINE__
  10074. FMPY f112 = f112, f32
  10075. nop __LINE__
  10076. }
  10077. ;;
  10078. { .mfi
  10079. LDFD f40 = [AOFFSET], 1 * SIZE
  10080. FMPY f88 = f88, f32
  10081. nop __LINE__
  10082. }
  10083. { .mfi
  10084. nop __LINE__
  10085. FMPY f120 = f120, f32
  10086. nop __LINE__
  10087. }
  10088. ;;
  10089. { .mfi
  10090. LDFPD f41, f42 = [AOFFSET], 2 * SIZE
  10091. FNMA f65 = f64, f33, f65
  10092. nop __LINE__
  10093. }
  10094. { .mfi
  10095. nop __LINE__
  10096. FNMA f97 = f96, f33, f97
  10097. nop __LINE__
  10098. }
  10099. ;;
  10100. { .mfi
  10101. LDFPD f43, f44 = [AOFFSET], 2 * SIZE
  10102. FNMA f73 = f72, f33, f73
  10103. nop __LINE__
  10104. }
  10105. { .mfi
  10106. nop __LINE__
  10107. FNMA f105 = f104, f33, f105
  10108. nop __LINE__
  10109. }
  10110. ;;
  10111. { .mfi
  10112. LDFPD f45, f46 = [AOFFSET]
  10113. FNMA f81 = f80, f33, f81
  10114. adds AOFFSET = 4 * SIZE, AOFFSET
  10115. }
  10116. { .mfi
  10117. nop __LINE__
  10118. FNMA f113 = f112, f33, f113
  10119. nop __LINE__
  10120. }
  10121. ;;
  10122. { .mfi
  10123. LDFPD f47, f48 = [AOFFSET], 2 * SIZE
  10124. FNMA f89 = f88, f33, f89
  10125. nop __LINE__
  10126. }
  10127. { .mfi
  10128. nop __LINE__
  10129. FNMA f121 = f120, f33, f121
  10130. nop __LINE__
  10131. }
  10132. ;;
  10133. { .mfi
  10134. LDFPD f49, f50 = [AOFFSET], 2 * SIZE
  10135. FNMA f66 = f64, f34, f66
  10136. nop __LINE__
  10137. }
  10138. { .mfi
  10139. nop __LINE__
  10140. FNMA f98 = f96, f34, f98
  10141. nop __LINE__
  10142. }
  10143. ;;
  10144. { .mfi
  10145. LDFPD f51, f52 = [AOFFSET]
  10146. FNMA f74 = f72, f34, f74
  10147. adds AOFFSET = 5 * SIZE, AOFFSET
  10148. }
  10149. { .mfi
  10150. nop __LINE__
  10151. FNMA f106 = f104, f34, f106
  10152. nop __LINE__
  10153. }
  10154. ;;
  10155. { .mfi
  10156. LDFD f53 = [AOFFSET], 1 * SIZE
  10157. FNMA f82 = f80, f34, f82
  10158. nop __LINE__
  10159. }
  10160. { .mfi
  10161. nop __LINE__
  10162. FNMA f114 = f112, f34, f114
  10163. nop __LINE__
  10164. }
  10165. ;;
  10166. { .mfi
  10167. LDFPD f54, f55 = [AOFFSET], 2 * SIZE
  10168. FNMA f90 = f88, f34, f90
  10169. nop __LINE__
  10170. }
  10171. { .mfi
  10172. nop __LINE__
  10173. FNMA f122 = f120, f34, f122
  10174. nop __LINE__
  10175. }
  10176. ;;
  10177. { .mfi
  10178. LDFPD f56, f57 = [AOFFSET]
  10179. FNMA f67 = f64, f35, f67
  10180. adds AOFFSET = 6 * SIZE, AOFFSET
  10181. }
  10182. { .mfi
  10183. nop __LINE__
  10184. FNMA f99 = f96, f35, f99
  10185. nop __LINE__
  10186. }
  10187. ;;
  10188. { .mfi
  10189. LDFPD f58, f59 = [AOFFSET], 2 * SIZE
  10190. FNMA f75 = f72, f35, f75
  10191. nop __LINE__
  10192. }
  10193. { .mfi
  10194. nop __LINE__
  10195. FNMA f107 = f104, f35, f107
  10196. nop __LINE__
  10197. }
  10198. ;;
  10199. { .mfi
  10200. LDFPD f60, f61 = [AOFFSET]
  10201. FNMA f83 = f80, f35, f83
  10202. adds AOFFSET = 7 * SIZE, AOFFSET
  10203. }
  10204. { .mfi
  10205. nop __LINE__
  10206. FNMA f115 = f112, f35, f115
  10207. nop __LINE__
  10208. }
  10209. ;;
  10210. { .mfi
  10211. LDFD f16 = [AOFFSET], 1 * SIZE
  10212. FNMA f91 = f88, f35, f91
  10213. nop __LINE__
  10214. }
  10215. { .mfi
  10216. nop __LINE__
  10217. FNMA f123 = f120, f35, f123
  10218. nop __LINE__
  10219. }
  10220. ;;
  10221. { .mfi
  10222. LDFPD f17, f18 = [AOFFSET]
  10223. FNMA f68 = f64, f36, f68
  10224. adds AOFFSET = 8 * SIZE, AOFFSET
  10225. }
  10226. { .mfi
  10227. nop __LINE__
  10228. FNMA f100 = f96, f36, f100
  10229. nop __LINE__
  10230. }
  10231. ;;
  10232. { .mfi
  10233. LDFPD f19, f20 = [AOFFSET]
  10234. FNMA f76 = f72, f36, f76
  10235. adds AOFFSET = 9 * SIZE, AOFFSET
  10236. }
  10237. { .mfi
  10238. nop __LINE__
  10239. FNMA f108 = f104, f36, f108
  10240. nop __LINE__
  10241. }
  10242. ;;
  10243. { .mfi
  10244. LDFD f21 = [AOFFSET]
  10245. FNMA f84 = f80, f36, f84
  10246. adds AOFFSET = -63 * SIZE, AOFFSET
  10247. }
  10248. { .mfi
  10249. nop __LINE__
  10250. FNMA f116 = f112, f36, f116
  10251. nop __LINE__
  10252. }
  10253. ;;
  10254. { .mfi
  10255. nop __LINE__
  10256. FNMA f92 = f88, f36, f92
  10257. nop __LINE__
  10258. }
  10259. { .mfi
  10260. nop __LINE__
  10261. FNMA f124 = f120, f36, f124
  10262. nop __LINE__
  10263. }
  10264. ;;
  10265. FNMA f69 = f64, f37, f69
  10266. FNMA f101 = f96, f37, f101
  10267. FNMA f77 = f72, f37, f77
  10268. FNMA f109 = f104, f37, f109
  10269. FNMA f85 = f80, f37, f85
  10270. FNMA f117 = f112, f37, f117
  10271. FNMA f93 = f88, f37, f93
  10272. FNMA f125 = f120, f37, f125
  10273. ;;
  10274. FNMA f70 = f64, f38, f70
  10275. FNMA f102 = f96, f38, f102
  10276. FNMA f78 = f72, f38, f78
  10277. FNMA f110 = f104, f38, f110
  10278. FNMA f86 = f80, f38, f86
  10279. FNMA f118 = f112, f38, f118
  10280. FNMA f94 = f88, f38, f94
  10281. FNMA f126 = f120, f38, f126
  10282. ;;
  10283. FNMA f71 = f64, f39, f71
  10284. FNMA f103 = f96, f39, f103
  10285. FNMA f79 = f72, f39, f79
  10286. FNMA f111 = f104, f39, f111
  10287. FNMA f87 = f80, f39, f87
  10288. FNMA f119 = f112, f39, f119
  10289. FNMA f95 = f88, f39, f95
  10290. FNMA f127 = f120, f39, f127
  10291. ;;
  10292. FMPY f65 = f65, f40
  10293. FMPY f97 = f97, f40
  10294. FMPY f73 = f73, f40
  10295. FMPY f105 = f105, f40
  10296. FMPY f81 = f81, f40
  10297. FMPY f113 = f113, f40
  10298. FMPY f89 = f89, f40
  10299. FMPY f121 = f121, f40
  10300. ;;
  10301. FNMA f66 = f65, f41, f66
  10302. FNMA f98 = f97, f41, f98
  10303. FNMA f74 = f73, f41, f74
  10304. FNMA f106 = f105, f41, f106
  10305. FNMA f82 = f81, f41, f82
  10306. FNMA f114 = f113, f41, f114
  10307. FNMA f90 = f89, f41, f90
  10308. FNMA f122 = f121, f41, f122
  10309. FNMA f67 = f65, f42, f67
  10310. FNMA f99 = f97, f42, f99
  10311. FNMA f75 = f73, f42, f75
  10312. FNMA f107 = f105, f42, f107
  10313. FNMA f83 = f81, f42, f83
  10314. FNMA f115 = f113, f42, f115
  10315. FNMA f91 = f89, f42, f91
  10316. FNMA f123 = f121, f42, f123
  10317. ;;
  10318. FNMA f68 = f65, f43, f68
  10319. FNMA f100 = f97, f43, f100
  10320. FNMA f76 = f73, f43, f76
  10321. FNMA f108 = f105, f43, f108
  10322. FNMA f84 = f81, f43, f84
  10323. FNMA f116 = f113, f43, f116
  10324. FNMA f92 = f89, f43, f92
  10325. FNMA f124 = f121, f43, f124
  10326. ;;
  10327. FNMA f69 = f65, f44, f69
  10328. FNMA f101 = f97, f44, f101
  10329. FNMA f77 = f73, f44, f77
  10330. FNMA f109 = f105, f44, f109
  10331. FNMA f85 = f81, f44, f85
  10332. FNMA f117 = f113, f44, f117
  10333. FNMA f93 = f89, f44, f93
  10334. FNMA f125 = f121, f44, f125
  10335. ;;
  10336. FNMA f70 = f65, f45, f70
  10337. FNMA f102 = f97, f45, f102
  10338. FNMA f78 = f73, f45, f78
  10339. FNMA f110 = f105, f45, f110
  10340. FNMA f86 = f81, f45, f86
  10341. FNMA f118 = f113, f45, f118
  10342. FNMA f94 = f89, f45, f94
  10343. FNMA f126 = f121, f45, f126
  10344. ;;
  10345. FNMA f71 = f65, f46, f71
  10346. FNMA f103 = f97, f46, f103
  10347. FNMA f79 = f73, f46, f79
  10348. FNMA f111 = f105, f46, f111
  10349. FNMA f87 = f81, f46, f87
  10350. FNMA f119 = f113, f46, f119
  10351. FNMA f95 = f89, f46, f95
  10352. FNMA f127 = f121, f46, f127
  10353. ;;
  10354. FMPY f66 = f66, f47
  10355. FMPY f98 = f98, f47
  10356. FMPY f74 = f74, f47
  10357. FMPY f106 = f106, f47
  10358. FMPY f82 = f82, f47
  10359. FMPY f114 = f114, f47
  10360. FMPY f90 = f90, f47
  10361. FMPY f122 = f122, f47
  10362. ;;
  10363. FNMA f67 = f66, f48, f67
  10364. FNMA f99 = f98, f48, f99
  10365. FNMA f75 = f74, f48, f75
  10366. FNMA f107 = f106, f48, f107
  10367. FNMA f83 = f82, f48, f83
  10368. FNMA f115 = f114, f48, f115
  10369. FNMA f91 = f90, f48, f91
  10370. FNMA f123 = f122, f48, f123
  10371. FNMA f68 = f66, f49, f68
  10372. FNMA f100 = f98, f49, f100
  10373. FNMA f76 = f74, f49, f76
  10374. FNMA f108 = f106, f49, f108
  10375. FNMA f84 = f82, f49, f84
  10376. FNMA f116 = f114, f49, f116
  10377. FNMA f92 = f90, f49, f92
  10378. FNMA f124 = f122, f49, f124
  10379. ;;
  10380. FNMA f69 = f66, f50, f69
  10381. FNMA f101 = f98, f50, f101
  10382. FNMA f77 = f74, f50, f77
  10383. FNMA f109 = f106, f50, f109
  10384. FNMA f85 = f82, f50, f85
  10385. FNMA f117 = f114, f50, f117
  10386. FNMA f93 = f90, f50, f93
  10387. FNMA f125 = f122, f50, f125
  10388. ;;
  10389. FNMA f70 = f66, f51, f70
  10390. FNMA f102 = f98, f51, f102
  10391. FNMA f78 = f74, f51, f78
  10392. FNMA f110 = f106, f51, f110
  10393. FNMA f86 = f82, f51, f86
  10394. FNMA f118 = f114, f51, f118
  10395. FNMA f94 = f90, f51, f94
  10396. FNMA f126 = f122, f51, f126
  10397. ;;
  10398. FNMA f71 = f66, f52, f71
  10399. FNMA f103 = f98, f52, f103
  10400. FNMA f79 = f74, f52, f79
  10401. FNMA f111 = f106, f52, f111
  10402. FNMA f87 = f82, f52, f87
  10403. FNMA f119 = f114, f52, f119
  10404. FNMA f95 = f90, f52, f95
  10405. FNMA f127 = f122, f52, f127
  10406. ;;
  10407. FMPY f67 = f67, f53
  10408. FMPY f99 = f99, f53
  10409. FMPY f75 = f75, f53
  10410. FMPY f107 = f107, f53
  10411. FMPY f83 = f83, f53
  10412. FMPY f115 = f115, f53
  10413. FMPY f91 = f91, f53
  10414. FMPY f123 = f123, f53
  10415. ;;
  10416. FNMA f68 = f67, f54, f68
  10417. FNMA f100 = f99, f54, f100
  10418. FNMA f76 = f75, f54, f76
  10419. FNMA f108 = f107, f54, f108
  10420. FNMA f84 = f83, f54, f84
  10421. FNMA f116 = f115, f54, f116
  10422. FNMA f92 = f91, f54, f92
  10423. FNMA f124 = f123, f54, f124
  10424. ;;
  10425. FNMA f69 = f67, f55, f69
  10426. FNMA f101 = f99, f55, f101
  10427. FNMA f77 = f75, f55, f77
  10428. FNMA f109 = f107, f55, f109
  10429. FNMA f85 = f83, f55, f85
  10430. FNMA f117 = f115, f55, f117
  10431. FNMA f93 = f91, f55, f93
  10432. FNMA f125 = f123, f55, f125
  10433. ;;
  10434. FNMA f70 = f67, f56, f70
  10435. FNMA f102 = f99, f56, f102
  10436. FNMA f78 = f75, f56, f78
  10437. FNMA f110 = f107, f56, f110
  10438. FNMA f86 = f83, f56, f86
  10439. FNMA f118 = f115, f56, f118
  10440. FNMA f94 = f91, f56, f94
  10441. FNMA f126 = f123, f56, f126
  10442. ;;
  10443. FNMA f71 = f67, f57, f71
  10444. FNMA f103 = f99, f57, f103
  10445. FNMA f79 = f75, f57, f79
  10446. FNMA f111 = f107, f57, f111
  10447. FNMA f87 = f83, f57, f87
  10448. FNMA f119 = f115, f57, f119
  10449. FNMA f95 = f91, f57, f95
  10450. FNMA f127 = f123, f57, f127
  10451. ;;
  10452. FMPY f68 = f68, f58
  10453. FMPY f100 = f100, f58
  10454. FMPY f76 = f76, f58
  10455. FMPY f108 = f108, f58
  10456. FMPY f84 = f84, f58
  10457. FMPY f116 = f116, f58
  10458. FMPY f92 = f92, f58
  10459. FMPY f124 = f124, f58
  10460. ;;
  10461. FNMA f69 = f68, f59, f69
  10462. FNMA f101 = f100, f59, f101
  10463. FNMA f77 = f76, f59, f77
  10464. FNMA f109 = f108, f59, f109
  10465. FNMA f85 = f84, f59, f85
  10466. FNMA f117 = f116, f59, f117
  10467. FNMA f93 = f92, f59, f93
  10468. FNMA f125 = f124, f59, f125
  10469. ;;
  10470. FNMA f70 = f68, f60, f70
  10471. FNMA f102 = f100, f60, f102
  10472. FNMA f78 = f76, f60, f78
  10473. FNMA f110 = f108, f60, f110
  10474. FNMA f86 = f84, f60, f86
  10475. FNMA f118 = f116, f60, f118
  10476. FNMA f94 = f92, f60, f94
  10477. FNMA f126 = f124, f60, f126
  10478. ;;
  10479. { .mfi
  10480. STFD [BOFFSET] = f64, SIZE
  10481. FNMA f71 = f68, f61, f71
  10482. }
  10483. { .mfi
  10484. STFD [BOFFSET2] = f96, SIZE
  10485. FNMA f103 = f100, f61, f103
  10486. }
  10487. ;;
  10488. { .mfi
  10489. STFD [BOFFSET] = f72, SIZE
  10490. FNMA f79 = f76, f61, f79
  10491. }
  10492. { .mfi
  10493. STFD [BOFFSET2] = f104, SIZE
  10494. FNMA f111 = f108, f61, f111
  10495. }
  10496. ;;
  10497. { .mfi
  10498. STFD [BOFFSET] = f80, SIZE
  10499. FNMA f87 = f84, f61, f87
  10500. }
  10501. { .mfi
  10502. STFD [BOFFSET2] = f112, SIZE
  10503. FNMA f119 = f116, f61, f119
  10504. }
  10505. ;;
  10506. { .mfi
  10507. STFD [BOFFSET] = f88, 5 * SIZE
  10508. FNMA f95 = f92, f61, f95
  10509. }
  10510. { .mfi
  10511. STFD [BOFFSET2] = f120, 5 * SIZE
  10512. FNMA f127 = f124, f61, f127
  10513. }
  10514. ;;
  10515. { .mfi
  10516. STFD [BOFFSET] = f65, SIZE
  10517. FMPY f69 = f69, f16
  10518. }
  10519. { .mfi
  10520. STFD [BOFFSET2] = f97, SIZE
  10521. FMPY f101 = f101, f16
  10522. }
  10523. ;;
  10524. { .mfi
  10525. STFD [BOFFSET] = f73, SIZE
  10526. FMPY f77 = f77, f16
  10527. }
  10528. { .mfi
  10529. STFD [BOFFSET2] = f105, SIZE
  10530. FMPY f109 = f109, f16
  10531. }
  10532. ;;
  10533. { .mfi
  10534. STFD [BOFFSET] = f81, SIZE
  10535. FMPY f85 = f85, f16
  10536. }
  10537. { .mfi
  10538. STFD [BOFFSET2] = f113, SIZE
  10539. FMPY f117 = f117, f16
  10540. }
  10541. ;;
  10542. { .mfi
  10543. STFD [BOFFSET] = f89, 5 * SIZE
  10544. FMPY f93 = f93, f16
  10545. }
  10546. { .mfi
  10547. STFD [BOFFSET2] = f121, 5 * SIZE
  10548. FMPY f125 = f125, f16
  10549. }
  10550. ;;
  10551. { .mfi
  10552. STFD [BOFFSET] = f66, SIZE
  10553. FNMA f70 = f69, f17, f70
  10554. }
  10555. { .mfi
  10556. STFD [BOFFSET2] = f98, SIZE
  10557. FNMA f102 = f101, f17, f102
  10558. }
  10559. ;;
  10560. { .mfi
  10561. STFD [BOFFSET] = f74, SIZE
  10562. FNMA f78 = f77, f17, f78
  10563. }
  10564. { .mfi
  10565. STFD [BOFFSET2] = f106, SIZE
  10566. FNMA f110 = f109, f17, f110
  10567. }
  10568. ;;
  10569. { .mfi
  10570. STFD [BOFFSET] = f82, SIZE
  10571. FNMA f86 = f85, f17, f86
  10572. }
  10573. { .mfi
  10574. STFD [BOFFSET2] = f114, SIZE
  10575. FNMA f118 = f117, f17, f118
  10576. }
  10577. ;;
  10578. { .mfi
  10579. STFD [BOFFSET] = f90, 5 * SIZE
  10580. FNMA f94 = f93, f17, f94
  10581. }
  10582. { .mfi
  10583. STFD [BOFFSET2] = f122, 5 * SIZE
  10584. FNMA f126 = f125, f17, f126
  10585. }
  10586. ;;
  10587. { .mfi
  10588. STFD [BOFFSET] = f67, SIZE
  10589. FNMA f71 = f69, f18, f71
  10590. }
  10591. { .mfi
  10592. STFD [BOFFSET2] = f99, SIZE
  10593. FNMA f103 = f101, f18, f103
  10594. }
  10595. ;;
  10596. { .mfi
  10597. STFD [BOFFSET] = f75, SIZE
  10598. FNMA f79 = f77, f18, f79
  10599. }
  10600. { .mfi
  10601. STFD [BOFFSET2] = f107, SIZE
  10602. FNMA f111 = f109, f18, f111
  10603. }
  10604. ;;
  10605. { .mfi
  10606. STFD [BOFFSET] = f83, SIZE
  10607. FNMA f87 = f85, f18, f87
  10608. }
  10609. { .mfi
  10610. STFD [BOFFSET2] = f115, SIZE
  10611. FNMA f119 = f117, f18, f119
  10612. }
  10613. ;;
  10614. { .mfi
  10615. STFD [BOFFSET] = f91, 5 * SIZE
  10616. FNMA f95 = f93, f18, f95
  10617. }
  10618. { .mfi
  10619. STFD [BOFFSET2] = f123, 5 * SIZE
  10620. FNMA f127 = f125, f18, f127
  10621. }
  10622. ;;
  10623. { .mfi
  10624. STFD [BOFFSET] = f68, SIZE
  10625. FMPY f70 = f70, f19
  10626. }
  10627. { .mfi
  10628. STFD [BOFFSET2] = f100, SIZE
  10629. FMPY f102 = f102, f19
  10630. }
  10631. ;;
  10632. { .mfi
  10633. STFD [BOFFSET] = f76, SIZE
  10634. FMPY f78 = f78, f19
  10635. }
  10636. { .mfi
  10637. STFD [BOFFSET2] = f108, SIZE
  10638. FMPY f110 = f110, f19
  10639. }
  10640. ;;
  10641. { .mfi
  10642. STFD [BOFFSET] = f84, SIZE
  10643. FMPY f86 = f86, f19
  10644. }
  10645. { .mfi
  10646. STFD [BOFFSET2] = f116, SIZE
  10647. FMPY f118 = f118, f19
  10648. }
  10649. ;;
  10650. { .mfi
  10651. STFD [BOFFSET] = f92, 5 * SIZE
  10652. FMPY f94 = f94, f19
  10653. }
  10654. { .mfi
  10655. STFD [BOFFSET2] = f124, 5 * SIZE
  10656. FMPY f126 = f126, f19
  10657. }
  10658. ;;
  10659. { .mfi
  10660. STFD [BOFFSET] = f69, SIZE
  10661. FNMA f71 = f70, f20, f71
  10662. }
  10663. { .mfi
  10664. STFD [BOFFSET2] = f101, SIZE
  10665. FNMA f103 = f102, f20, f103
  10666. }
  10667. ;;
  10668. { .mfi
  10669. STFD [BOFFSET] = f77, SIZE
  10670. FNMA f79 = f78, f20, f79
  10671. }
  10672. { .mfi
  10673. STFD [BOFFSET2] = f109, SIZE
  10674. FNMA f111 = f110, f20, f111
  10675. }
  10676. ;;
  10677. { .mfi
  10678. STFD [BOFFSET] = f85, SIZE
  10679. FNMA f87 = f86, f20, f87
  10680. }
  10681. { .mfi
  10682. STFD [BOFFSET2] = f117, SIZE
  10683. FNMA f119 = f118, f20, f119
  10684. }
  10685. ;;
  10686. { .mfi
  10687. STFD [BOFFSET] = f93, 5 * SIZE
  10688. FNMA f95 = f94, f20, f95
  10689. }
  10690. { .mfi
  10691. STFD [BOFFSET2] = f125, 5 * SIZE
  10692. FNMA f127 = f126, f20, f127
  10693. }
  10694. ;;
  10695. { .mfi
  10696. STFD [BOFFSET] = f70, SIZE
  10697. FMPY f71 = f71, f21
  10698. }
  10699. { .mfi
  10700. STFD [BOFFSET2] = f102, SIZE
  10701. FMPY f103 = f103, f21
  10702. }
  10703. ;;
  10704. { .mfi
  10705. STFD [BOFFSET] = f78, SIZE
  10706. FMPY f79 = f79, f21
  10707. }
  10708. { .mfi
  10709. STFD [BOFFSET2] = f110, SIZE
  10710. FMPY f111 = f111, f21
  10711. }
  10712. ;;
  10713. { .mfi
  10714. STFD [BOFFSET] = f86, SIZE
  10715. FMPY f87 = f87, f21
  10716. }
  10717. { .mfi
  10718. STFD [BOFFSET2] = f118, SIZE
  10719. FMPY f119 = f119, f21
  10720. }
  10721. ;;
  10722. { .mfi
  10723. STFD [BOFFSET] = f94, 5 * SIZE
  10724. FMPY f95 = f95, f21
  10725. }
  10726. { .mfi
  10727. STFD [BOFFSET2] = f126, 5 * SIZE
  10728. FMPY f127 = f127, f21
  10729. }
  10730. ;;
  10731. { .mmi
  10732. STFD [BOFFSET] = f71, SIZE
  10733. STFD [BOFFSET2] = f103, SIZE
  10734. }
  10735. ;;
  10736. { .mmi
  10737. STFD [BOFFSET] = f79, SIZE
  10738. STFD [BOFFSET2] = f111, SIZE
  10739. }
  10740. ;;
  10741. { .mmi
  10742. STFD [BOFFSET] = f87, SIZE
  10743. STFD [BOFFSET2] = f119, SIZE
  10744. adds C9 = 4 * SIZE, C1
  10745. }
  10746. ;;
  10747. { .mfi
  10748. STFD [BOFFSET] = f95
  10749. adds BOFFSET = - 59 * SIZE, BOFFSET
  10750. }
  10751. { .mfi
  10752. STFD [BOFFSET2] = f127
  10753. adds BOFFSET2 = - 59 * SIZE, BOFFSET2
  10754. }
  10755. ;;
  10756. #endif
  10757. #ifdef RN
  // RN variant: forward substitution over eight register groups of eight
  // values each: group 0 = f64-f71, group 1 = f72-f79, ..., group 7 = f120-f127.
  //
  // Coefficients are read from [BOFFSET] at element offsets 8*i + j with
  // 0 <= i <= j <= 7, i.e. the upper triangle of an 8x8 row-major block:
  //   row 0 -> f32-f39   row 1 -> f40-f46   row 2 -> f47-f52   row 3 -> f53-f57
  //   row 4 -> f58-f61   row 5 -> f16-f18   row 6 -> f19-f20   row 7 -> f21
  // The loads advance BOFFSET by 63 elements in total and the last load bundle
  // rewinds it by 63, so BOFFSET leaves this section unchanged.
  //
  // For each group k: FMPY scales the group by the diagonal coefficient of row k,
  // then FNMA folds the scaled group into every later group using the remaining
  // coefficients of row k. The diagonal is multiplied, not divided, so it is
  // presumably stored pre-inverted - TODO confirm against the packing code.
  // FMPY/FNMA are macros defined outside this chunk. The "-=" notation in the
  // comments below assumes FNMA d = a, b, c yields c - a * b (IA-64 fnma) -
  // confirm against the macro definition.
  //
  // Results are written through AOFFSET (first four values of each group) and
  // AOFFSET2 (last four values). NOTE(review): AOFFSET2 looks like it is
  // AOFFSET + 4 * SIZE, set up before this chunk - confirm.
  //
  // Row 0 (offsets 0-7) -> f32-f39; group 0 is scaled by f32 while the loads issue.
  10758. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  10759. ;;
  10760. { .mfi
  10761. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  10762. FMPY f64 = f64, f32
  10763. nop __LINE__
  10764. }
  10765. { .mfi
  10766. nop __LINE__
  10767. FMPY f68 = f68, f32
  10768. nop __LINE__
  10769. }
  10770. ;;
  10771. { .mfi
  10772. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  10773. FMPY f65 = f65, f32
  10774. nop __LINE__
  10775. }
  10776. { .mfi
  10777. nop __LINE__
  10778. FMPY f69 = f69, f32
  10779. nop __LINE__
  10780. }
  10781. ;;
  // Last pair of row 0; the explicit +3 skips offset 8 (below the diagonal).
  10782. { .mfi
  10783. LDFPD f38, f39 = [BOFFSET]
  10784. FMPY f66 = f66, f32
  10785. adds BOFFSET = 3 * SIZE, BOFFSET
  10786. }
  10787. { .mfi
  10788. nop __LINE__
  10789. FMPY f70 = f70, f32
  10790. nop __LINE__
  10791. }
  10792. ;;
  // Row 1 (offsets 9-15) -> f40-f46; finish scaling group 0.
  10793. { .mfi
  10794. LDFD f40 = [BOFFSET], 1 * SIZE
  10795. FMPY f67 = f67, f32
  10796. nop __LINE__
  10797. }
  10798. { .mfi
  10799. nop __LINE__
  10800. FMPY f71 = f71, f32
  10801. nop __LINE__
  10802. }
  10803. ;;
  // group 1 -= group 0 * f33
  10804. { .mfi
  10805. LDFPD f41, f42 = [BOFFSET], 2 * SIZE
  10806. FNMA f72 = f64, f33, f72
  10807. nop __LINE__
  10808. }
  10809. { .mfi
  10810. nop __LINE__
  10811. FNMA f76 = f68, f33, f76
  10812. nop __LINE__
  10813. }
  10814. ;;
  10815. { .mfi
  10816. LDFPD f43, f44 = [BOFFSET], 2 * SIZE
  10817. FNMA f73 = f65, f33, f73
  10818. nop __LINE__
  10819. }
  10820. { .mfi
  10821. nop __LINE__
  10822. FNMA f77 = f69, f33, f77
  10823. nop __LINE__
  10824. }
  10825. ;;
  // Last pair of row 1; +4 skips offsets 16-17 (below the diagonal).
  10826. { .mfi
  10827. LDFPD f45, f46 = [BOFFSET]
  10828. FNMA f74 = f66, f33, f74
  10829. adds BOFFSET = 4 * SIZE, BOFFSET
  10830. }
  10831. { .mfi
  10832. nop __LINE__
  10833. FNMA f78 = f70, f33, f78
  10834. nop __LINE__
  10835. }
  10836. ;;
  // Row 2 (offsets 18-23) -> f47-f52.
  10837. { .mfi
  10838. LDFPD f47, f48 = [BOFFSET], 2 * SIZE
  10839. FNMA f75 = f67, f33, f75
  10840. nop __LINE__
  10841. }
  10842. { .mfi
  10843. nop __LINE__
  10844. FNMA f79 = f71, f33, f79
  10845. nop __LINE__
  10846. }
  10847. ;;
  // group 2 -= group 0 * f34
  10848. { .mfi
  10849. LDFPD f49, f50 = [BOFFSET], 2 * SIZE
  10850. FNMA f80 = f64, f34, f80
  10851. nop __LINE__
  10852. }
  10853. { .mfi
  10854. nop __LINE__
  10855. FNMA f84 = f68, f34, f84
  10856. nop __LINE__
  10857. }
  10858. ;;
  // Last pair of row 2; +5 skips offsets 24-26.
  10859. { .mfi
  10860. LDFPD f51, f52 = [BOFFSET]
  10861. FNMA f81 = f65, f34, f81
  10862. adds BOFFSET = 5 * SIZE, BOFFSET
  10863. }
  10864. { .mfi
  10865. nop __LINE__
  10866. FNMA f85 = f69, f34, f85
  10867. nop __LINE__
  10868. }
  10869. ;;
  // Row 3 (offsets 27-31) -> f53-f57.
  10870. { .mfi
  10871. LDFD f53 = [BOFFSET], 1 * SIZE
  10872. FNMA f82 = f66, f34, f82
  10873. nop __LINE__
  10874. }
  10875. { .mfi
  10876. nop __LINE__
  10877. FNMA f86 = f70, f34, f86
  10878. nop __LINE__
  10879. }
  10880. ;;
  10881. { .mfi
  10882. LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  10883. FNMA f83 = f67, f34, f83
  10884. nop __LINE__
  10885. }
  10886. { .mfi
  10887. nop __LINE__
  10888. FNMA f87 = f71, f34, f87
  10889. nop __LINE__
  10890. }
  10891. ;;
  // group 3 -= group 0 * f35; last pair of row 3, +6 skips offsets 32-35.
  10892. { .mfi
  10893. LDFPD f56, f57 = [BOFFSET]
  10894. FNMA f88 = f64, f35, f88
  10895. adds BOFFSET = 6 * SIZE, BOFFSET
  10896. }
  10897. { .mfi
  10898. nop __LINE__
  10899. FNMA f92 = f68, f35, f92
  10900. nop __LINE__
  10901. }
  10902. ;;
  // Row 4 (offsets 36-39) -> f58-f61.
  10903. { .mfi
  10904. LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  10905. FNMA f89 = f65, f35, f89
  10906. nop __LINE__
  10907. }
  10908. { .mfi
  10909. nop __LINE__
  10910. FNMA f93 = f69, f35, f93
  10911. nop __LINE__
  10912. }
  10913. ;;
  // Last pair of row 4; +7 skips offsets 40-44.
  10914. { .mfi
  10915. LDFPD f60, f61 = [BOFFSET]
  10916. FNMA f90 = f66, f35, f90
  10917. adds BOFFSET = 7 * SIZE, BOFFSET
  10918. }
  10919. { .mfi
  10920. nop __LINE__
  10921. FNMA f94 = f70, f35, f94
  10922. nop __LINE__
  10923. }
  10924. ;;
  // Row 5 (offsets 45-47) -> f16-f18.
  10925. { .mfi
  10926. LDFD f16 = [BOFFSET], 1 * SIZE
  10927. FNMA f91 = f67, f35, f91
  10928. nop __LINE__
  10929. }
  10930. { .mfi
  10931. nop __LINE__
  10932. FNMA f95 = f71, f35, f95
  10933. nop __LINE__
  10934. }
  10935. ;;
  // group 4 -= group 0 * f36; +8 moves from offset 46 to offset 54.
  10936. { .mfi
  10937. LDFPD f17, f18 = [BOFFSET]
  10938. FNMA f96 = f64, f36, f96
  10939. adds BOFFSET = 8 * SIZE, BOFFSET
  10940. }
  10941. { .mfi
  10942. nop __LINE__
  10943. FNMA f100 = f68, f36, f100
  10944. nop __LINE__
  10945. }
  10946. ;;
  // Row 6 (offsets 54-55) -> f19-f20; +9 moves to offset 63.
  10947. { .mfi
  10948. LDFPD f19, f20 = [BOFFSET]
  10949. FNMA f97 = f65, f36, f97
  10950. adds BOFFSET = 9 * SIZE, BOFFSET
  10951. }
  10952. { .mfi
  10953. nop __LINE__
  10954. FNMA f101 = f69, f36, f101
  10955. nop __LINE__
  10956. }
  10957. ;;
  // Row 7 (offset 63) -> f21, then rewind BOFFSET by the 63 elements consumed.
  10958. { .mfi
  10959. LDFD f21 = [BOFFSET]
  10960. FNMA f98 = f66, f36, f98
  10961. adds BOFFSET = -63 * SIZE, BOFFSET
  10962. }
  10963. { .mfi
  10964. nop __LINE__
  10965. FNMA f102 = f70, f36, f102
  10966. nop __LINE__
  10967. }
  10968. ;;
  10969. { .mfi
  10970. nop __LINE__
  10971. FNMA f99 = f67, f36, f99
  10972. nop __LINE__
  10973. }
  10974. { .mfi
  10975. nop __LINE__
  10976. FNMA f103 = f71, f36, f103
  10977. nop __LINE__
  10978. }
  10979. ;;
  // All coefficients are loaded; the rest is pure floating-point work.
  // group 5 -= group 0 * f37
  10980. FNMA f104 = f64, f37, f104
  10981. FNMA f108 = f68, f37, f108
  10982. FNMA f105 = f65, f37, f105
  10983. FNMA f109 = f69, f37, f109
  10984. FNMA f106 = f66, f37, f106
  10985. FNMA f110 = f70, f37, f110
  10986. FNMA f107 = f67, f37, f107
  10987. FNMA f111 = f71, f37, f111
  10988. ;;
  // group 6 -= group 0 * f38
  10989. FNMA f112 = f64, f38, f112
  10990. FNMA f116 = f68, f38, f116
  10991. FNMA f113 = f65, f38, f113
  10992. FNMA f117 = f69, f38, f117
  10993. FNMA f114 = f66, f38, f114
  10994. FNMA f118 = f70, f38, f118
  10995. FNMA f115 = f67, f38, f115
  10996. FNMA f119 = f71, f38, f119
  10997. ;;
  // group 7 -= group 0 * f39
  10998. FNMA f120 = f64, f39, f120
  10999. FNMA f124 = f68, f39, f124
  11000. FNMA f121 = f65, f39, f121
  11001. FNMA f125 = f69, f39, f125
  11002. FNMA f122 = f66, f39, f122
  11003. FNMA f126 = f70, f39, f126
  11004. FNMA f123 = f67, f39, f123
  11005. FNMA f127 = f71, f39, f127
  11006. ;;
  // Scale group 1 by its diagonal coefficient f40.
  11007. FMPY f72 = f72, f40
  11008. FMPY f76 = f76, f40
  11009. FMPY f73 = f73, f40
  11010. FMPY f77 = f77, f40
  11011. FMPY f74 = f74, f40
  11012. FMPY f78 = f78, f40
  11013. FMPY f75 = f75, f40
  11014. FMPY f79 = f79, f40
  11015. ;;
  // Fold group 1 into groups 2-7 using f41-f46.
  // group 2 -= group 1 * f41
  11016. FNMA f80 = f72, f41, f80
  11017. FNMA f84 = f76, f41, f84
  11018. FNMA f81 = f73, f41, f81
  11019. FNMA f85 = f77, f41, f85
  11020. FNMA f82 = f74, f41, f82
  11021. FNMA f86 = f78, f41, f86
  11022. FNMA f83 = f75, f41, f83
  11023. FNMA f87 = f79, f41, f87
  11024. ;;
  // group 3 -= group 1 * f42
  11025. FNMA f88 = f72, f42, f88
  11026. FNMA f92 = f76, f42, f92
  11027. FNMA f89 = f73, f42, f89
  11028. FNMA f93 = f77, f42, f93
  11029. FNMA f90 = f74, f42, f90
  11030. FNMA f94 = f78, f42, f94
  11031. FNMA f91 = f75, f42, f91
  11032. FNMA f95 = f79, f42, f95
  11033. ;;
  // group 4 -= group 1 * f43
  11034. FNMA f96 = f72, f43, f96
  11035. FNMA f100 = f76, f43, f100
  11036. FNMA f97 = f73, f43, f97
  11037. FNMA f101 = f77, f43, f101
  11038. FNMA f98 = f74, f43, f98
  11039. FNMA f102 = f78, f43, f102
  11040. FNMA f99 = f75, f43, f99
  11041. FNMA f103 = f79, f43, f103
  11042. ;;
  // group 5 -= group 1 * f44
  11043. FNMA f104 = f72, f44, f104
  11044. FNMA f108 = f76, f44, f108
  11045. FNMA f105 = f73, f44, f105
  11046. FNMA f109 = f77, f44, f109
  11047. FNMA f106 = f74, f44, f106
  11048. FNMA f110 = f78, f44, f110
  11049. FNMA f107 = f75, f44, f107
  11050. FNMA f111 = f79, f44, f111
  11051. ;;
  // group 6 -= group 1 * f45
  11052. FNMA f112 = f72, f45, f112
  11053. FNMA f116 = f76, f45, f116
  11054. FNMA f113 = f73, f45, f113
  11055. FNMA f117 = f77, f45, f117
  11056. FNMA f114 = f74, f45, f114
  11057. FNMA f118 = f78, f45, f118
  11058. FNMA f115 = f75, f45, f115
  11059. FNMA f119 = f79, f45, f119
  11060. ;;
  // group 7 -= group 1 * f46
  11061. FNMA f120 = f72, f46, f120
  11062. FNMA f124 = f76, f46, f124
  11063. FNMA f121 = f73, f46, f121
  11064. FNMA f125 = f77, f46, f125
  11065. FNMA f122 = f74, f46, f122
  11066. FNMA f126 = f78, f46, f126
  11067. FNMA f123 = f75, f46, f123
  11068. FNMA f127 = f79, f46, f127
  11069. ;;
  // Scale group 2 by its diagonal coefficient f47.
  11070. FMPY f80 = f80, f47
  11071. FMPY f84 = f84, f47
  11072. FMPY f81 = f81, f47
  11073. FMPY f85 = f85, f47
  11074. FMPY f82 = f82, f47
  11075. FMPY f86 = f86, f47
  11076. FMPY f83 = f83, f47
  11077. FMPY f87 = f87, f47
  11078. ;;
  // Fold group 2 into groups 3-7 using f48-f52.
  // group 3 -= group 2 * f48
  11079. FNMA f88 = f80, f48, f88
  11080. FNMA f92 = f84, f48, f92
  11081. FNMA f89 = f81, f48, f89
  11082. FNMA f93 = f85, f48, f93
  11083. FNMA f90 = f82, f48, f90
  11084. FNMA f94 = f86, f48, f94
  11085. FNMA f91 = f83, f48, f91
  11086. FNMA f95 = f87, f48, f95
  11087. ;;
  // group 4 -= group 2 * f49
  11088. FNMA f96 = f80, f49, f96
  11089. FNMA f100 = f84, f49, f100
  11090. FNMA f97 = f81, f49, f97
  11091. FNMA f101 = f85, f49, f101
  11092. FNMA f98 = f82, f49, f98
  11093. FNMA f102 = f86, f49, f102
  11094. FNMA f99 = f83, f49, f99
  11095. FNMA f103 = f87, f49, f103
  11096. ;;
  // group 5 -= group 2 * f50
  11097. FNMA f104 = f80, f50, f104
  11098. FNMA f108 = f84, f50, f108
  11099. FNMA f105 = f81, f50, f105
  11100. FNMA f109 = f85, f50, f109
  11101. FNMA f106 = f82, f50, f106
  11102. FNMA f110 = f86, f50, f110
  11103. FNMA f107 = f83, f50, f107
  11104. FNMA f111 = f87, f50, f111
  11105. ;;
  // group 6 -= group 2 * f51
  11106. FNMA f112 = f80, f51, f112
  11107. FNMA f116 = f84, f51, f116
  11108. FNMA f113 = f81, f51, f113
  11109. FNMA f117 = f85, f51, f117
  11110. FNMA f114 = f82, f51, f114
  11111. FNMA f118 = f86, f51, f118
  11112. FNMA f115 = f83, f51, f115
  11113. FNMA f119 = f87, f51, f119
  11114. ;;
  // group 7 -= group 2 * f52
  11115. FNMA f120 = f80, f52, f120
  11116. FNMA f124 = f84, f52, f124
  11117. FNMA f121 = f81, f52, f121
  11118. FNMA f125 = f85, f52, f125
  11119. FNMA f122 = f82, f52, f122
  11120. FNMA f126 = f86, f52, f126
  11121. FNMA f123 = f83, f52, f123
  11122. FNMA f127 = f87, f52, f127
  11123. ;;
  // Scale group 3 by its diagonal coefficient f53.
  11124. FMPY f88 = f88, f53
  11125. FMPY f92 = f92, f53
  11126. FMPY f89 = f89, f53
  11127. FMPY f93 = f93, f53
  11128. FMPY f90 = f90, f53
  11129. FMPY f94 = f94, f53
  11130. FMPY f91 = f91, f53
  11131. FMPY f95 = f95, f53
  11132. ;;
  // Fold group 3 into groups 4-7 using f54-f57.
  // group 4 -= group 3 * f54
  11133. FNMA f96 = f88, f54, f96
  11134. FNMA f100 = f92, f54, f100
  11135. FNMA f97 = f89, f54, f97
  11136. FNMA f101 = f93, f54, f101
  11137. FNMA f98 = f90, f54, f98
  11138. FNMA f102 = f94, f54, f102
  11139. FNMA f99 = f91, f54, f99
  11140. FNMA f103 = f95, f54, f103
  11141. ;;
  // group 5 -= group 3 * f55
  11142. FNMA f104 = f88, f55, f104
  11143. FNMA f108 = f92, f55, f108
  11144. FNMA f105 = f89, f55, f105
  11145. FNMA f109 = f93, f55, f109
  11146. FNMA f106 = f90, f55, f106
  11147. FNMA f110 = f94, f55, f110
  11148. FNMA f107 = f91, f55, f107
  11149. FNMA f111 = f95, f55, f111
  11150. ;;
  // group 6 -= group 3 * f56
  11151. FNMA f112 = f88, f56, f112
  11152. FNMA f116 = f92, f56, f116
  11153. FNMA f113 = f89, f56, f113
  11154. FNMA f117 = f93, f56, f117
  11155. FNMA f114 = f90, f56, f114
  11156. FNMA f118 = f94, f56, f118
  11157. FNMA f115 = f91, f56, f115
  11158. FNMA f119 = f95, f56, f119
  11159. ;;
  // group 7 -= group 3 * f57
  11160. FNMA f120 = f88, f57, f120
  11161. FNMA f124 = f92, f57, f124
  11162. FNMA f121 = f89, f57, f121
  11163. FNMA f125 = f93, f57, f125
  11164. FNMA f122 = f90, f57, f122
  11165. FNMA f126 = f94, f57, f126
  11166. FNMA f123 = f91, f57, f123
  11167. FNMA f127 = f95, f57, f127
  11168. ;;
  // Scale group 4 by its diagonal coefficient f58.
  11169. FMPY f96 = f96, f58
  11170. FMPY f100 = f100, f58
  11171. FMPY f97 = f97, f58
  11172. FMPY f101 = f101, f58
  11173. FMPY f98 = f98, f58
  11174. FMPY f102 = f102, f58
  11175. FMPY f99 = f99, f58
  11176. FMPY f103 = f103, f58
  11177. ;;
  // Fold group 4 into groups 5-7 using f59-f61 (the f61 step is issued in the
  // F slots of the first store bundles further down).
  // group 5 -= group 4 * f59
  11178. FNMA f104 = f96, f59, f104
  11179. FNMA f108 = f100, f59, f108
  11180. FNMA f105 = f97, f59, f105
  11181. FNMA f109 = f101, f59, f109
  11182. FNMA f106 = f98, f59, f106
  11183. FNMA f110 = f102, f59, f110
  11184. FNMA f107 = f99, f59, f107
  11185. FNMA f111 = f103, f59, f111
  11186. ;;
  // group 6 -= group 4 * f60
  11187. FNMA f112 = f96, f60, f112
  11188. FNMA f116 = f100, f60, f116
  11189. FNMA f113 = f97, f60, f113
  11190. FNMA f117 = f101, f60, f117
  11191. FNMA f114 = f98, f60, f114
  11192. FNMA f118 = f102, f60, f118
  11193. FNMA f115 = f99, f60, f115
  11194. FNMA f119 = f103, f60, f119
  11195. ;;
  // Store phase. Finished groups are written out while the remaining arithmetic
  // continues in the F slots. Within each group AOFFSET takes the first four
  // registers and AOFFSET2 the last four; the fourth store of each group
  // post-increments by 5 * SIZE so each pointer advances 8 elements per group.
  // Store group 0; group 7 -= group 4 * f61.
  11196. { .mfi
  11197. STFD [AOFFSET] = f64, SIZE
  11198. FNMA f120 = f96, f61, f120
  11199. }
  11200. { .mfi
  11201. STFD [AOFFSET2] = f68, SIZE
  11202. FNMA f124 = f100, f61, f124
  11203. }
  11204. ;;
  11205. { .mfi
  11206. STFD [AOFFSET] = f65, SIZE
  11207. FNMA f121 = f97, f61, f121
  11208. }
  11209. { .mfi
  11210. STFD [AOFFSET2] = f69, SIZE
  11211. FNMA f125 = f101, f61, f125
  11212. }
  11213. ;;
  11214. { .mfi
  11215. STFD [AOFFSET] = f66, SIZE
  11216. FNMA f122 = f98, f61, f122
  11217. }
  11218. { .mfi
  11219. STFD [AOFFSET2] = f70, SIZE
  11220. FNMA f126 = f102, f61, f126
  11221. }
  11222. ;;
  11223. { .mfi
  11224. STFD [AOFFSET] = f67, 5 * SIZE
  11225. FNMA f123 = f99, f61, f123
  11226. }
  11227. { .mfi
  11228. STFD [AOFFSET2] = f71, 5 * SIZE
  11229. FNMA f127 = f103, f61, f127
  11230. }
  11231. ;;
  // Store group 1; scale group 5 by its diagonal coefficient f16.
  11232. { .mfi
  11233. STFD [AOFFSET] = f72, SIZE
  11234. FMPY f104 = f104, f16
  11235. }
  11236. { .mfi
  11237. STFD [AOFFSET2] = f76, SIZE
  11238. FMPY f108 = f108, f16
  11239. }
  11240. ;;
  11241. { .mfi
  11242. STFD [AOFFSET] = f73, SIZE
  11243. FMPY f105 = f105, f16
  11244. }
  11245. { .mfi
  11246. STFD [AOFFSET2] = f77, SIZE
  11247. FMPY f109 = f109, f16
  11248. }
  11249. ;;
  11250. { .mfi
  11251. STFD [AOFFSET] = f74, SIZE
  11252. FMPY f106 = f106, f16
  11253. }
  11254. { .mfi
  11255. STFD [AOFFSET2] = f78, SIZE
  11256. FMPY f110 = f110, f16
  11257. }
  11258. ;;
  11259. { .mfi
  11260. STFD [AOFFSET] = f75, 5 * SIZE
  11261. FMPY f107 = f107, f16
  11262. }
  11263. { .mfi
  11264. STFD [AOFFSET2] = f79, 5 * SIZE
  11265. FMPY f111 = f111, f16
  11266. }
  11267. ;;
  // Store group 2; group 6 -= group 5 * f17.
  11268. { .mfi
  11269. STFD [AOFFSET] = f80, SIZE
  11270. FNMA f112 = f104, f17, f112
  11271. }
  11272. { .mfi
  11273. STFD [AOFFSET2] = f84, SIZE
  11274. FNMA f116 = f108, f17, f116
  11275. }
  11276. ;;
  11277. { .mfi
  11278. STFD [AOFFSET] = f81, SIZE
  11279. FNMA f113 = f105, f17, f113
  11280. }
  11281. { .mfi
  11282. STFD [AOFFSET2] = f85, SIZE
  11283. FNMA f117 = f109, f17, f117
  11284. }
  11285. ;;
  11286. { .mfi
  11287. STFD [AOFFSET] = f82, SIZE
  11288. FNMA f114 = f106, f17, f114
  11289. }
  11290. { .mfi
  11291. STFD [AOFFSET2] = f86, SIZE
  11292. FNMA f118 = f110, f17, f118
  11293. }
  11294. ;;
  11295. { .mfi
  11296. STFD [AOFFSET] = f83, 5 * SIZE
  11297. FNMA f115 = f107, f17, f115
  11298. }
  11299. { .mfi
  11300. STFD [AOFFSET2] = f87, 5 * SIZE
  11301. FNMA f119 = f111, f17, f119
  11302. }
  11303. ;;
  // Store group 3; group 7 -= group 5 * f18.
  11304. { .mfi
  11305. STFD [AOFFSET] = f88, SIZE
  11306. FNMA f120 = f104, f18, f120
  11307. }
  11308. { .mfi
  11309. STFD [AOFFSET2] = f92, SIZE
  11310. FNMA f124 = f108, f18, f124
  11311. }
  11312. ;;
  11313. { .mfi
  11314. STFD [AOFFSET] = f89, SIZE
  11315. FNMA f121 = f105, f18, f121
  11316. }
  11317. { .mfi
  11318. STFD [AOFFSET2] = f93, SIZE
  11319. FNMA f125 = f109, f18, f125
  11320. }
  11321. ;;
  11322. { .mfi
  11323. STFD [AOFFSET] = f90, SIZE
  11324. FNMA f122 = f106, f18, f122
  11325. }
  11326. { .mfi
  11327. STFD [AOFFSET2] = f94, SIZE
  11328. FNMA f126 = f110, f18, f126
  11329. }
  11330. ;;
  11331. { .mfi
  11332. STFD [AOFFSET] = f91, 5 * SIZE
  11333. FNMA f123 = f107, f18, f123
  11334. }
  11335. { .mfi
  11336. STFD [AOFFSET2] = f95, 5 * SIZE
  11337. FNMA f127 = f111, f18, f127
  11338. }
  11339. ;;
  // Store group 4; scale group 6 by its diagonal coefficient f19.
  11340. { .mfi
  11341. STFD [AOFFSET] = f96, SIZE
  11342. FMPY f112 = f112, f19
  11343. }
  11344. { .mfi
  11345. STFD [AOFFSET2] = f100, SIZE
  11346. FMPY f116 = f116, f19
  11347. }
  11348. ;;
  11349. { .mfi
  11350. STFD [AOFFSET] = f97, SIZE
  11351. FMPY f113 = f113, f19
  11352. }
  11353. { .mfi
  11354. STFD [AOFFSET2] = f101, SIZE
  11355. FMPY f117 = f117, f19
  11356. }
  11357. ;;
  11358. { .mfi
  11359. STFD [AOFFSET] = f98, SIZE
  11360. FMPY f114 = f114, f19
  11361. }
  11362. { .mfi
  11363. STFD [AOFFSET2] = f102, SIZE
  11364. FMPY f118 = f118, f19
  11365. }
  11366. ;;
  11367. { .mfi
  11368. STFD [AOFFSET] = f99, 5 * SIZE
  11369. FMPY f115 = f115, f19
  11370. }
  11371. { .mfi
  11372. STFD [AOFFSET2] = f103, 5 * SIZE
  11373. FMPY f119 = f119, f19
  11374. }
  11375. ;;
  // Store group 5; group 7 -= group 6 * f20.
  11376. { .mfi
  11377. STFD [AOFFSET] = f104, SIZE
  11378. FNMA f120 = f112, f20, f120
  11379. }
  11380. { .mfi
  11381. STFD [AOFFSET2] = f108, SIZE
  11382. FNMA f124 = f116, f20, f124
  11383. }
  11384. ;;
  11385. { .mfi
  11386. STFD [AOFFSET] = f105, SIZE
  11387. FNMA f121 = f113, f20, f121
  11388. }
  11389. { .mfi
  11390. STFD [AOFFSET2] = f109, SIZE
  11391. FNMA f125 = f117, f20, f125
  11392. }
  11393. ;;
  11394. { .mfi
  11395. STFD [AOFFSET] = f106, SIZE
  11396. FNMA f122 = f114, f20, f122
  11397. }
  11398. { .mfi
  11399. STFD [AOFFSET2] = f110, SIZE
  11400. FNMA f126 = f118, f20, f126
  11401. }
  11402. ;;
  11403. { .mfi
  11404. STFD [AOFFSET] = f107, 5 * SIZE
  11405. FNMA f123 = f115, f20, f123
  11406. }
  11407. { .mfi
  11408. STFD [AOFFSET2] = f111, 5 * SIZE
  11409. FNMA f127 = f119, f20, f127
  11410. }
  11411. ;;
  // Store group 6; scale group 7 by its diagonal coefficient f21.
  11412. { .mfi
  11413. STFD [AOFFSET] = f112, SIZE
  11414. FMPY f120 = f120, f21
  11415. }
  11416. { .mfi
  11417. STFD [AOFFSET2] = f116, SIZE
  11418. FMPY f124 = f124, f21
  11419. }
  11420. ;;
  11421. { .mfi
  11422. STFD [AOFFSET] = f113, SIZE
  11423. FMPY f121 = f121, f21
  11424. }
  11425. { .mfi
  11426. STFD [AOFFSET2] = f117, SIZE
  11427. FMPY f125 = f125, f21
  11428. }
  11429. ;;
  11430. { .mfi
  11431. STFD [AOFFSET] = f114, SIZE
  11432. FMPY f122 = f122, f21
  11433. }
  11434. { .mfi
  11435. STFD [AOFFSET2] = f118, SIZE
  11436. FMPY f126 = f126, f21
  11437. }
  11438. ;;
  11439. { .mfi
  11440. STFD [AOFFSET] = f115, 5 * SIZE
  11441. FMPY f123 = f123, f21
  11442. }
  11443. { .mfi
  11444. STFD [AOFFSET2] = f119, 5 * SIZE
  11445. FMPY f127 = f127, f21
  11446. }
  11447. ;;
  // Store group 7; no arithmetic is left, so both memory slots carry stores.
  11448. { .mmi
  11449. STFD [AOFFSET] = f120, SIZE
  11450. STFD [AOFFSET2] = f124, SIZE
  11451. }
  11452. ;;
  11453. { .mmi
  11454. STFD [AOFFSET] = f121, SIZE
  11455. STFD [AOFFSET2] = f125, SIZE
  11456. }
  11457. ;;
  // C9 = C1 + 4 * SIZE; its consumer is outside this chunk.
  11458. { .mmi
  11459. STFD [AOFFSET] = f122, SIZE
  11460. STFD [AOFFSET2] = f126, SIZE
  11461. adds C9 = 4 * SIZE, C1
  11462. }
  11463. ;;
  // The last store of each stream has no post-increment, so each pointer sits
  // 7 * 8 + 3 = 59 elements past its starting value; rewind both by 59.
  11464. { .mfi
  11465. STFD [AOFFSET] = f123
  11466. adds AOFFSET = - 59 * SIZE, AOFFSET
  11467. }
  11468. { .mfi
  11469. STFD [AOFFSET2] = f127
  11470. adds AOFFSET2 = - 59 * SIZE, AOFFSET2
  11471. }
  11472. ;;
  11473. #endif
  11474. #ifdef RT
  11475. adds BOFFSET = 62 * SIZE, BOFFSET
  11476. ;;
  11477. LDFPD f33, f32 = [BOFFSET]
  11478. adds BOFFSET = - 2 * SIZE, BOFFSET
  11479. ;;
  11480. { .mfi
  11481. LDFPD f35, f34 = [BOFFSET]
  11482. FMPY f120 = f120, f32
  11483. adds BOFFSET = - 2 * SIZE, BOFFSET
  11484. }
  11485. { .mfi
  11486. nop __LINE__
  11487. FMPY f124 = f124, f32
  11488. nop __LINE__
  11489. }
  11490. ;;
  11491. { .mfi
  11492. LDFPD f37, f36 = [BOFFSET]
  11493. FMPY f121 = f121, f32
  11494. adds BOFFSET = - 2 * SIZE, BOFFSET
  11495. }
  11496. { .mfi
  11497. nop __LINE__
  11498. FMPY f125 = f125, f32
  11499. nop __LINE__
  11500. }
  11501. ;;
  11502. { .mfi
  11503. LDFPD f39, f38 = [BOFFSET]
  11504. FMPY f122 = f122, f32
  11505. adds BOFFSET = - 2 * SIZE, BOFFSET
  11506. }
  11507. { .mfi
  11508. nop __LINE__
  11509. FMPY f126 = f126, f32
  11510. nop __LINE__
  11511. }
  11512. ;;
  11513. { .mfi
  11514. LDFD f40 = [BOFFSET], -2 * SIZE
  11515. FMPY f123 = f123, f32
  11516. nop __LINE__
  11517. }
  11518. { .mfi
  11519. nop __LINE__
  11520. FMPY f127 = f127, f32
  11521. nop __LINE__
  11522. }
  11523. ;;
  11524. { .mfi
  11525. LDFPD f42, f41 = [BOFFSET]
  11526. FNMA f112 = f120, f33, f112
  11527. adds BOFFSET = - 2 * SIZE, BOFFSET
  11528. }
  11529. { .mfi
  11530. nop __LINE__
  11531. FNMA f116 = f124, f33, f116
  11532. nop __LINE__
  11533. }
  11534. ;;
  11535. { .mfi
  11536. LDFPD f44, f43 = [BOFFSET]
  11537. FNMA f113 = f121, f33, f113
  11538. adds BOFFSET = - 2 * SIZE, BOFFSET
  11539. }
  11540. { .mfi
  11541. nop __LINE__
  11542. FNMA f117 = f125, f33, f117
  11543. nop __LINE__
  11544. }
  11545. ;;
  11546. { .mfi
  11547. LDFPD f46, f45 = [BOFFSET]
  11548. FNMA f114 = f122, f33, f114
  11549. adds BOFFSET = - 4 * SIZE, BOFFSET
  11550. }
  11551. { .mfi
  11552. nop __LINE__
  11553. FNMA f118 = f126, f33, f118
  11554. nop __LINE__
  11555. }
  11556. ;;
  11557. { .mfi
  11558. LDFPD f48, f47 = [BOFFSET]
  11559. FNMA f115 = f123, f33, f115
  11560. adds BOFFSET = - 2 * SIZE, BOFFSET
  11561. }
  11562. { .mfi
  11563. nop __LINE__
  11564. FNMA f119 = f127, f33, f119
  11565. nop __LINE__
  11566. }
  11567. ;;
  11568. { .mfi
  11569. LDFPD f50, f49 = [BOFFSET]
  11570. FNMA f104 = f120, f34, f104
  11571. adds BOFFSET = - 2 * SIZE, BOFFSET
  11572. }
  11573. { .mfi
  11574. nop __LINE__
  11575. FNMA f108 = f124, f34, f108
  11576. nop __LINE__
  11577. }
  11578. ;;
  11579. { .mfi
  11580. LDFPD f52, f51 = [BOFFSET]
  11581. FNMA f105 = f121, f34, f105
  11582. adds BOFFSET = - 4 * SIZE, BOFFSET
  11583. }
  11584. { .mfi
  11585. nop __LINE__
  11586. FNMA f109 = f125, f34, f109
  11587. nop __LINE__
  11588. }
  11589. ;;
  11590. { .mfi
  11591. LDFD f53 = [BOFFSET], -2 * SIZE
  11592. FNMA f106 = f122, f34, f106
  11593. }
  11594. { .mfi
  11595. nop __LINE__
  11596. FNMA f110 = f126, f34, f110
  11597. nop __LINE__
  11598. }
  11599. ;;
  11600. { .mfi
  11601. LDFPD f55, f54 = [BOFFSET]
  11602. FNMA f107 = f123, f34, f107
  11603. adds BOFFSET = - 2 * SIZE, BOFFSET
  11604. }
  11605. { .mfi
  11606. nop __LINE__
  11607. FNMA f111 = f127, f34, f111
  11608. nop __LINE__
  11609. }
  11610. ;;
  11611. { .mfi
  11612. LDFPD f57, f56 = [BOFFSET]
  11613. FNMA f96 = f120, f35, f96
  11614. adds BOFFSET = - 6 * SIZE, BOFFSET
  11615. }
  11616. { .mfi
  11617. nop __LINE__
  11618. FNMA f100 = f124, f35, f100
  11619. nop __LINE__
  11620. }
  11621. ;;
  11622. { .mfi
  11623. LDFPD f59, f58 = [BOFFSET]
  11624. FNMA f97 = f121, f35, f97
  11625. adds BOFFSET = - 2 * SIZE, BOFFSET
  11626. }
  11627. { .mfi
  11628. nop __LINE__
  11629. FNMA f101 = f125, f35, f101
  11630. nop __LINE__
  11631. }
  11632. ;;
  11633. { .mfi
  11634. LDFPD f61, f60 = [BOFFSET]
  11635. FNMA f98 = f122, f35, f98
  11636. adds BOFFSET = - 6 * SIZE, BOFFSET
  11637. }
  11638. { .mfi
  11639. nop __LINE__
  11640. FNMA f102 = f126, f35, f102
  11641. nop __LINE__
  11642. }
  11643. ;;
  11644. { .mfi
  11645. LDFD f16 = [BOFFSET], -2 * SIZE
  11646. FNMA f99 = f123, f35, f99
  11647. nop __LINE__
  11648. }
  11649. { .mfi
  11650. nop __LINE__
  11651. FNMA f103 = f127, f35, f103
  11652. nop __LINE__
  11653. }
  11654. ;;
  11655. { .mfi
  11656. LDFPD f18, f17 = [BOFFSET]
  11657. FNMA f88 = f120, f36, f88
  11658. adds BOFFSET = - 8 * SIZE, BOFFSET
  11659. }
  11660. { .mfi
  11661. nop __LINE__
  11662. FNMA f92 = f124, f36, f92
  11663. nop __LINE__
  11664. }
  11665. ;;
  11666. { .mfi
  11667. LDFPD f20, f19 = [BOFFSET]
  11668. FNMA f89 = f121, f36, f89
  11669. adds BOFFSET = - 8 * SIZE, BOFFSET
  11670. }
  11671. { .mfi
  11672. nop __LINE__
  11673. FNMA f93 = f125, f36, f93
  11674. nop __LINE__
  11675. }
  11676. ;;
  11677. { .mfi
  11678. LDFD f21 = [BOFFSET]
  11679. FNMA f90 = f122, f36, f90
  11680. nop __LINE__
  11681. }
  11682. { .mfi
  11683. nop __LINE__
  11684. FNMA f94 = f126, f36, f94
  11685. nop __LINE__
  11686. }
  11687. ;;
  11688. { .mfi
  11689. nop __LINE__
  11690. FNMA f91 = f123, f36, f91
  11691. adds AOFFSET = 56 * SIZE, AOFFSET
  11692. }
  11693. { .mfi
  11694. nop __LINE__
  11695. FNMA f95 = f127, f36, f95
  11696. adds AOFFSET2 = 56 * SIZE, AOFFSET2
  11697. }
  11698. ;;
  11699. FNMA f80 = f120, f37, f80
  11700. FNMA f84 = f124, f37, f84
  11701. FNMA f81 = f121, f37, f81
  11702. FNMA f85 = f125, f37, f85
  11703. FNMA f82 = f122, f37, f82
  11704. FNMA f86 = f126, f37, f86
  11705. FNMA f83 = f123, f37, f83
  11706. FNMA f87 = f127, f37, f87
  11707. ;;
  11708. FNMA f72 = f120, f38, f72
  11709. FNMA f76 = f124, f38, f76
  11710. FNMA f73 = f121, f38, f73
  11711. FNMA f77 = f125, f38, f77
  11712. FNMA f74 = f122, f38, f74
  11713. FNMA f78 = f126, f38, f78
  11714. FNMA f75 = f123, f38, f75
  11715. FNMA f79 = f127, f38, f79
  11716. ;;
  11717. FNMA f64 = f120, f39, f64
  11718. FNMA f68 = f124, f39, f68
  11719. FNMA f65 = f121, f39, f65
  11720. FNMA f69 = f125, f39, f69
  11721. FNMA f66 = f122, f39, f66
  11722. FNMA f70 = f126, f39, f70
  11723. FNMA f67 = f123, f39, f67
  11724. FNMA f71 = f127, f39, f71
  11725. ;;
  11726. FMPY f112 = f112, f40
  11727. FMPY f116 = f116, f40
  11728. FMPY f113 = f113, f40
  11729. FMPY f117 = f117, f40
  11730. FMPY f114 = f114, f40
  11731. FMPY f118 = f118, f40
  11732. FMPY f115 = f115, f40
  11733. FMPY f119 = f119, f40
  11734. ;;
  11735. FNMA f104 = f112, f41, f104
  11736. FNMA f108 = f116, f41, f108
  11737. FNMA f105 = f113, f41, f105
  11738. FNMA f109 = f117, f41, f109
  11739. FNMA f106 = f114, f41, f106
  11740. FNMA f110 = f118, f41, f110
  11741. FNMA f107 = f115, f41, f107
  11742. FNMA f111 = f119, f41, f111
  11743. ;;
  11744. FNMA f96 = f112, f42, f96
  11745. FNMA f100 = f116, f42, f100
  11746. FNMA f97 = f113, f42, f97
  11747. FNMA f101 = f117, f42, f101
  11748. FNMA f98 = f114, f42, f98
  11749. FNMA f102 = f118, f42, f102
  11750. FNMA f99 = f115, f42, f99
  11751. FNMA f103 = f119, f42, f103
  11752. ;;
  11753. FNMA f88 = f112, f43, f88
  11754. FNMA f92 = f116, f43, f92
  11755. FNMA f89 = f113, f43, f89
  11756. FNMA f93 = f117, f43, f93
  11757. FNMA f90 = f114, f43, f90
  11758. FNMA f94 = f118, f43, f94
  11759. FNMA f91 = f115, f43, f91
  11760. FNMA f95 = f119, f43, f95
  11761. ;;
  11762. FNMA f80 = f112, f44, f80
  11763. FNMA f84 = f116, f44, f84
  11764. FNMA f81 = f113, f44, f81
  11765. FNMA f85 = f117, f44, f85
  11766. FNMA f82 = f114, f44, f82
  11767. FNMA f86 = f118, f44, f86
  11768. FNMA f83 = f115, f44, f83
  11769. FNMA f87 = f119, f44, f87
  11770. ;;
  11771. FNMA f72 = f112, f45, f72
  11772. FNMA f76 = f116, f45, f76
  11773. FNMA f73 = f113, f45, f73
  11774. FNMA f77 = f117, f45, f77
  11775. FNMA f74 = f114, f45, f74
  11776. FNMA f78 = f118, f45, f78
  11777. FNMA f75 = f115, f45, f75
  11778. FNMA f79 = f119, f45, f79
  11779. ;;
  11780. FNMA f64 = f112, f46, f64
  11781. FNMA f68 = f116, f46, f68
  11782. FNMA f65 = f113, f46, f65
  11783. FNMA f69 = f117, f46, f69
  11784. FNMA f66 = f114, f46, f66
  11785. FNMA f70 = f118, f46, f70
  11786. FNMA f67 = f115, f46, f67
  11787. FNMA f71 = f119, f46, f71
  11788. ;;
  11789. FMPY f104 = f104, f47
  11790. FMPY f108 = f108, f47
  11791. FMPY f105 = f105, f47
  11792. FMPY f109 = f109, f47
  11793. FMPY f106 = f106, f47
  11794. FMPY f110 = f110, f47
  11795. FMPY f107 = f107, f47
  11796. FMPY f111 = f111, f47
  11797. ;;
  11798. FNMA f96 = f104, f48, f96
  11799. FNMA f100 = f108, f48, f100
  11800. FNMA f97 = f105, f48, f97
  11801. FNMA f101 = f109, f48, f101
  11802. FNMA f98 = f106, f48, f98
  11803. FNMA f102 = f110, f48, f102
  11804. FNMA f99 = f107, f48, f99
  11805. FNMA f103 = f111, f48, f103
  11806. ;;
  11807. FNMA f88 = f104, f49, f88
  11808. FNMA f92 = f108, f49, f92
  11809. FNMA f89 = f105, f49, f89
  11810. FNMA f93 = f109, f49, f93
  11811. FNMA f90 = f106, f49, f90
  11812. FNMA f94 = f110, f49, f94
  11813. FNMA f91 = f107, f49, f91
  11814. FNMA f95 = f111, f49, f95
  11815. ;;
  11816. FNMA f80 = f104, f50, f80
  11817. FNMA f84 = f108, f50, f84
  11818. FNMA f81 = f105, f50, f81
  11819. FNMA f85 = f109, f50, f85
  11820. FNMA f82 = f106, f50, f82
  11821. FNMA f86 = f110, f50, f86
  11822. FNMA f83 = f107, f50, f83
  11823. FNMA f87 = f111, f50, f87
  11824. ;;
  11825. FNMA f72 = f104, f51, f72
  11826. FNMA f76 = f108, f51, f76
  11827. FNMA f73 = f105, f51, f73
  11828. FNMA f77 = f109, f51, f77
  11829. FNMA f74 = f106, f51, f74
  11830. FNMA f78 = f110, f51, f78
  11831. FNMA f75 = f107, f51, f75
  11832. FNMA f79 = f111, f51, f79
  11833. ;;
  11834. FNMA f64 = f104, f52, f64
  11835. FNMA f68 = f108, f52, f68
  11836. FNMA f65 = f105, f52, f65
  11837. FNMA f69 = f109, f52, f69
  11838. FNMA f66 = f106, f52, f66
  11839. FNMA f70 = f110, f52, f70
  11840. FNMA f67 = f107, f52, f67
  11841. FNMA f71 = f111, f52, f71
  11842. ;;
  11843. FMPY f96 = f96, f53
  11844. FMPY f100 = f100, f53
  11845. FMPY f97 = f97, f53
  11846. FMPY f101 = f101, f53
  11847. FMPY f98 = f98, f53
  11848. FMPY f102 = f102, f53
  11849. FMPY f99 = f99, f53
  11850. FMPY f103 = f103, f53
  11851. ;;
  11852. FNMA f88 = f96, f54, f88
  11853. FNMA f92 = f100, f54, f92
  11854. FNMA f89 = f97, f54, f89
  11855. FNMA f93 = f101, f54, f93
  11856. FNMA f90 = f98, f54, f90
  11857. FNMA f94 = f102, f54, f94
  11858. FNMA f91 = f99, f54, f91
  11859. FNMA f95 = f103, f54, f95
  11860. ;;
  11861. FNMA f80 = f96, f55, f80
  11862. FNMA f84 = f100, f55, f84
  11863. FNMA f81 = f97, f55, f81
  11864. FNMA f85 = f101, f55, f85
  11865. FNMA f82 = f98, f55, f82
  11866. FNMA f86 = f102, f55, f86
  11867. FNMA f83 = f99, f55, f83
  11868. FNMA f87 = f103, f55, f87
  11869. ;;
  11870. FNMA f72 = f96, f56, f72
  11871. FNMA f76 = f100, f56, f76
  11872. FNMA f73 = f97, f56, f73
  11873. FNMA f77 = f101, f56, f77
  11874. FNMA f74 = f98, f56, f74
  11875. FNMA f78 = f102, f56, f78
  11876. FNMA f75 = f99, f56, f75
  11877. FNMA f79 = f103, f56, f79
  11878. ;;
  11879. FNMA f64 = f96, f57, f64
  11880. FNMA f68 = f100, f57, f68
  11881. FNMA f65 = f97, f57, f65
  11882. FNMA f69 = f101, f57, f69
  11883. FNMA f66 = f98, f57, f66
  11884. FNMA f70 = f102, f57, f70
  11885. FNMA f67 = f99, f57, f67
  11886. FNMA f71 = f103, f57, f71
  11887. ;;
  11888. FMPY f88 = f88, f58
  11889. FMPY f92 = f92, f58
  11890. FMPY f89 = f89, f58
  11891. FMPY f93 = f93, f58
  11892. FMPY f90 = f90, f58
  11893. FMPY f94 = f94, f58
  11894. FMPY f91 = f91, f58
  11895. FMPY f95 = f95, f58
  11896. ;;
  11897. FNMA f80 = f88, f59, f80
  11898. FNMA f84 = f92, f59, f84
  11899. FNMA f81 = f89, f59, f81
  11900. FNMA f85 = f93, f59, f85
  11901. FNMA f82 = f90, f59, f82
  11902. FNMA f86 = f94, f59, f86
  11903. FNMA f83 = f91, f59, f83
  11904. FNMA f87 = f95, f59, f87
  11905. ;;
  11906. FNMA f72 = f88, f60, f72
  11907. FNMA f76 = f92, f60, f76
  11908. FNMA f73 = f89, f60, f73
  11909. FNMA f77 = f93, f60, f77
  11910. FNMA f74 = f90, f60, f74
  11911. FNMA f78 = f94, f60, f78
  11912. FNMA f75 = f91, f60, f75
  11913. FNMA f79 = f95, f60, f79
  11914. ;;
  11915. { .mfi
  11916. STFD [AOFFSET] = f120, SIZE
  11917. FNMA f64 = f88, f61, f64
  11918. }
  11919. { .mfi
  11920. STFD [AOFFSET2] = f124, SIZE
  11921. FNMA f68 = f92, f61, f68
  11922. }
  11923. ;;
  11924. { .mfi
  11925. STFD [AOFFSET] = f121, SIZE
  11926. FNMA f65 = f89, f61, f65
  11927. }
  11928. { .mfi
  11929. STFD [AOFFSET2] = f125, SIZE
  11930. FNMA f69 = f93, f61, f69
  11931. }
  11932. ;;
  11933. { .mfi
  11934. STFD [AOFFSET] = f122, SIZE
  11935. FNMA f66 = f90, f61, f66
  11936. }
  11937. { .mfi
  11938. STFD [AOFFSET2] = f126, SIZE
  11939. FNMA f70 = f94, f61, f70
  11940. }
  11941. ;;
  11942. { .mfi
  11943. STFD [AOFFSET] = f123, - 11 * SIZE
  11944. FNMA f67 = f91, f61, f67
  11945. }
  11946. { .mfi
  11947. STFD [AOFFSET2] = f127, - 11 * SIZE
  11948. FNMA f71 = f95, f61, f71
  11949. }
  11950. ;;
  11951. { .mfi
  11952. STFD [AOFFSET] = f112, SIZE
  11953. FMPY f80 = f80, f16
  11954. }
  11955. { .mfi
  11956. STFD [AOFFSET2] = f116, SIZE
  11957. FMPY f84 = f84, f16
  11958. }
  11959. ;;
  11960. { .mfi
  11961. STFD [AOFFSET] = f113, SIZE
  11962. FMPY f81 = f81, f16
  11963. }
  11964. { .mfi
  11965. STFD [AOFFSET2] = f117, SIZE
  11966. FMPY f85 = f85, f16
  11967. }
  11968. ;;
  11969. { .mfi
  11970. STFD [AOFFSET] = f114, SIZE
  11971. FMPY f82 = f82, f16
  11972. }
  11973. { .mfi
  11974. STFD [AOFFSET2] = f118, SIZE
  11975. FMPY f86 = f86, f16
  11976. }
  11977. ;;
  11978. { .mfi
  11979. STFD [AOFFSET] = f115, - 11 * SIZE
  11980. FMPY f83 = f83, f16
  11981. }
  11982. { .mfi
  11983. STFD [AOFFSET2] = f119, - 11 * SIZE
  11984. FMPY f87 = f87, f16
  11985. }
  11986. ;;
  11987. { .mfi
  11988. STFD [AOFFSET] = f104, SIZE
  11989. FNMA f72 = f80, f17, f72
  11990. }
  11991. { .mfi
  11992. STFD [AOFFSET2] = f108, SIZE
  11993. FNMA f76 = f84, f17, f76
  11994. }
  11995. ;;
  11996. { .mfi
  11997. STFD [AOFFSET] = f105, SIZE
  11998. FNMA f73 = f81, f17, f73
  11999. }
  12000. { .mfi
  12001. STFD [AOFFSET2] = f109, SIZE
  12002. FNMA f77 = f85, f17, f77
  12003. }
  12004. ;;
  12005. { .mfi
  12006. STFD [AOFFSET] = f106, SIZE
  12007. FNMA f74 = f82, f17, f74
  12008. }
  12009. { .mfi
  12010. STFD [AOFFSET2] = f110, SIZE
  12011. FNMA f78 = f86, f17, f78
  12012. }
  12013. ;;
  12014. { .mfi
  12015. STFD [AOFFSET] = f107, - 11 * SIZE
  12016. FNMA f75 = f83, f17, f75
  12017. }
  12018. { .mfi
  12019. STFD [AOFFSET2] = f111, - 11 * SIZE
  12020. FNMA f79 = f87, f17, f79
  12021. }
  12022. ;;
  12023. { .mfi
  12024. STFD [AOFFSET] = f96, SIZE
  12025. FNMA f64 = f80, f18, f64
  12026. }
  12027. { .mfi
  12028. STFD [AOFFSET2] = f100, SIZE
  12029. FNMA f68 = f84, f18, f68
  12030. }
  12031. ;;
  12032. { .mfi
  12033. STFD [AOFFSET] = f97, SIZE
  12034. FNMA f65 = f81, f18, f65
  12035. }
  12036. { .mfi
  12037. STFD [AOFFSET2] = f101, SIZE
  12038. FNMA f69 = f85, f18, f69
  12039. }
  12040. ;;
  12041. { .mfi
  12042. STFD [AOFFSET] = f98, SIZE
  12043. FNMA f66 = f82, f18, f66
  12044. }
  12045. { .mfi
  12046. STFD [AOFFSET2] = f102, SIZE
  12047. FNMA f70 = f86, f18, f70
  12048. }
  12049. ;;
  12050. { .mfi
  12051. STFD [AOFFSET] = f99, - 11 * SIZE
  12052. FNMA f67 = f83, f18, f67
  12053. }
  12054. { .mfi
  12055. STFD [AOFFSET2] = f103, - 11 * SIZE
  12056. FNMA f71 = f87, f18, f71
  12057. }
  12058. ;;
  12059. { .mfi
  12060. STFD [AOFFSET] = f88, SIZE
  12061. FMPY f72 = f72, f19
  12062. }
  12063. { .mfi
  12064. STFD [AOFFSET2] = f92, SIZE
  12065. FMPY f76 = f76, f19
  12066. }
  12067. ;;
  12068. { .mfi
  12069. STFD [AOFFSET] = f89, SIZE
  12070. FMPY f73 = f73, f19
  12071. }
  12072. { .mfi
  12073. STFD [AOFFSET2] = f93, SIZE
  12074. FMPY f77 = f77, f19
  12075. }
  12076. ;;
  12077. { .mfi
  12078. STFD [AOFFSET] = f90, SIZE
  12079. FMPY f74 = f74, f19
  12080. }
  12081. { .mfi
  12082. STFD [AOFFSET2] = f94, SIZE
  12083. FMPY f78 = f78, f19
  12084. }
  12085. ;;
  12086. { .mfi
  12087. STFD [AOFFSET] = f91, - 11 * SIZE
  12088. FMPY f75 = f75, f19
  12089. }
  12090. { .mfi
  12091. STFD [AOFFSET2] = f95, - 11 * SIZE
  12092. FMPY f79 = f79, f19
  12093. }
  12094. ;;
  12095. { .mfi
  12096. STFD [AOFFSET] = f80, SIZE
  12097. FNMA f64 = f72, f20, f64
  12098. }
  12099. { .mfi
  12100. STFD [AOFFSET2] = f84, SIZE
  12101. FNMA f68 = f76, f20, f68
  12102. }
  12103. ;;
  12104. { .mfi
  12105. STFD [AOFFSET] = f81, SIZE
  12106. FNMA f65 = f73, f20, f65
  12107. }
  12108. { .mfi
  12109. STFD [AOFFSET2] = f85, SIZE
  12110. FNMA f69 = f77, f20, f69
  12111. }
  12112. ;;
  12113. { .mfi
  12114. STFD [AOFFSET] = f82, SIZE
  12115. FNMA f66 = f74, f20, f66
  12116. }
  12117. { .mfi
  12118. STFD [AOFFSET2] = f86, SIZE
  12119. FNMA f70 = f78, f20, f70
  12120. }
  12121. ;;
  12122. { .mfi
  12123. STFD [AOFFSET] = f83, - 11 * SIZE
  12124. FNMA f67 = f75, f20, f67
  12125. }
  12126. { .mfi
  12127. STFD [AOFFSET2] = f87, - 11 * SIZE
  12128. FNMA f71 = f79, f20, f71
  12129. }
  12130. ;;
  12131. { .mfi
  12132. STFD [AOFFSET] = f72, SIZE
  12133. FMPY f64 = f64, f21
  12134. }
  12135. { .mfi
  12136. STFD [AOFFSET2] = f76, SIZE
  12137. FMPY f68 = f68, f21
  12138. }
  12139. ;;
  12140. { .mfi
  12141. STFD [AOFFSET] = f73, SIZE
  12142. FMPY f65 = f65, f21
  12143. }
  12144. { .mfi
  12145. STFD [AOFFSET2] = f77, SIZE
  12146. FMPY f69 = f69, f21
  12147. }
  12148. ;;
  12149. { .mfi
  12150. STFD [AOFFSET] = f74, SIZE
  12151. FMPY f66 = f66, f21
  12152. }
  12153. { .mfi
  12154. STFD [AOFFSET2] = f78, SIZE
  12155. FMPY f70 = f70, f21
  12156. }
  12157. ;;
  12158. { .mfi
  12159. STFD [AOFFSET] = f75, - 11 * SIZE
  12160. FMPY f67 = f67, f21
  12161. }
  12162. { .mfi
  12163. STFD [AOFFSET2] = f79, - 11 * SIZE
  12164. FMPY f71 = f71, f21
  12165. }
  12166. ;;
  12167. { .mmi
  12168. STFD [AOFFSET] = f64, SIZE
  12169. STFD [AOFFSET2] = f68, SIZE
  12170. }
  12171. ;;
  12172. { .mmi
  12173. STFD [AOFFSET] = f65, SIZE
  12174. STFD [AOFFSET2] = f69, SIZE
  12175. }
  12176. ;;
  12177. { .mmi
  12178. STFD [AOFFSET] = f66, SIZE
  12179. STFD [AOFFSET2] = f70, SIZE
  12180. }
  12181. ;;
  12182. { .mmi
  12183. STFD [AOFFSET] = f67, - 3 * SIZE
  12184. STFD [AOFFSET2] = f71, - 3 * SIZE
  12185. adds C9 = 4 * SIZE, C1
  12186. }
  12187. ;;
  12188. #endif
  12189. { .mmf
  12190. STFD [C1 ] = f64, SIZE
  12191. STFD [C9 ] = f68, SIZE
  12192. mov f64 = f0
  12193. }
  12194. ;;
  12195. { .mmi
  12196. STFD [C1 ] = f65, SIZE
  12197. STFD [C9 ] = f69, SIZE
  12198. adds C10 = 4 * SIZE, C2
  12199. }
  12200. ;;
  12201. { .mmi
  12202. STFD [C1 ] = f66, SIZE
  12203. STFD [C9 ] = f70, SIZE
  12204. #ifdef LN
  12205. adds C3 = -8 * SIZE, C3
  12206. #else
  12207. nop __LINE__
  12208. #endif
  12209. }
  12210. ;;
  12211. { .mmi
  12212. #ifndef LN
  12213. STFD [C1 ] = f67, 5 * SIZE
  12214. #else
  12215. STFD [C1 ] = f67, - 3 * SIZE
  12216. #endif
  12217. STFD [C9 ] = f71
  12218. adds C11 = 4 * SIZE, C3
  12219. }
  12220. ;;
  12221. { .mmf
  12222. STFD [C2 ] = f72, SIZE
  12223. STFD [C10] = f76, SIZE
  12224. mov f72 = f0
  12225. }
  12226. ;;
  12227. { .mmi
  12228. STFD [C2 ] = f73, SIZE
  12229. STFD [C10] = f77, SIZE
  12230. #ifdef LN
  12231. adds C4 = -8 * SIZE, C4
  12232. #else
  12233. nop __LINE__
  12234. #endif
  12235. }
  12236. ;;
  12237. { .mmi
  12238. STFD [C2 ] = f74, SIZE
  12239. STFD [C10] = f78, SIZE
  12240. adds C12 = 4 * SIZE, C4
  12241. }
  12242. ;;
  12243. { .mmi
  12244. #ifndef LN
  12245. STFD [C2 ] = f75, 5 * SIZE
  12246. #else
  12247. STFD [C2 ] = f75, - 3 * SIZE
  12248. #endif
  12249. STFD [C10] = f79
  12250. #ifdef LN
  12251. adds C5 = -8 * SIZE, C5
  12252. #else
  12253. nop __LINE__
  12254. #endif
  12255. }
  12256. ;;
  12257. { .mmf
  12258. STFD [C3 ] = f80, SIZE
  12259. STFD [C11] = f84, SIZE
  12260. mov f80 = f0
  12261. }
  12262. ;;
  12263. { .mmi
  12264. STFD [C3 ] = f81, SIZE
  12265. STFD [C11] = f85, SIZE
  12266. adds C13 = 4 * SIZE, C5
  12267. }
  12268. ;;
  12269. { .mmi
  12270. STFD [C3 ] = f82, SIZE
  12271. STFD [C11] = f86, SIZE
  12272. #ifdef LN
  12273. adds C6 = -8 * SIZE, C6
  12274. #else
  12275. nop __LINE__
  12276. #endif
  12277. }
  12278. ;;
  12279. { .mmi
  12280. #ifndef LN
  12281. STFD [C3 ] = f83, 5 * SIZE
  12282. #else
  12283. STFD [C3 ] = f83, - 3 * SIZE
  12284. #endif
  12285. STFD [C11] = f87
  12286. adds C14 = 4 * SIZE, C6
  12287. }
  12288. ;;
  12289. { .mmf
  12290. STFD [C4 ] = f88, SIZE
  12291. STFD [C12] = f92, SIZE
  12292. mov f88 = f0
  12293. }
  12294. ;;
  12295. { .mmi
  12296. STFD [C4 ] = f89, SIZE
  12297. STFD [C12] = f93, SIZE
  12298. #ifdef LN
  12299. adds C8 = -8 * SIZE, C8
  12300. #else
  12301. nop __LINE__
  12302. #endif
  12303. }
  12304. ;;
  12305. { .mmi
  12306. STFD [C4 ] = f90, SIZE
  12307. STFD [C12] = f94, SIZE
  12308. adds C16 = 4 * SIZE, C8
  12309. }
  12310. ;;
  12311. { .mmi
  12312. #ifndef LN
  12313. STFD [C4 ] = f91, 5 * SIZE
  12314. #else
  12315. STFD [C4 ] = f91, - 3 * SIZE
  12316. #endif
  12317. STFD [C12] = f95
  12318. cmp.ne p6, p0 = 1, I
  12319. }
  12320. ;;
  12321. { .mmf
  12322. STFD [C5 ] = f96, SIZE
  12323. STFD [C13] = f100, SIZE
  12324. mov f96 = f0
  12325. }
  12326. ;;
  12327. { .mmi
  12328. STFD [C5 ] = f97, SIZE
  12329. STFD [C13] = f101, SIZE
  12330. adds I = -1, I
  12331. }
  12332. ;;
  12333. { .mmi
  12334. STFD [C5 ] = f98, SIZE
  12335. STFD [C13] = f102, SIZE
  12336. #ifdef LN
  12337. adds C7 = -8 * SIZE, C7
  12338. #else
  12339. nop __LINE__
  12340. #endif
  12341. }
  12342. ;;
  12343. { .mmi
  12344. #ifndef LN
  12345. STFD [C5 ] = f99, 5 * SIZE
  12346. #else
  12347. STFD [C5 ] = f99, - 3 * SIZE
  12348. #endif
  12349. STFD [C13] = f103
  12350. adds C15 = 4 * SIZE, C7
  12351. }
  12352. ;;
  12353. { .mmf
  12354. STFD [C6 ] = f104, SIZE
  12355. STFD [C14] = f108, SIZE
  12356. mov f104 = f0
  12357. }
  12358. ;;
  12359. { .mmi
  12360. STFD [C6 ] = f105, SIZE
  12361. STFD [C14] = f109, SIZE
  12362. shladd r2 = K, BASE_SHIFT, r0
  12363. }
  12364. ;;
  12365. { .mmi
  12366. STFD [C6 ] = f106, SIZE
  12367. STFD [C14] = f110, SIZE
  12368. sub L = K, KK
  12369. }
  12370. ;;
  12371. { .mmi
  12372. #ifndef LN
  12373. STFD [C6 ] = f107, 5 * SIZE
  12374. #else
  12375. STFD [C6 ] = f107, - 3 * SIZE
  12376. #endif
  12377. STFD [C14] = f111
  12378. #ifdef RT
  12379. shladd AORIG = r2, 3, AORIG
  12380. #else
  12381. nop __LINE__
  12382. #endif
  12383. }
  12384. ;;
  12385. { .mmf
  12386. STFD [C7 ] = f112, SIZE
  12387. STFD [C15] = f116, SIZE
  12388. mov f112 = f0
  12389. }
  12390. ;;
  12391. { .mmi
  12392. STFD [C7 ] = f113, SIZE
  12393. STFD [C15] = f117, SIZE
  12394. #if defined(LT) || defined(RN)
  12395. shladd L = L, BASE_SHIFT, r0
  12396. #else
  12397. nop __LINE__
  12398. #endif
  12399. }
  12400. ;;
  12401. { .mmi
  12402. STFD [C7 ] = f114, SIZE
  12403. STFD [C15] = f118, SIZE
  12404. #if defined(LT) || defined(RN)
  12405. shladd AOFFSET = L, 3, AOFFSET
  12406. #else
  12407. nop __LINE__
  12408. #endif
  12409. }
  12410. ;;
  12411. { .mmi
  12412. #ifndef LN
  12413. STFD [C7 ] = f115, 5 * SIZE
  12414. #else
  12415. STFD [C7 ] = f115, - 3 * SIZE
  12416. #endif
  12417. STFD [C15] = f119
  12418. #if defined(LT) || defined(RN)
  12419. shladd BOFFSET = L, 3, BOFFSET
  12420. #else
  12421. nop __LINE__
  12422. #endif
  12423. }
  12424. ;;
  12425. { .mmf
  12426. STFD [C8 ] = f120, SIZE
  12427. STFD [C16] = f124, SIZE
  12428. mov f120 = f0
  12429. }
  12430. ;;
  12431. { .mmi
  12432. STFD [C8 ] = f121, SIZE
  12433. STFD [C16] = f125, SIZE
  12434. #ifdef LT
  12435. adds KK = 8, KK
  12436. #elif defined LN
  12437. adds KK = -8, KK
  12438. #else
  12439. nop __LINE__
  12440. #endif
  12441. }
  12442. ;;
  12443. { .mmi
  12444. STFD [C8 ] = f122, SIZE
  12445. STFD [C16] = f126, SIZE
  12446. #if defined(LT) || defined(RN)
  12447. mov L = KK
  12448. #else
  12449. sub L = K, KK
  12450. #endif
  12451. }
  12452. ;;
  12453. { .mmb
  12454. #ifndef LN
  12455. STFD [C8 ] = f123, 5 * SIZE
  12456. #else
  12457. STFD [C8 ] = f123, - 3 * SIZE
  12458. #endif
  12459. STFD [C16] = f127
  12460. (p6) br.cond.dptk .L011
  12461. }
  12462. ;;
  // .L020: set-up for the tile that combines 4 A values with 8 B values per
  // k-step (the A1..A4 * B1..B8 products accumulated in .L022).
  //  - L is set to KK (LT/RN builds) or K - KK (other builds).
  //  - The whole tile is skipped (branch to .L030) when bit 2 of M is clear.
  //  - Accumulators are cleared and the first operands are preloaded here so
  //    the loop at .L022 can start with its multiplies immediately.
  // NOTE(review): f64, f72, f80, f88, f96, f104, f112 and f120 are not written
  // in this block; presumably they are already zero on entry -- confirm for
  // every path that reaches .L020.
  12463. .L020:
  12464. { .mib
  12465. #if defined(LT) || defined(RN)
  12466. mov L = KK
  12467. #else
  12468. sub L = K, KK
  12469. #endif
  // p6 = (bit 2 of M is zero); if so there is no tile of this width to do.
  12470. tbit.z p6, p0 = M, 2
  12471. (p6) br.cond.dptk .L030
  12472. }
  12473. ;;
  // p7 = (L != 0): guards every preload below, so no operand is read when the
  // k-count is zero.  BOFFSET restarts at B.
  // r2 = K << (2 + BASE_SHIFT) and r3 = KK << BASE_SHIFT; presumably byte
  // offsets, assuming BASE_SHIFT is log2 of the element size -- TODO confirm.
  12474. { .mmi
  12475. cmp.ne p7, p0 = r0, L
  12476. adds BOFFSET = 0 * SIZE, B
  12477. shl r2 = K, 2 + BASE_SHIFT
  12478. }
  12479. { .mmi
  12480. shladd r3 = KK, BASE_SHIFT, r0
  12481. nop __LINE__
  12482. nop __LINE__
  12483. }
  12484. ;;
  12485. #if defined(LT) || defined(RN)
  // LT/RN: BOFFSET and AOFFSET are used as they stand; start loading B.
  12486. { .mmf
  12487. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  12488. setf.d f73 = r0
  12489. mov f65 = f0
  12490. }
  12491. ;;
  12492. #else
  // Other builds: BOFFSET = B + (r3 << 3).  Under LN, AORIG is first moved
  // back by r2; AOFFSET is then rebuilt as AORIG + (r3 << 2).
  12493. { .mfi
  12494. shladd BOFFSET = r3, 3, B
  12495. mov f65 = f0
  12496. #ifdef LN
  12497. sub AORIG = AORIG, r2
  12498. #else
  12499. nop __LINE__
  12500. #endif
  12501. }
  12502. ;;
  12503. { .mfi
  12504. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  12505. mov f73 = f0
  12506. shladd AOFFSET = r3, 2, AORIG
  12507. }
  12508. ;;
  12509. #endif
  // From here on: clear the remaining accumulators, interleaved with the
  // operand preloads and the loop-count arithmetic.  Zero is written either
  // with "mov fN = f0" or with "setf.d fN = r0" (f0 and r0 read as zero);
  // presumably the two forms are mixed to use both M and F slots.
  12510. { .mfi
  12511. setf.d f105 = r0
  12512. mov f81 = f0
  12513. adds L = 1, L
  12514. }
  // PREA = AOFFSET + (PREFETCHSIZE + 8) * SIZE, the prefetch pointer used by
  // .L022.  p3 is forced true (r0 == r0); .L022 may clear it on its last pass.
  12515. { .mfi
  12516. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  12517. mov f89 = f0
  12518. cmp.eq p3, p0 = r0, r0
  12519. }
  12520. ;;
  // p12 = (bit 0 of L + 1 is zero), i.e. the original k-count is odd.
  // L then becomes ((L + 1) >> 1) - 1 over the next two groups: the loop body
  // covers two k-steps per pass, and ar.lc wants (passes - 1).
  12521. { .mfi
  12522. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  12523. mov f113 = f0
  12524. tbit.z p12, p0 = L, 0
  12525. }
  12526. { .mfi
  12527. setf.d f97 = r0
  12528. mov f121 = f0
  12529. shr L = L, 1
  12530. }
  12531. ;;
  12532. { .mmf
  12533. (p7) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  12534. setf.d f66 = r0
  12535. mov f67 = f0
  12536. }
  12537. { .mfi
  12538. setf.d f74 = r0
  12539. mov f75 = f0
  12540. adds L = -1, L
  12541. }
  12542. ;;
  // p6 = (L == -1): the original k-count was zero, so the loop is bypassed.
  12543. { .mmf
  12544. (p7) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  12545. setf.d f82 = r0
  12546. mov f83 = f0
  12547. }
  12548. { .mfi
  12549. setf.d f90 = r0
  12550. mov f91 = f0
  12551. cmp.eq p6, p0 = -1, L
  12552. }
  12553. ;;
  // Preload the four A operands (f32..f35) and hand the pass count to ar.lc.
  12554. { .mmf
  12555. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  12556. setf.d f98 = r0
  12557. mov f99 = f0
  12558. }
  12559. { .mfi
  12560. setf.d f106 = r0
  12561. mov f107 = f0
  12562. mov ar.lc = L
  12563. }
  12564. ;;
  12565. { .mmf
  12566. (p7) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  12567. setf.d f114 = r0
  12568. mov f115 = f0
  12569. }
  // Nothing to accumulate: go straight to the solve stage at .L028.
  12570. { .mfb
  12571. setf.d f122 = r0
  12572. mov f123 = f0
  12573. (p6) br.cond.dpnt .L028
  12574. }
  12575. ;;
  // .L022: inner k-loop of the 4 x 8 tile, two k-steps per pass, counted by
  // ar.lc (br.cloop at the bottom); L is decremented alongside it.
  //
  // Accumulator layout: f(64 + 8*j + i) += A(i+1) * B(j+1), i = 0..3, j = 0..7.
  //   step 1 (always)   : A = f32..f35, B = f48..f55
  //   step 2 (under p3) : A = f40..f43, B = f56..f63
  //
  // Predicates:
  //   p3  - enables step 2 and its operand loads.  It enters true; when p12 is
  //         set (odd k-count) it is recomputed as (L != 0), so step 2 is
  //         dropped on the final pass.
  //   p4  - (L != 0): reload f32..f35 / f48..f55 for the next pass.
  //   p5  - (L == 0): on the final pass set C9..C16 = C1..C8 + 2 * SIZE.
  //
  // Loads for step 2 are issued during step 1, and loads for the next pass
  // during the tail of step 1 and the head of step 2.
  12576. .L022:
  12577. { .mfi
  12578. lfetch.nt1 [PREA], 8 * SIZE
  12579. FMA f64 = f32, f48, f64 // A1 * B1
  12580. adds PREB = (PREFETCHSIZE + 0) * SIZE, BOFFSET
  12581. }
  12582. { .mfi
  12583. nop __LINE__
  12584. FMA f72 = f32, f49, f72 // A1 * B2
  12585. (p12) cmp.ne p3, p0 = 0, L
  12586. }
  12587. ;;
  12588. { .mfi
  12589. lfetch.nt1 [PREB], 16 * SIZE
  12590. FMA f80 = f32, f50, f80 // A1 * B3
  12591. cmp.ne p4, p5 = 0, L
  12592. }
  12593. { .mfb
  12594. nop __LINE__
  12595. FMA f88 = f32, f51, f88 // A1 * B4
  12596. nop __LINE__
  12597. }
  12598. ;;
  12599. { .mfi
  12600. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  12601. FMA f96 = f32, f52, f96 // A1 * B5
  12602. (p5) adds C9 = 2 * SIZE, C1
  12603. }
  12604. { .mfi
  12605. nop __LINE__
  12606. FMA f104 = f32, f53, f104 // A1 * B6
  12607. (p5) adds C10 = 2 * SIZE, C2
  12608. }
  12609. ;;
  12610. { .mfi
  12611. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  12612. FMA f112 = f32, f54, f112 // A1 * B7
  12613. (p5) adds C11 = 2 * SIZE, C3
  12614. }
  12615. { .mfi
  12616. nop __LINE__
  12617. FMA f120 = f32, f55, f120 // A1 * B8
  12618. (p5) adds C12 = 2 * SIZE, C4
  12619. }
  12620. ;;
  12621. { .mfi
  12622. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  12623. FMA f65 = f33, f48, f65 // A2 * B1
  12624. (p5) adds C13 = 2 * SIZE, C5
  12625. }
  12626. { .mfi
  12627. nop __LINE__
  12628. FMA f73 = f33, f49, f73 // A2 * B2
  12629. (p5) adds C14 = 2 * SIZE, C6
  12630. }
  12631. ;;
  12632. { .mfi
  12633. (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  12634. FMA f81 = f33, f50, f81 // A2 * B3
  12635. (p5) adds C15 = 2 * SIZE, C7
  12636. }
  12637. { .mfi
  12638. nop __LINE__
  12639. FMA f89 = f33, f51, f89 // A2 * B4
  12640. (p5) adds C16 = 2 * SIZE, C8
  12641. }
  12642. ;;
  12643. { .mfb
  12644. (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  12645. FMA f97 = f33, f52, f97 // A2 * B5
  12646. nop __LINE__
  12647. }
  12648. { .mfb
  12649. nop __LINE__
  12650. FMA f105 = f33, f53, f105 // A2 * B6
  12651. nop __LINE__
  12652. }
  12653. ;;
  12654. { .mfb
  12655. (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  12656. FMA f113 = f33, f54, f113 // A2 * B7
  12657. nop __LINE__
  12658. }
  12659. { .mfb
  12660. nop __LINE__
  12661. FMA f121 = f33, f55, f121 // A2 * B8
  12662. nop __LINE__
  12663. }
  12664. ;;
  12665. { .mfb
  12666. nop __LINE__
  12667. FMA f66 = f34, f48, f66 // A3 * B1
  12668. nop __LINE__
  12669. }
  12670. { .mfb
  12671. nop __LINE__
  12672. FMA f74 = f34, f49, f74 // A3 * B2
  12673. nop __LINE__
  12674. }
  12675. ;;
  12676. { .mfb
  12677. nop __LINE__
  12678. FMA f82 = f34, f50, f82 // A3 * B3
  12679. nop __LINE__
  12680. }
  12681. { .mfb
  12682. nop __LINE__
  12683. FMA f90 = f34, f51, f90 // A3 * B4
  12684. nop __LINE__
  12685. }
  12686. ;;
  // No stop (;;) appears between the bundles from here through the f123
  // update.  Every instruction in that run writes a different register, and
  // the reloads of f32/f33 and f48/f49 come after the last step-1 reads of
  // those registers in program order.
  // NOTE(review): the rest of the loop puts a stop after every bundle pair --
  // confirm the missing stops here are intentional and not lost text.
  12687. { .mfb
  12688. nop __LINE__
  12689. FMA f98 = f34, f52, f98 // A3 * B5
  12690. nop __LINE__
  12691. }
  12692. { .mfb
  12693. nop __LINE__
  12694. FMA f106 = f34, f53, f106 // A3 * B6
  12695. nop __LINE__
  12696. }
  12697. { .mfb
  12698. nop __LINE__
  12699. FMA f114 = f34, f54, f114 // A3 * B7
  12700. nop __LINE__
  12701. }
  12702. { .mfb
  12703. nop __LINE__
  12704. FMA f122 = f34, f55, f122 // A3 * B8
  12705. nop __LINE__
  12706. }
  12707. { .mfb
  12708. nop __LINE__
  12709. FMA f67 = f35, f48, f67 // A4 * B1
  12710. nop __LINE__
  12711. }
  12712. { .mfb
  12713. nop __LINE__
  12714. FMA f75 = f35, f49, f75 // A4 * B2
  12715. nop __LINE__
  12716. }
  12717. { .mfb
  12718. nop __LINE__
  12719. FMA f83 = f35, f50, f83 // A4 * B3
  12720. nop __LINE__
  12721. }
  12722. { .mfb
  12723. nop __LINE__
  12724. FMA f91 = f35, f51, f91 // A4 * B4
  12725. nop __LINE__
  12726. }
  // Step-1 operands start being reloaded for the next pass (only if p4).
  12727. { .mfb
  12728. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  12729. FMA f99 = f35, f52, f99 // A4 * B5
  12730. nop __LINE__
  12731. }
  12732. { .mfb
  12733. nop __LINE__
  12734. FMA f107 = f35, f53, f107 // A4 * B6
  12735. nop __LINE__
  12736. }
  12737. { .mfb
  12738. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  12739. FMA f115 = f35, f54, f115 // A4 * B7
  12740. nop __LINE__
  12741. }
  12742. { .mfb
  12743. nop __LINE__
  12744. FMA f123 = f35, f55, f123 // A4 * B8
  12745. nop __LINE__
  12746. }
  12747. ;;
  // ---- second k-step: every FMA below is predicated on p3 ----
  12748. { .mfb
  12749. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  12750. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  12751. nop __LINE__
  12752. }
  12753. { .mfb
  12754. nop __LINE__
  12755. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  12756. nop __LINE__
  12757. }
  12758. ;;
  12759. { .mfb
  12760. (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  12761. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  12762. nop __LINE__
  12763. }
  12764. { .mfb
  12765. nop __LINE__
  12766. (p3) FMA f88 = f40, f59, f88 // A1 * B4
  12767. nop __LINE__
  12768. }
  12769. ;;
  12770. { .mfb
  12771. nop __LINE__
  12772. (p3) FMA f96 = f40, f60, f96 // A1 * B5
  12773. nop __LINE__
  12774. }
  12775. { .mfb
  12776. nop __LINE__
  12777. (p3) FMA f104 = f40, f61, f104 // A1 * B6
  12778. nop __LINE__
  12779. }
  12780. ;;
  12781. { .mfb
  12782. nop __LINE__
  12783. (p3) FMA f112 = f40, f62, f112 // A1 * B7
  12784. nop __LINE__
  12785. }
  12786. { .mfb
  12787. nop __LINE__
  12788. (p3) FMA f120 = f40, f63, f120 // A1 * B8
  12789. nop __LINE__
  12790. }
  12791. ;;
  // NOTE(review): the second and fourth bundles of this group name only two
  // instructions under the three-slot .mfb template (no leading nop), unlike
  // the rest of the loop -- confirm the assembler pads them as intended.
  12792. { .mfb
  12793. (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  12794. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  12795. nop __LINE__
  12796. }
  12797. { .mfb
  12798. (p3) FMA f73 = f41, f57, f73 // A2 * B2
  12799. nop __LINE__
  12800. }
  12801. { .mfb
  12802. (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  12803. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  12804. nop __LINE__
  12805. }
  12806. { .mfb
  12807. (p3) FMA f89 = f41, f59, f89 // A2 * B4
  12808. nop __LINE__
  12809. }
  12810. ;;
  12811. { .mfb
  12812. nop __LINE__
  12813. (p3) FMA f97 = f41, f60, f97 // A2 * B5
  12814. nop __LINE__
  12815. }
  12816. { .mfb
  12817. nop __LINE__
  12818. (p3) FMA f105 = f41, f61, f105 // A2 * B6
  12819. nop __LINE__
  12820. }
  12821. ;;
  12822. { .mfb
  12823. nop __LINE__
  12824. (p3) FMA f113 = f41, f62, f113 // A2 * B7
  12825. nop __LINE__
  12826. }
  12827. { .mfb
  12828. nop __LINE__
  12829. (p3) FMA f121 = f41, f63, f121 // A2 * B8
  12830. nop __LINE__
  12831. }
  12832. ;;
  12833. { .mfb
  12834. nop __LINE__
  12835. (p3) FMA f66 = f42, f56, f66 // A3 * B1
  12836. nop __LINE__
  12837. }
  12838. { .mfb
  12839. nop __LINE__
  12840. (p3) FMA f74 = f42, f57, f74 // A3 * B2
  12841. nop __LINE__
  12842. }
  12843. ;;
  12844. { .mfb
  12845. nop __LINE__
  12846. (p3) FMA f82 = f42, f58, f82 // A3 * B3
  12847. nop __LINE__
  12848. }
  12849. { .mfb
  12850. nop __LINE__
  12851. (p3) FMA f90 = f42, f59, f90 // A3 * B4
  12852. nop __LINE__
  12853. }
  12854. ;;
  12855. { .mfb
  12856. nop __LINE__
  12857. (p3) FMA f98 = f42, f60, f98 // A3 * B5
  12858. nop __LINE__
  12859. }
  12860. { .mfb
  12861. nop __LINE__
  12862. (p3) FMA f106 = f42, f61, f106 // A3 * B6
  12863. nop __LINE__
  12864. }
  12865. ;;
  12866. { .mfb
  12867. nop __LINE__
  12868. (p3) FMA f114 = f42, f62, f114 // A3 * B7
  12869. nop __LINE__
  12870. }
  12871. { .mfb
  12872. nop __LINE__
  12873. (p3) FMA f122 = f42, f63, f122 // A3 * B8
  12874. nop __LINE__
  12875. }
  12876. ;;
  12877. { .mfb
  12878. nop __LINE__
  12879. (p3) FMA f67 = f43, f56, f67 // A4 * B1
  12880. nop __LINE__
  12881. }
  12882. { .mfb
  12883. nop __LINE__
  12884. (p3) FMA f75 = f43, f57, f75 // A4 * B2
  12885. nop __LINE__
  12886. }
  12887. ;;
  12888. { .mfb
  12889. nop __LINE__
  12890. (p3) FMA f83 = f43, f58, f83 // A4 * B3
  12891. nop __LINE__
  12892. }
  12893. { .mfb
  12894. nop __LINE__
  12895. (p3) FMA f91 = f43, f59, f91 // A4 * B4
  12896. nop __LINE__
  12897. }
  12898. ;;
  12899. { .mfb
  12900. nop __LINE__
  12901. (p3) FMA f99 = f43, f60, f99 // A4 * B5
  12902. nop __LINE__
  12903. }
  12904. { .mfb
  12905. nop __LINE__
  12906. (p3) FMA f107 = f43, f61, f107 // A4 * B6
  12907. nop __LINE__
  12908. }
  12909. ;;
  // L mirrors ar.lc so the p3/p4/p5 tests at the top of the next pass see the
  // remaining pass count; br.cloop does the actual loop control.
  12910. { .mfi
  12911. nop __LINE__
  12912. (p3) FMA f115 = f43, f62, f115 // A4 * B7
  12913. adds L = -1, L
  12914. }
  12915. { .mfb
  12916. nop __LINE__
  12917. (p3) FMA f123 = f43, f63, f123 // A4 * B8
  12918. br.cloop.sptk.few .L022
  12919. }
  12920. ;;
  12921. .L028:
  12922. #if defined(LN) || defined(RT)
  12923. #ifdef LN
  12924. adds r2 = -4, KK
  12925. #else
  12926. adds r2 = -8, KK
  12927. #endif
  12928. ;;
  12929. shladd r2 = r2, BASE_SHIFT, r0
  12930. ;;
  12931. shladd AOFFSET = r2, 2, AORIG
  12932. shladd BOFFSET = r2, 3, B
  12933. ;;
  12934. #endif
  12935. adds AOFFSET2 = 4 * SIZE, AOFFSET
  12936. adds BOFFSET2 = 4 * SIZE, BOFFSET
  12937. ;;
  12938. #if defined(LN) || defined(LT)
  12939. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  12940. ;;
  12941. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  12942. ;;
  12943. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  12944. ;;
  12945. LDFPD f38, f39 = [BOFFSET], 2 * SIZE
  12946. ;;
  12947. LDFPD f40, f41 = [BOFFSET], 2 * SIZE
  12948. ;;
  12949. LDFPD f42, f43 = [BOFFSET], 2 * SIZE
  12950. ;;
  12951. LDFPD f44, f45 = [BOFFSET], 2 * SIZE
  12952. ;;
  12953. LDFPD f46, f47 = [BOFFSET], 2 * SIZE
  12954. ;;
  12955. { .mfi
  12956. LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  12957. FSUB f64 = f32, f64
  12958. nop __LINE__
  12959. }
  12960. { .mfi
  12961. nop __LINE__
  12962. FSUB f72 = f33, f72
  12963. nop __LINE__
  12964. }
  12965. ;;
  12966. { .mfi
  12967. LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  12968. FSUB f80 = f34, f80
  12969. nop __LINE__
  12970. }
  12971. { .mfi
  12972. nop __LINE__
  12973. FSUB f88 = f35, f88
  12974. nop __LINE__
  12975. }
  12976. ;;
  12977. { .mfi
  12978. LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  12979. FSUB f96 = f36, f96
  12980. nop __LINE__
  12981. }
  12982. { .mfi
  12983. nop __LINE__
  12984. FSUB f104 = f37, f104
  12985. nop __LINE__
  12986. }
  12987. ;;
  12988. { .mfi
  12989. LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  12990. FSUB f112 = f38, f112
  12991. nop __LINE__
  12992. }
  12993. { .mfi
  12994. nop __LINE__
  12995. FSUB f120 = f39, f120
  12996. nop __LINE__
  12997. }
  12998. ;;
  12999. { .mfi
  13000. LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  13001. FSUB f65 = f40, f65
  13002. nop __LINE__
  13003. }
  13004. { .mfi
  13005. nop __LINE__
  13006. FSUB f73 = f41, f73
  13007. nop __LINE__
  13008. }
  13009. ;;
  13010. { .mfi
  13011. LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  13012. FSUB f81 = f42, f81
  13013. nop __LINE__
  13014. }
  13015. { .mfi
  13016. nop __LINE__
  13017. FSUB f89 = f43, f89
  13018. nop __LINE__
  13019. }
  13020. ;;
  13021. { .mfi
  13022. LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  13023. FSUB f97 = f44, f97
  13024. nop __LINE__
  13025. }
  13026. { .mfi
  13027. nop __LINE__
  13028. FSUB f105 = f45, f105
  13029. nop __LINE__
  13030. }
  13031. ;;
  13032. { .mfi
  13033. LDFPD f62, f63 = [BOFFSET]
  13034. FSUB f113 = f46, f113
  13035. adds BOFFSET = -30 * SIZE, BOFFSET
  13036. }
  13037. { .mfi
  13038. nop __LINE__
  13039. FSUB f121 = f47, f121
  13040. nop __LINE__
  13041. }
  13042. ;;
  13043. FSUB f66 = f48, f66
  13044. FSUB f74 = f49, f74
  13045. FSUB f82 = f50, f82
  13046. FSUB f90 = f51, f90
  13047. FSUB f98 = f52, f98
  13048. FSUB f106 = f53, f106
  13049. FSUB f114 = f54, f114
  13050. FSUB f122 = f55, f122
  13051. ;;
  13052. FSUB f67 = f56, f67
  13053. FSUB f75 = f57, f75
  13054. FSUB f83 = f58, f83
  13055. FSUB f91 = f59, f91
  13056. FSUB f99 = f60, f99
  13057. FSUB f107 = f61, f107
  13058. FSUB f115 = f62, f115
  13059. FSUB f123 = f63, f123
  13060. ;;
  13061. #else
  13062. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  13063. ;;
  13064. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  13065. ;;
  13066. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  13067. ;;
  13068. LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  13069. ;;
  13070. LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  13071. ;;
  13072. LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  13073. ;;
  13074. LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  13075. ;;
  13076. LDFPD f46, f47 = [AOFFSET], 2 * SIZE
  13077. ;;
  13078. LDFPD f48, f49 = [AOFFSET], 2 * SIZE
  13079. ;;
  13080. LDFPD f50, f51 = [AOFFSET], 2 * SIZE
  13081. ;;
  13082. LDFPD f52, f53 = [AOFFSET], 2 * SIZE
  13083. ;;
  13084. LDFPD f54, f55 = [AOFFSET], 2 * SIZE
  13085. ;;
  13086. LDFPD f56, f57 = [AOFFSET], 2 * SIZE
  13087. ;;
  13088. LDFPD f58, f59 = [AOFFSET], 2 * SIZE
  13089. ;;
  13090. LDFPD f60, f61 = [AOFFSET], 2 * SIZE
  13091. ;;
  13092. LDFPD f62, f63 = [AOFFSET]
  13093. adds AOFFSET = -30 * SIZE, AOFFSET
  13094. ;;
  13095. FSUB f64 = f32, f64
  13096. FSUB f65 = f33, f65
  13097. FSUB f66 = f34, f66
  13098. FSUB f67 = f35, f67
  13099. FSUB f72 = f36, f72
  13100. FSUB f73 = f37, f73
  13101. FSUB f74 = f38, f74
  13102. FSUB f75 = f39, f75
  13103. FSUB f80 = f40, f80
  13104. FSUB f81 = f41, f81
  13105. FSUB f82 = f42, f82
  13106. FSUB f83 = f43, f83
  13107. FSUB f88 = f44, f88
  13108. FSUB f89 = f45, f89
  13109. FSUB f90 = f46, f90
  13110. FSUB f91 = f47, f91
  13111. ;;
  13112. FSUB f96 = f48, f96
  13113. FSUB f97 = f49, f97
  13114. FSUB f98 = f50, f98
  13115. FSUB f99 = f51, f99
  13116. ;;
  13117. FSUB f104 = f52, f104
  13118. FSUB f105 = f53, f105
  13119. FSUB f106 = f54, f106
  13120. FSUB f107 = f55, f107
  13121. ;;
  13122. FSUB f112 = f56, f112
  13123. FSUB f113 = f57, f113
  13124. FSUB f114 = f58, f114
  13125. FSUB f115 = f59, f115
  13126. ;;
  13127. FSUB f120 = f60, f120
  13128. FSUB f121 = f61, f121
  13129. FSUB f122 = f62, f122
  13130. FSUB f123 = f63, f123
  13131. ;;
  13132. #endif
  13133. #ifdef LN
  /* LN variant: transform the 32 values held in the eight register groups
     f64-f67, f72-f75, f80-f83, f88-f91, f96-f99, f104-f107, f112-f115 and
     f120-f123 with ten coefficients read from AOFFSET, then write them to
     BOFFSET/BOFFSET2.
     NOTE(review): LDFPD, LDFD, FMPY, FNMA and STFD are macros defined outside
     this chunk. The comments below assume LDFPD loads two consecutive
     elements (first target = lower address) and that
     "FNMA d = a, b, c" yields c - a * b -- confirm against the definitions.
     Under that assumption the coefficients come from these offsets (in units
     of SIZE): f32=15 f33=14 f34=13 f35=12 f36=10 f37=9 f38=8 f39=5 f40=4
     f41=0, i.e. positions i*4+j with j <= i, visited from the end backwards.
     AOFFSET moves +14, -2, -2, -2, -4, -4 and so ends where it started. */
  13134. adds AOFFSET = 14 * SIZE, AOFFSET
  13135. ;;
  13136. LDFPD f33, f32 = [AOFFSET]
  13137. adds AOFFSET = - 2 * SIZE, AOFFSET
  13138. ;;
  13139. LDFPD f35, f34 = [AOFFSET]
  13140. adds AOFFSET = - 2 * SIZE, AOFFSET
  13141. ;;
  13142. LDFD f36 = [AOFFSET], - 2 * SIZE
  13143. ;;
  13144. LDFPD f38, f37 = [AOFFSET]
  13145. adds AOFFSET = - 4 * SIZE, AOFFSET
  13146. ;;
  13147. LDFPD f40, f39 = [AOFFSET]
  13148. adds AOFFSET = - 4 * SIZE, AOFFSET
  13149. ;;
  13150. LDFD f41 = [AOFFSET]
  13151. ;;
  /* Scale the fourth register of every group (f67, f75, ..., f123) by f32.
     NOTE(review): a multiply is used where a solve would divide, so the
     stored coefficient is presumably already inverted -- verify. */
  13152. FMPY f67 = f67, f32
  13153. FMPY f99 = f99, f32
  13154. FMPY f75 = f75, f32
  13155. FMPY f107 = f107, f32
  13156. FMPY f83 = f83, f32
  13157. FMPY f115 = f115, f32
  13158. FMPY f91 = f91, f32
  13159. FMPY f123 = f123, f32
  13160. ;;
  /* Remove the contribution of the fourth registers from the third (f33),
     second (f34) and first (f35) register of each group. */
  13161. FNMA f66 = f67, f33, f66
  13162. FNMA f98 = f99, f33, f98
  13163. FNMA f74 = f75, f33, f74
  13164. FNMA f106 = f107, f33, f106
  13165. FNMA f82 = f83, f33, f82
  13166. FNMA f114 = f115, f33, f114
  13167. FNMA f90 = f91, f33, f90
  13168. FNMA f122 = f123, f33, f122
  13169. ;;
  13170. FNMA f65 = f67, f34, f65
  13171. FNMA f97 = f99, f34, f97
  13172. FNMA f73 = f75, f34, f73
  13173. FNMA f105 = f107, f34, f105
  13174. FNMA f81 = f83, f34, f81
  13175. FNMA f113 = f115, f34, f113
  13176. FNMA f89 = f91, f34, f89
  13177. FNMA f121 = f123, f34, f121
  13178. ;;
  13179. FNMA f64 = f67, f35, f64
  13180. FNMA f96 = f99, f35, f96
  13181. FNMA f72 = f75, f35, f72
  13182. FNMA f104 = f107, f35, f104
  13183. FNMA f80 = f83, f35, f80
  13184. FNMA f112 = f115, f35, f112
  13185. FNMA f88 = f91, f35, f88
  13186. FNMA f120 = f123, f35, f120
  13187. ;;
  /* Same pattern one position down: scale the third registers by f36, then
     update the second (f37) and first (f38) registers. */
  13188. FMPY f66 = f66, f36
  13189. FMPY f98 = f98, f36
  13190. FMPY f74 = f74, f36
  13191. FMPY f106 = f106, f36
  13192. FMPY f82 = f82, f36
  13193. FMPY f114 = f114, f36
  13194. FMPY f90 = f90, f36
  13195. FMPY f122 = f122, f36
  13196. ;;
  13197. FNMA f65 = f66, f37, f65
  13198. FNMA f97 = f98, f37, f97
  13199. FNMA f73 = f74, f37, f73
  13200. FNMA f105 = f106, f37, f105
  13201. FNMA f81 = f82, f37, f81
  13202. FNMA f113 = f114, f37, f113
  13203. FNMA f89 = f90, f37, f89
  13204. FNMA f121 = f122, f37, f121
  13205. ;;
  13206. FNMA f64 = f66, f38, f64
  13207. FNMA f96 = f98, f38, f96
  13208. FNMA f72 = f74, f38, f72
  13209. FNMA f104 = f106, f38, f104
  13210. FNMA f80 = f82, f38, f80
  13211. FNMA f112 = f114, f38, f112
  13212. FNMA f88 = f90, f38, f88
  13213. FNMA f120 = f122, f38, f120
  13214. ;;
  /* Write-back, interleaved with the remaining arithmetic. Both store
     pointers are first advanced by 24 * SIZE. Each group of four stores
     advances by 3 * SIZE and then steps back 11 * SIZE, so successive groups
     land 8 * SIZE lower; the final group steps back only 3 * SIZE, leaving
     BOFFSET and BOFFSET2 at their entry values.
     NOTE(review): BOFFSET2 is set up outside this chunk; presumably it is
     BOFFSET + 4 * SIZE -- confirm. */
  13215. adds BOFFSET = 24 * SIZE, BOFFSET
  13216. adds BOFFSET2 = 24 * SIZE, BOFFSET2
  13217. ;;
  /* Store the finished fourth registers while scaling the second ones by f39. */
  13218. { .mfi
  13219. STFD [BOFFSET] = f67, SIZE
  13220. FMPY f65 = f65, f39
  13221. }
  13222. { .mfi
  13223. STFD [BOFFSET2] = f99, SIZE
  13224. FMPY f97 = f97, f39
  13225. }
  13226. ;;
  13227. { .mfi
  13228. STFD [BOFFSET] = f75, SIZE
  13229. FMPY f73 = f73, f39
  13230. }
  13231. { .mfi
  13232. STFD [BOFFSET2] = f107, SIZE
  13233. FMPY f105 = f105, f39
  13234. }
  13235. ;;
  13236. { .mfi
  13237. STFD [BOFFSET] = f83, SIZE
  13238. FMPY f81 = f81, f39
  13239. }
  13240. { .mfi
  13241. STFD [BOFFSET2] = f115, SIZE
  13242. FMPY f113 = f113, f39
  13243. }
  13244. ;;
  13245. { .mfi
  13246. STFD [BOFFSET] = f91, - 11 * SIZE
  13247. FMPY f89 = f89, f39
  13248. }
  13249. { .mfi
  13250. STFD [BOFFSET2] = f123, - 11 * SIZE
  13251. FMPY f121 = f121, f39
  13252. }
  13253. ;;
  /* Store the third registers while updating the first ones with f40. */
  13254. { .mfi
  13255. STFD [BOFFSET] = f66, SIZE
  13256. FNMA f64 = f65, f40, f64
  13257. }
  13258. { .mfi
  13259. STFD [BOFFSET2] = f98, SIZE
  13260. FNMA f96 = f97, f40, f96
  13261. }
  13262. ;;
  13263. { .mfi
  13264. STFD [BOFFSET] = f74, SIZE
  13265. FNMA f72 = f73, f40, f72
  13266. }
  13267. { .mfi
  13268. STFD [BOFFSET2] = f106, SIZE
  13269. FNMA f104 = f105, f40, f104
  13270. }
  13271. ;;
  13272. { .mfi
  13273. STFD [BOFFSET] = f82, SIZE
  13274. FNMA f80 = f81, f40, f80
  13275. }
  13276. { .mfi
  13277. STFD [BOFFSET2] = f114, SIZE
  13278. FNMA f112 = f113, f40, f112
  13279. }
  13280. ;;
  13281. { .mfi
  13282. STFD [BOFFSET] = f90, -11 * SIZE
  13283. FNMA f88 = f89, f40, f88
  13284. }
  13285. { .mfi
  13286. STFD [BOFFSET2] = f122, -11 * SIZE
  13287. FNMA f120 = f121, f40, f120
  13288. }
  13289. ;;
  /* Store the second registers while scaling the first ones by f41. */
  13290. { .mfi
  13291. STFD [BOFFSET] = f65, SIZE
  13292. FMPY f64 = f64, f41
  13293. }
  13294. { .mfi
  13295. STFD [BOFFSET2] = f97, SIZE
  13296. FMPY f96 = f96, f41
  13297. }
  13298. ;;
  13299. { .mfi
  13300. STFD [BOFFSET] = f73, SIZE
  13301. FMPY f72 = f72, f41
  13302. }
  13303. { .mfi
  13304. STFD [BOFFSET2] = f105, SIZE
  13305. FMPY f104 = f104, f41
  13306. }
  13307. ;;
  13308. { .mfi
  13309. STFD [BOFFSET] = f81, SIZE
  13310. FMPY f80 = f80, f41
  13311. }
  13312. { .mfi
  13313. STFD [BOFFSET2] = f113, SIZE
  13314. FMPY f112 = f112, f41
  13315. }
  13316. ;;
  13317. { .mfi
  13318. STFD [BOFFSET] = f89, - 11 * SIZE
  13319. FMPY f88 = f88, f41
  13320. }
  13321. { .mfi
  13322. STFD [BOFFSET2] = f121, - 11 * SIZE
  13323. FMPY f120 = f120, f41
  13324. }
  13325. ;;
  /* Store the first registers. C1 and C2 are moved back by 4 * SIZE here;
     the other C pointers get the same LN adjustment in the store-to-C code
     that follows the variant sections. */
  13326. { .mmi
  13327. STFD [BOFFSET] = f64, SIZE
  13328. STFD [BOFFSET2] = f96, SIZE
  13329. adds C1 = -4 * SIZE, C1
  13330. }
  13331. ;;
  13332. { .mmi
  13333. STFD [BOFFSET] = f72, SIZE
  13334. STFD [BOFFSET2] = f104, SIZE
  13335. adds C2 = -4 * SIZE, C2
  13336. }
  13337. ;;
  13338. { .mmi
  13339. STFD [BOFFSET] = f80, SIZE
  13340. STFD [BOFFSET2] = f112, SIZE
  13341. nop __LINE__
  13342. }
  13343. ;;
  13344. { .mmi
  13345. STFD [BOFFSET] = f88, - 3 * SIZE
  13346. STFD [BOFFSET2] = f120, - 3 * SIZE
  13347. }
  13348. ;;
  13349. #endif
  13350. #ifdef LT
  /* LT variant: transform the 32 values held in the eight register groups
     f64-f67, f72-f75, ..., f120-f123 with ten coefficients read from
     AOFFSET, then write them to BOFFSET/BOFFSET2.
     NOTE(review): LDFPD, LDFD, FMPY, FNMA and STFD are macros defined outside
     this chunk. The comments assume LDFPD loads two consecutive elements
     (first target = lower address) and that "FNMA d = a, b, c" yields
     c - a * b -- confirm against the definitions.
     Under that assumption the coefficients come from these offsets (in units
     of SIZE): f32=0 f33=1 f34=2 f35=3 f36=5 f37=6 f38=7 f39=10 f40=11 f41=15,
     i.e. positions i*4+j with j >= i. AOFFSET advances by 2+3+1+4+5 = 15 and
     the last load steps back 15 * SIZE, so it ends where it started. */
  13351. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  13352. ;;
  13353. LDFPD f34, f35 = [AOFFSET]
  13354. adds AOFFSET = 3 * SIZE, AOFFSET
  13355. ;;
  13356. LDFD f36 = [AOFFSET], 1 * SIZE
  13357. ;;
  13358. LDFPD f37, f38 = [AOFFSET]
  13359. adds AOFFSET = 4 * SIZE, AOFFSET
  13360. ;;
  13361. LDFPD f39, f40 = [AOFFSET]
  13362. adds AOFFSET = 5 * SIZE, AOFFSET
  13363. ;;
  13364. LDFD f41 = [AOFFSET], -15 * SIZE
  13365. ;;
  /* Scale the first register of every group by f32.
     NOTE(review): a multiply is used where a solve would divide, so the
     stored coefficient is presumably already inverted -- verify.
     NOTE(review): several bundles below list fewer instructions than their
     template has slots; check against the upstream file whether lines were
     lost when this text was extracted. */
  13366. { .mfi
  13367. FMPY f64 = f64, f32
  13368. nop __LINE__
  13369. }
  13370. { .mfi
  13371. nop __LINE__
  13372. FMPY f96 = f96, f32
  13373. nop __LINE__
  13374. }
  13375. ;;
  13376. { .mfi
  13377. FMPY f72 = f72, f32
  13378. nop __LINE__
  13379. }
  13380. { .mfi
  13381. nop __LINE__
  13382. FMPY f104 = f104, f32
  13383. nop __LINE__
  13384. }
  13385. ;;
  13386. { .mfi
  13387. FMPY f80 = f80, f32
  13388. }
  13389. { .mfi
  13390. nop __LINE__
  13391. FMPY f112 = f112, f32
  13392. nop __LINE__
  13393. }
  13394. ;;
  13395. { .mfi
  13396. FMPY f88 = f88, f32
  13397. nop __LINE__
  13398. }
  13399. { .mfi
  13400. nop __LINE__
  13401. FMPY f120 = f120, f32
  13402. nop __LINE__
  13403. }
  13404. ;;
  /* Remove the contribution of the first registers from the second (f33),
     third (f34) and fourth (f35) register of each group. */
  13405. { .mfi
  13406. FNMA f65 = f64, f33, f65
  13407. nop __LINE__
  13408. }
  13409. { .mfi
  13410. nop __LINE__
  13411. FNMA f97 = f96, f33, f97
  13412. nop __LINE__
  13413. }
  13414. ;;
  13415. { .mfi
  13416. FNMA f73 = f72, f33, f73
  13417. nop __LINE__
  13418. }
  13419. { .mfi
  13420. nop __LINE__
  13421. FNMA f105 = f104, f33, f105
  13422. nop __LINE__
  13423. }
  13424. ;;
  13425. { .mfi
  13426. FNMA f81 = f80, f33, f81
  13427. }
  13428. { .mfi
  13429. nop __LINE__
  13430. FNMA f113 = f112, f33, f113
  13431. nop __LINE__
  13432. }
  13433. ;;
  13434. { .mfi
  13435. FNMA f89 = f88, f33, f89
  13436. nop __LINE__
  13437. }
  13438. { .mfi
  13439. nop __LINE__
  13440. FNMA f121 = f120, f33, f121
  13441. nop __LINE__
  13442. }
  13443. ;;
  13444. { .mfi
  13445. FNMA f66 = f64, f34, f66
  13446. nop __LINE__
  13447. }
  13448. { .mfi
  13449. nop __LINE__
  13450. FNMA f98 = f96, f34, f98
  13451. nop __LINE__
  13452. }
  13453. ;;
  13454. { .mfi
  13455. FNMA f74 = f72, f34, f74
  13456. }
  13457. { .mfi
  13458. nop __LINE__
  13459. FNMA f106 = f104, f34, f106
  13460. nop __LINE__
  13461. }
  13462. ;;
  13463. { .mfi
  13464. FNMA f82 = f80, f34, f82
  13465. nop __LINE__
  13466. }
  13467. { .mfi
  13468. nop __LINE__
  13469. FNMA f114 = f112, f34, f114
  13470. nop __LINE__
  13471. }
  13472. ;;
  13473. { .mfi
  13474. FNMA f90 = f88, f34, f90
  13475. nop __LINE__
  13476. }
  13477. { .mfi
  13478. nop __LINE__
  13479. FNMA f122 = f120, f34, f122
  13480. nop __LINE__
  13481. }
  13482. ;;
  13483. { .mfi
  13484. FNMA f67 = f64, f35, f67
  13485. }
  13486. { .mfi
  13487. nop __LINE__
  13488. FNMA f99 = f96, f35, f99
  13489. nop __LINE__
  13490. }
  13491. ;;
  13492. { .mfi
  13493. FNMA f75 = f72, f35, f75
  13494. nop __LINE__
  13495. }
  13496. { .mfi
  13497. nop __LINE__
  13498. FNMA f107 = f104, f35, f107
  13499. nop __LINE__
  13500. }
  13501. ;;
  13502. { .mfi
  13503. FNMA f83 = f80, f35, f83
  13504. }
  13505. { .mfi
  13506. nop __LINE__
  13507. FNMA f115 = f112, f35, f115
  13508. nop __LINE__
  13509. }
  13510. ;;
  13511. { .mfi
  13512. FNMA f91 = f88, f35, f91
  13513. nop __LINE__
  13514. }
  13515. { .mfi
  13516. nop __LINE__
  13517. FNMA f123 = f120, f35, f123
  13518. nop __LINE__
  13519. }
  13520. ;;
  /* Scale the second registers by f36, then update the third (f37) and
     fourth (f38) registers with them. */
  13521. FMPY f65 = f65, f36
  13522. FMPY f97 = f97, f36
  13523. FMPY f73 = f73, f36
  13524. FMPY f105 = f105, f36
  13525. FMPY f81 = f81, f36
  13526. FMPY f113 = f113, f36
  13527. FMPY f89 = f89, f36
  13528. FMPY f121 = f121, f36
  13529. ;;
  13530. FNMA f66 = f65, f37, f66
  13531. FNMA f98 = f97, f37, f98
  13532. FNMA f74 = f73, f37, f74
  13533. FNMA f106 = f105, f37, f106
  13534. FNMA f82 = f81, f37, f82
  13535. FNMA f114 = f113, f37, f114
  13536. FNMA f90 = f89, f37, f90
  13537. FNMA f122 = f121, f37, f122
  13538. ;;
  13539. FNMA f67 = f65, f38, f67
  13540. FNMA f99 = f97, f38, f99
  13541. FNMA f75 = f73, f38, f75
  13542. FNMA f107 = f105, f38, f107
  13543. FNMA f83 = f81, f38, f83
  13544. FNMA f115 = f113, f38, f115
  13545. FNMA f91 = f89, f38, f91
  13546. FNMA f123 = f121, f38, f123
  13547. ;;
  /* Scale the third registers by f39 and update the fourth ones with f40. */
  13548. FMPY f66 = f66, f39
  13549. FMPY f98 = f98, f39
  13550. FMPY f74 = f74, f39
  13551. FMPY f106 = f106, f39
  13552. FMPY f82 = f82, f39
  13553. FMPY f114 = f114, f39
  13554. FMPY f90 = f90, f39
  13555. FMPY f122 = f122, f39
  13556. ;;
  13557. FNMA f67 = f66, f40, f67
  13558. FNMA f99 = f98, f40, f99
  13559. FNMA f75 = f74, f40, f75
  13560. FNMA f107 = f106, f40, f107
  13561. FNMA f83 = f82, f40, f83
  13562. FNMA f115 = f114, f40, f115
  13563. FNMA f91 = f90, f40, f91
  13564. FNMA f123 = f122, f40, f123
  13565. ;;
  /* Finally scale the fourth registers by f41. */
  13566. FMPY f67 = f67, f41
  13567. FMPY f99 = f99, f41
  13568. FMPY f75 = f75, f41
  13569. FMPY f107 = f107, f41
  13570. FMPY f83 = f83, f41
  13571. FMPY f115 = f115, f41
  13572. FMPY f91 = f91, f41
  13573. FMPY f123 = f123, f41
  13574. ;;
  /* Write-back through BOFFSET and BOFFSET2 in four groups of four stores
     each (first, second, third, then fourth registers). Within a group the
     pointers advance 3 * SIZE plus a final 5 * SIZE, i.e. 8 * SIZE per
     group; the very last store steps back 27 * SIZE, returning both pointers
     to their entry values.
     NOTE(review): BOFFSET2 is set up outside this chunk; presumably it is
     BOFFSET + 4 * SIZE -- confirm. */
  13575. { .mfi
  13576. STFD [BOFFSET] = f64, SIZE
  13577. }
  13578. { .mfi
  13579. STFD [BOFFSET2] = f96, SIZE
  13580. }
  13581. ;;
  13582. { .mfi
  13583. STFD [BOFFSET] = f72, SIZE
  13584. }
  13585. { .mfi
  13586. STFD [BOFFSET2] = f104, SIZE
  13587. }
  13588. ;;
  13589. { .mfi
  13590. STFD [BOFFSET] = f80, SIZE
  13591. }
  13592. { .mfi
  13593. STFD [BOFFSET2] = f112, SIZE
  13594. }
  13595. ;;
  13596. { .mfi
  13597. STFD [BOFFSET] = f88, 5 * SIZE
  13598. }
  13599. { .mfi
  13600. STFD [BOFFSET2] = f120, 5 * SIZE
  13601. }
  13602. ;;
  13603. { .mfi
  13604. STFD [BOFFSET] = f65, SIZE
  13605. }
  13606. { .mfi
  13607. STFD [BOFFSET2] = f97, SIZE
  13608. }
  13609. ;;
  13610. { .mfi
  13611. STFD [BOFFSET] = f73, SIZE
  13612. }
  13613. { .mfi
  13614. STFD [BOFFSET2] = f105, SIZE
  13615. }
  13616. ;;
  13617. { .mfi
  13618. STFD [BOFFSET] = f81, SIZE
  13619. }
  13620. { .mfi
  13621. STFD [BOFFSET2] = f113, SIZE
  13622. }
  13623. ;;
  13624. { .mfi
  13625. STFD [BOFFSET] = f89, 5 * SIZE
  13626. }
  13627. { .mfi
  13628. STFD [BOFFSET2] = f121, 5 * SIZE
  13629. }
  13630. ;;
  13631. { .mfi
  13632. STFD [BOFFSET] = f66, SIZE
  13633. }
  13634. { .mfi
  13635. STFD [BOFFSET2] = f98, SIZE
  13636. }
  13637. ;;
  13638. { .mfi
  13639. STFD [BOFFSET] = f74, SIZE
  13640. }
  13641. { .mfi
  13642. STFD [BOFFSET2] = f106, SIZE
  13643. }
  13644. ;;
  13645. { .mfi
  13646. STFD [BOFFSET] = f82, SIZE
  13647. }
  13648. { .mfi
  13649. STFD [BOFFSET2] = f114, SIZE
  13650. }
  13651. ;;
  13652. { .mfi
  13653. STFD [BOFFSET] = f90, 5 * SIZE
  13654. }
  13655. { .mfi
  13656. STFD [BOFFSET2] = f122, 5 * SIZE
  13657. }
  13658. ;;
  13659. { .mfi
  13660. STFD [BOFFSET] = f67, SIZE
  13661. }
  13662. { .mfi
  13663. STFD [BOFFSET2] = f99, SIZE
  13664. }
  13665. ;;
  13666. { .mfi
  13667. STFD [BOFFSET] = f75, SIZE
  13668. }
  13669. { .mfi
  13670. STFD [BOFFSET2] = f107, SIZE
  13671. }
  13672. ;;
  13673. { .mfi
  13674. STFD [BOFFSET] = f83, SIZE
  13675. }
  13676. { .mfi
  13677. STFD [BOFFSET2] = f115, SIZE
  13678. }
  13679. ;;
  13680. { .mfi
  13681. STFD [BOFFSET] = f91, -27 * SIZE
  13682. }
  13683. { .mfi
  13684. STFD [BOFFSET2] = f123, -27 * SIZE
  13685. }
  13686. ;;
  13687. #endif
  13688. #ifdef RN
  /* RN variant: transform the 32 values held in the eight register groups
     f64-f67, f72-f75, ..., f120-f123 with 36 coefficients read from BOFFSET,
     then write them to AOFFSET/AOFFSET2. Here each whole group of four
     registers is treated as one unit, processed from the f64 group forwards
     to the f120 group.
     NOTE(review): LDFPD, LDFD, FMPY, FNMA and STFD are macros defined outside
     this chunk. The comments assume LDFPD loads two consecutive elements
     (first target = lower address) and that "FNMA d = a, b, c" yields
     c - a * b -- confirm against the definitions.
     Under that assumption the coefficients come from these offsets (in units
     of SIZE): f32-f39 = 0..7, f40-f46 = 9..15, f47-f52 = 18..23,
     f53-f57 = 27..31, f58-f61 = 36..39, f16-f18 = 45..47, f19-f20 = 54..55,
     f21 = 63, i.e. positions i*8+j with j >= i. BOFFSET advances by a total
     of 63 * SIZE and is moved back by the same amount after the last load. */
  13689. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  13690. ;;
  13691. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  13692. ;;
  13693. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  13694. ;;
  13695. LDFPD f38, f39 = [BOFFSET]
  13696. adds BOFFSET = 3 * SIZE, BOFFSET
  13697. ;;
  13698. LDFD f40 = [BOFFSET], 1 * SIZE
  13699. ;;
  13700. LDFPD f41, f42 = [BOFFSET], 2 * SIZE
  13701. ;;
  13702. LDFPD f43, f44 = [BOFFSET], 2 * SIZE
  13703. ;;
  13704. LDFPD f45, f46 = [BOFFSET]
  13705. adds BOFFSET = 4 * SIZE, BOFFSET
  13706. ;;
  13707. LDFPD f47, f48 = [BOFFSET], 2 * SIZE
  13708. ;;
  13709. LDFPD f49, f50 = [BOFFSET], 2 * SIZE
  13710. ;;
  13711. LDFPD f51, f52 = [BOFFSET]
  13712. adds BOFFSET = 5 * SIZE, BOFFSET
  13713. ;;
  13714. LDFD f53 = [BOFFSET], 1 * SIZE
  13715. ;;
  13716. LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  13717. ;;
  13718. LDFPD f56, f57 = [BOFFSET]
  13719. adds BOFFSET = 6 * SIZE, BOFFSET
  13720. ;;
  13721. LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  13722. ;;
  13723. LDFPD f60, f61 = [BOFFSET]
  13724. adds BOFFSET = 7 * SIZE, BOFFSET
  13725. ;;
  13726. LDFD f16 = [BOFFSET], 1 * SIZE
  13727. ;;
  13728. LDFPD f17, f18 = [BOFFSET]
  13729. adds BOFFSET = 8 * SIZE, BOFFSET
  13730. ;;
  13731. LDFPD f19, f20 = [BOFFSET]
  13732. adds BOFFSET = 9 * SIZE, BOFFSET
  13733. ;;
  13734. LDFD f21 = [BOFFSET]
  13735. adds BOFFSET = -63 * SIZE, BOFFSET
  13736. ;;
  /* Group f64-f67: scale by f32, then subtract its multiples (f33..f39)
     from the seven later groups.
     NOTE(review): a multiply is used where a solve would divide, so the
     scaling coefficients (f32, f40, f47, f53, f58, f16, f19, f21) are
     presumably stored already inverted -- verify. */
  13737. FMPY f64 = f64, f32
  13738. FMPY f65 = f65, f32
  13739. FMPY f66 = f66, f32
  13740. FMPY f67 = f67, f32
  13741. ;;
  13742. FNMA f72 = f64, f33, f72
  13743. FNMA f73 = f65, f33, f73
  13744. FNMA f74 = f66, f33, f74
  13745. FNMA f75 = f67, f33, f75
  13746. ;;
  13747. FNMA f80 = f64, f34, f80
  13748. FNMA f81 = f65, f34, f81
  13749. FNMA f82 = f66, f34, f82
  13750. FNMA f83 = f67, f34, f83
  13751. ;;
  13752. FNMA f88 = f64, f35, f88
  13753. FNMA f89 = f65, f35, f89
  13754. FNMA f90 = f66, f35, f90
  13755. FNMA f91 = f67, f35, f91
  13756. ;;
  13757. FNMA f96 = f64, f36, f96
  13758. FNMA f97 = f65, f36, f97
  13759. FNMA f98 = f66, f36, f98
  13760. FNMA f99 = f67, f36, f99
  13761. ;;
  13762. FNMA f104 = f64, f37, f104
  13763. FNMA f105 = f65, f37, f105
  13764. FNMA f106 = f66, f37, f106
  13765. FNMA f107 = f67, f37, f107
  13766. ;;
  13767. FNMA f112 = f64, f38, f112
  13768. FNMA f113 = f65, f38, f113
  13769. FNMA f114 = f66, f38, f114
  13770. FNMA f115 = f67, f38, f115
  13771. ;;
  13772. FNMA f120 = f64, f39, f120
  13773. FNMA f121 = f65, f39, f121
  13774. FNMA f122 = f66, f39, f122
  13775. FNMA f123 = f67, f39, f123
  13776. ;;
  /* Group f72-f75: scale by f40, propagate with f41..f46. */
  13777. FMPY f72 = f72, f40
  13778. FMPY f73 = f73, f40
  13779. FMPY f74 = f74, f40
  13780. FMPY f75 = f75, f40
  13781. ;;
  13782. FNMA f80 = f72, f41, f80
  13783. FNMA f81 = f73, f41, f81
  13784. FNMA f82 = f74, f41, f82
  13785. FNMA f83 = f75, f41, f83
  13786. ;;
  13787. FNMA f88 = f72, f42, f88
  13788. FNMA f89 = f73, f42, f89
  13789. FNMA f90 = f74, f42, f90
  13790. FNMA f91 = f75, f42, f91
  13791. ;;
  13792. FNMA f96 = f72, f43, f96
  13793. FNMA f97 = f73, f43, f97
  13794. FNMA f98 = f74, f43, f98
  13795. FNMA f99 = f75, f43, f99
  13796. ;;
  13797. FNMA f104 = f72, f44, f104
  13798. FNMA f105 = f73, f44, f105
  13799. FNMA f106 = f74, f44, f106
  13800. FNMA f107 = f75, f44, f107
  13801. ;;
  13802. FNMA f112 = f72, f45, f112
  13803. FNMA f113 = f73, f45, f113
  13804. FNMA f114 = f74, f45, f114
  13805. FNMA f115 = f75, f45, f115
  13806. ;;
  13807. FNMA f120 = f72, f46, f120
  13808. FNMA f121 = f73, f46, f121
  13809. FNMA f122 = f74, f46, f122
  13810. FNMA f123 = f75, f46, f123
  13811. ;;
  /* Group f80-f83: scale by f47, propagate with f48..f52. */
  13812. FMPY f80 = f80, f47
  13813. FMPY f81 = f81, f47
  13814. FMPY f82 = f82, f47
  13815. FMPY f83 = f83, f47
  13816. ;;
  13817. FNMA f88 = f80, f48, f88
  13818. FNMA f89 = f81, f48, f89
  13819. FNMA f90 = f82, f48, f90
  13820. FNMA f91 = f83, f48, f91
  13821. ;;
  13822. FNMA f96 = f80, f49, f96
  13823. FNMA f97 = f81, f49, f97
  13824. FNMA f98 = f82, f49, f98
  13825. FNMA f99 = f83, f49, f99
  13826. ;;
  13827. FNMA f104 = f80, f50, f104
  13828. FNMA f105 = f81, f50, f105
  13829. FNMA f106 = f82, f50, f106
  13830. FNMA f107 = f83, f50, f107
  13831. ;;
  13832. FNMA f112 = f80, f51, f112
  13833. FNMA f113 = f81, f51, f113
  13834. FNMA f114 = f82, f51, f114
  13835. FNMA f115 = f83, f51, f115
  13836. ;;
  13837. FNMA f120 = f80, f52, f120
  13838. FNMA f121 = f81, f52, f121
  13839. FNMA f122 = f82, f52, f122
  13840. FNMA f123 = f83, f52, f123
  13841. ;;
  /* Group f88-f91: scale by f53, propagate with f54..f57. */
  13842. FMPY f88 = f88, f53
  13843. FMPY f89 = f89, f53
  13844. FMPY f90 = f90, f53
  13845. FMPY f91 = f91, f53
  13846. ;;
  13847. FNMA f96 = f88, f54, f96
  13848. FNMA f97 = f89, f54, f97
  13849. FNMA f98 = f90, f54, f98
  13850. FNMA f99 = f91, f54, f99
  13851. ;;
  13852. FNMA f104 = f88, f55, f104
  13853. FNMA f105 = f89, f55, f105
  13854. FNMA f106 = f90, f55, f106
  13855. FNMA f107 = f91, f55, f107
  13856. ;;
  13857. FNMA f112 = f88, f56, f112
  13858. FNMA f113 = f89, f56, f113
  13859. FNMA f114 = f90, f56, f114
  13860. FNMA f115 = f91, f56, f115
  13861. ;;
  13862. FNMA f120 = f88, f57, f120
  13863. FNMA f121 = f89, f57, f121
  13864. FNMA f122 = f90, f57, f122
  13865. FNMA f123 = f91, f57, f123
  13866. ;;
  /* Group f96-f99: scale by f58, propagate with f59..f61. */
  13867. FMPY f96 = f96, f58
  13868. FMPY f97 = f97, f58
  13869. FMPY f98 = f98, f58
  13870. FMPY f99 = f99, f58
  13871. ;;
  13872. FNMA f104 = f96, f59, f104
  13873. FNMA f105 = f97, f59, f105
  13874. FNMA f106 = f98, f59, f106
  13875. FNMA f107 = f99, f59, f107
  13876. ;;
  13877. FNMA f112 = f96, f60, f112
  13878. FNMA f113 = f97, f60, f113
  13879. FNMA f114 = f98, f60, f114
  13880. FNMA f115 = f99, f60, f115
  13881. ;;
  13882. FNMA f120 = f96, f61, f120
  13883. FNMA f121 = f97, f61, f121
  13884. FNMA f122 = f98, f61, f122
  13885. FNMA f123 = f99, f61, f123
  13886. ;;
  /* Group f104-f107: scale by f16, propagate with f17 and f18. */
  13887. FMPY f104 = f104, f16
  13888. FMPY f105 = f105, f16
  13889. FMPY f106 = f106, f16
  13890. FMPY f107 = f107, f16
  13891. ;;
  13892. FNMA f112 = f104, f17, f112
  13893. FNMA f113 = f105, f17, f113
  13894. FNMA f114 = f106, f17, f114
  13895. FNMA f115 = f107, f17, f115
  13896. ;;
  13897. FNMA f120 = f104, f18, f120
  13898. FNMA f121 = f105, f18, f121
  13899. FNMA f122 = f106, f18, f122
  13900. FNMA f123 = f107, f18, f123
  13901. ;;
  /* Group f112-f115: scale by f19, propagate with f20. */
  13902. FMPY f112 = f112, f19
  13903. FMPY f113 = f113, f19
  13904. FMPY f114 = f114, f19
  13905. FMPY f115 = f115, f19
  13906. ;;
  13907. FNMA f120 = f112, f20, f120
  13908. FNMA f121 = f113, f20, f121
  13909. FNMA f122 = f114, f20, f122
  13910. FNMA f123 = f115, f20, f123
  13911. ;;
  /* Group f120-f123: only the final scaling by f21 remains. */
  13912. FMPY f120 = f120, f21
  13913. FMPY f121 = f121, f21
  13914. FMPY f122 = f122, f21
  13915. FMPY f123 = f123, f21
  13916. ;;
  /* Write-back: AOFFSET takes the f64, f80, f96 and f112 groups, AOFFSET2
     the f72, f88, f104 and f120 groups. Each pointer advances 8 * SIZE per
     group (3 * SIZE plus a final 5 * SIZE) and the last store steps back
     27 * SIZE, returning both pointers to their entry values.
     NOTE(review): AOFFSET2 is set up outside this chunk; presumably it is
     AOFFSET + 4 * SIZE -- confirm. */
  13917. STFD [AOFFSET] = f64, SIZE
  13918. STFD [AOFFSET2] = f72, SIZE
  13919. ;;
  13920. STFD [AOFFSET] = f65, SIZE
  13921. STFD [AOFFSET2] = f73, SIZE
  13922. ;;
  13923. STFD [AOFFSET] = f66, SIZE
  13924. STFD [AOFFSET2] = f74, SIZE
  13925. ;;
  13926. STFD [AOFFSET] = f67, 5 * SIZE
  13927. STFD [AOFFSET2] = f75, 5 * SIZE
  13928. ;;
  13929. STFD [AOFFSET] = f80, SIZE
  13930. STFD [AOFFSET2] = f88, SIZE
  13931. ;;
  13932. STFD [AOFFSET] = f81, SIZE
  13933. STFD [AOFFSET2] = f89, SIZE
  13934. ;;
  13935. STFD [AOFFSET] = f82, SIZE
  13936. STFD [AOFFSET2] = f90, SIZE
  13937. ;;
  13938. STFD [AOFFSET] = f83, 5 * SIZE
  13939. STFD [AOFFSET2] = f91, 5 * SIZE
  13940. ;;
  13941. STFD [AOFFSET] = f96, SIZE
  13942. STFD [AOFFSET2] = f104, SIZE
  13943. ;;
  13944. STFD [AOFFSET] = f97, SIZE
  13945. STFD [AOFFSET2] = f105, SIZE
  13946. ;;
  13947. STFD [AOFFSET] = f98, SIZE
  13948. STFD [AOFFSET2] = f106, SIZE
  13949. ;;
  13950. STFD [AOFFSET] = f99, 5 * SIZE
  13951. STFD [AOFFSET2] = f107, 5 * SIZE
  13952. ;;
  13953. STFD [AOFFSET] = f112, SIZE
  13954. STFD [AOFFSET2] = f120, SIZE
  13955. ;;
  13956. STFD [AOFFSET] = f113, SIZE
  13957. STFD [AOFFSET2] = f121, SIZE
  13958. ;;
  13959. STFD [AOFFSET] = f114, SIZE
  13960. STFD [AOFFSET2] = f122, SIZE
  13961. ;;
  13962. STFD [AOFFSET] = f115, -27 * SIZE
  13963. STFD [AOFFSET2] = f123, - 27 * SIZE
  13964. ;;
  13965. #endif
  13966. #ifdef RT
  /* RT variant: transform the 32 values held in the eight register groups
     f64-f67, f72-f75, ..., f120-f123 with 36 coefficients read from BOFFSET,
     then write them to AOFFSET/AOFFSET2. Each whole group of four registers
     is treated as one unit, processed from the f120 group backwards to the
     f64 group.
     NOTE(review): LDFPD, LDFD, FMPY, FNMA and STFD are macros defined outside
     this chunk. The comments assume LDFPD loads two consecutive elements
     (first target = lower address) and that "FNMA d = a, b, c" yields
     c - a * b -- confirm against the definitions.
     Under that assumption the coefficients come from these offsets (in units
     of SIZE): f32..f39 = 63 down to 56, f40..f46 = 54 down to 48,
     f47..f52 = 45 down to 40, f53..f57 = 36 down to 32,
     f58..f61 = 27 down to 24, f16..f18 = 18 down to 16, f19 = 9, f20 = 8,
     f21 = 0, i.e. positions i*8+j with j <= i, visited from the end
     backwards. BOFFSET is advanced by 62 * SIZE first and the decrements add
     up to 62 * SIZE, so it ends where it started. */
  13967. adds BOFFSET = 62 * SIZE, BOFFSET
  13968. ;;
  13969. LDFPD f33, f32 = [BOFFSET]
  13970. adds BOFFSET = - 2 * SIZE, BOFFSET
  13971. ;;
  13972. LDFPD f35, f34 = [BOFFSET]
  13973. adds BOFFSET = - 2 * SIZE, BOFFSET
  13974. ;;
  13975. LDFPD f37, f36 = [BOFFSET]
  13976. adds BOFFSET = - 2 * SIZE, BOFFSET
  13977. ;;
  13978. LDFPD f39, f38 = [BOFFSET]
  13979. adds BOFFSET = - 2 * SIZE, BOFFSET
  13980. ;;
  13981. LDFD f40 = [BOFFSET], -2 * SIZE
  13982. ;;
  13983. LDFPD f42, f41 = [BOFFSET]
  13984. adds BOFFSET = - 2 * SIZE, BOFFSET
  13985. ;;
  13986. LDFPD f44, f43 = [BOFFSET]
  13987. adds BOFFSET = - 2 * SIZE, BOFFSET
  13988. ;;
  13989. LDFPD f46, f45 = [BOFFSET]
  13990. adds BOFFSET = - 4 * SIZE, BOFFSET
  13991. ;;
  13992. LDFPD f48, f47 = [BOFFSET]
  13993. adds BOFFSET = - 2 * SIZE, BOFFSET
  13994. ;;
  13995. LDFPD f50, f49 = [BOFFSET]
  13996. adds BOFFSET = - 2 * SIZE, BOFFSET
  13997. ;;
  13998. LDFPD f52, f51 = [BOFFSET]
  13999. adds BOFFSET = - 4 * SIZE, BOFFSET
  14000. ;;
  14001. LDFD f53 = [BOFFSET], -2 * SIZE
  14002. ;;
  14003. LDFPD f55, f54 = [BOFFSET]
  14004. adds BOFFSET = - 2 * SIZE, BOFFSET
  14005. ;;
  14006. LDFPD f57, f56 = [BOFFSET]
  14007. adds BOFFSET = - 6 * SIZE, BOFFSET
  14008. ;;
  14009. LDFPD f59, f58 = [BOFFSET]
  14010. adds BOFFSET = - 2 * SIZE, BOFFSET
  14011. ;;
  14012. LDFPD f61, f60 = [BOFFSET]
  14013. adds BOFFSET = - 6 * SIZE, BOFFSET
  14014. ;;
  14015. LDFD f16 = [BOFFSET], -2 * SIZE
  14016. ;;
  14017. LDFPD f18, f17 = [BOFFSET]
  14018. adds BOFFSET = - 8 * SIZE, BOFFSET
  14019. ;;
  14020. LDFPD f20, f19 = [BOFFSET]
  14021. adds BOFFSET = - 8 * SIZE, BOFFSET
  14022. ;;
  14023. LDFD f21 = [BOFFSET]
  14024. ;;
  /* Group f120-f123: scale by f32, then subtract its multiples (f33..f39)
     from the seven earlier groups.
     NOTE(review): a multiply is used where a solve would divide, so the
     scaling coefficients (f32, f40, f47, f53, f58, f16, f19, f21) are
     presumably stored already inverted -- verify. */
  14025. FMPY f120 = f120, f32
  14026. FMPY f121 = f121, f32
  14027. FMPY f122 = f122, f32
  14028. FMPY f123 = f123, f32
  14029. ;;
  14030. FNMA f112 = f120, f33, f112
  14031. FNMA f113 = f121, f33, f113
  14032. FNMA f114 = f122, f33, f114
  14033. FNMA f115 = f123, f33, f115
  14034. ;;
  14035. FNMA f104 = f120, f34, f104
  14036. FNMA f105 = f121, f34, f105
  14037. FNMA f106 = f122, f34, f106
  14038. FNMA f107 = f123, f34, f107
  14039. ;;
  14040. FNMA f96 = f120, f35, f96
  14041. FNMA f97 = f121, f35, f97
  14042. FNMA f98 = f122, f35, f98
  14043. FNMA f99 = f123, f35, f99
  14044. ;;
  14045. FNMA f88 = f120, f36, f88
  14046. FNMA f89 = f121, f36, f89
  14047. FNMA f90 = f122, f36, f90
  14048. FNMA f91 = f123, f36, f91
  14049. ;;
  14050. FNMA f80 = f120, f37, f80
  14051. FNMA f81 = f121, f37, f81
  14052. FNMA f82 = f122, f37, f82
  14053. FNMA f83 = f123, f37, f83
  14054. ;;
  14055. FNMA f72 = f120, f38, f72
  14056. FNMA f73 = f121, f38, f73
  14057. FNMA f74 = f122, f38, f74
  14058. FNMA f75 = f123, f38, f75
  14059. ;;
  14060. FNMA f64 = f120, f39, f64
  14061. FNMA f65 = f121, f39, f65
  14062. FNMA f66 = f122, f39, f66
  14063. FNMA f67 = f123, f39, f67
  14064. ;;
  /* Group f112-f115: scale by f40, propagate with f41..f46. */
  14065. FMPY f112 = f112, f40
  14066. FMPY f113 = f113, f40
  14067. FMPY f114 = f114, f40
  14068. FMPY f115 = f115, f40
  14069. ;;
  14070. FNMA f104 = f112, f41, f104
  14071. FNMA f105 = f113, f41, f105
  14072. FNMA f106 = f114, f41, f106
  14073. FNMA f107 = f115, f41, f107
  14074. ;;
  14075. FNMA f96 = f112, f42, f96
  14076. FNMA f97 = f113, f42, f97
  14077. FNMA f98 = f114, f42, f98
  14078. FNMA f99 = f115, f42, f99
  14079. ;;
  14080. FNMA f88 = f112, f43, f88
  14081. FNMA f89 = f113, f43, f89
  14082. FNMA f90 = f114, f43, f90
  14083. FNMA f91 = f115, f43, f91
  14084. ;;
  14085. FNMA f80 = f112, f44, f80
  14086. FNMA f81 = f113, f44, f81
  14087. FNMA f82 = f114, f44, f82
  14088. FNMA f83 = f115, f44, f83
  14089. ;;
  14090. FNMA f72 = f112, f45, f72
  14091. FNMA f73 = f113, f45, f73
  14092. FNMA f74 = f114, f45, f74
  14093. FNMA f75 = f115, f45, f75
  14094. ;;
  14095. FNMA f64 = f112, f46, f64
  14096. FNMA f65 = f113, f46, f65
  14097. FNMA f66 = f114, f46, f66
  14098. FNMA f67 = f115, f46, f67
  14099. ;;
  /* Group f104-f107: scale by f47, propagate with f48..f52. */
  14100. FMPY f104 = f104, f47
  14101. FMPY f105 = f105, f47
  14102. FMPY f106 = f106, f47
  14103. FMPY f107 = f107, f47
  14104. ;;
  14105. FNMA f96 = f104, f48, f96
  14106. FNMA f97 = f105, f48, f97
  14107. FNMA f98 = f106, f48, f98
  14108. FNMA f99 = f107, f48, f99
  14109. ;;
  14110. FNMA f88 = f104, f49, f88
  14111. FNMA f89 = f105, f49, f89
  14112. FNMA f90 = f106, f49, f90
  14113. FNMA f91 = f107, f49, f91
  14114. ;;
  14115. FNMA f80 = f104, f50, f80
  14116. FNMA f81 = f105, f50, f81
  14117. FNMA f82 = f106, f50, f82
  14118. FNMA f83 = f107, f50, f83
  14119. ;;
  14120. FNMA f72 = f104, f51, f72
  14121. FNMA f73 = f105, f51, f73
  14122. FNMA f74 = f106, f51, f74
  14123. FNMA f75 = f107, f51, f75
  14124. ;;
  14125. FNMA f64 = f104, f52, f64
  14126. FNMA f65 = f105, f52, f65
  14127. FNMA f66 = f106, f52, f66
  14128. FNMA f67 = f107, f52, f67
  14129. ;;
  /* Group f96-f99: scale by f53, propagate with f54..f57. */
  14130. FMPY f96 = f96, f53
  14131. FMPY f97 = f97, f53
  14132. FMPY f98 = f98, f53
  14133. FMPY f99 = f99, f53
  14134. ;;
  14135. FNMA f88 = f96, f54, f88
  14136. FNMA f89 = f97, f54, f89
  14137. FNMA f90 = f98, f54, f90
  14138. FNMA f91 = f99, f54, f91
  14139. ;;
  14140. FNMA f80 = f96, f55, f80
  14141. FNMA f81 = f97, f55, f81
  14142. FNMA f82 = f98, f55, f82
  14143. FNMA f83 = f99, f55, f83
  14144. ;;
  14145. FNMA f72 = f96, f56, f72
  14146. FNMA f73 = f97, f56, f73
  14147. FNMA f74 = f98, f56, f74
  14148. FNMA f75 = f99, f56, f75
  14149. ;;
  14150. FNMA f64 = f96, f57, f64
  14151. FNMA f65 = f97, f57, f65
  14152. FNMA f66 = f98, f57, f66
  14153. FNMA f67 = f99, f57, f67
  14154. ;;
  /* Group f88-f91: scale by f58, propagate with f59..f61. */
  14155. FMPY f88 = f88, f58
  14156. FMPY f89 = f89, f58
  14157. FMPY f90 = f90, f58
  14158. FMPY f91 = f91, f58
  14159. ;;
  14160. FNMA f80 = f88, f59, f80
  14161. FNMA f81 = f89, f59, f81
  14162. FNMA f82 = f90, f59, f82
  14163. FNMA f83 = f91, f59, f83
  14164. ;;
  14165. FNMA f72 = f88, f60, f72
  14166. FNMA f73 = f89, f60, f73
  14167. FNMA f74 = f90, f60, f74
  14168. FNMA f75 = f91, f60, f75
  14169. ;;
  14170. FNMA f64 = f88, f61, f64
  14171. FNMA f65 = f89, f61, f65
  14172. FNMA f66 = f90, f61, f66
  14173. FNMA f67 = f91, f61, f67
  14174. ;;
  /* Group f80-f83: scale by f16, propagate with f17 and f18. */
  14175. FMPY f80 = f80, f16
  14176. FMPY f81 = f81, f16
  14177. FMPY f82 = f82, f16
  14178. FMPY f83 = f83, f16
  14179. ;;
  14180. FNMA f72 = f80, f17, f72
  14181. FNMA f73 = f81, f17, f73
  14182. FNMA f74 = f82, f17, f74
  14183. FNMA f75 = f83, f17, f75
  14184. ;;
  14185. FNMA f64 = f80, f18, f64
  14186. FNMA f65 = f81, f18, f65
  14187. FNMA f66 = f82, f18, f66
  14188. FNMA f67 = f83, f18, f67
  14189. ;;
  /* Group f72-f75: scale by f19, propagate with f20. */
  14190. FMPY f72 = f72, f19
  14191. FMPY f73 = f73, f19
  14192. FMPY f74 = f74, f19
  14193. FMPY f75 = f75, f19
  14194. ;;
  14195. FNMA f64 = f72, f20, f64
  14196. FNMA f65 = f73, f20, f65
  14197. FNMA f66 = f74, f20, f66
  14198. FNMA f67 = f75, f20, f67
  14199. ;;
  /* Group f64-f67: only the final scaling by f21 remains. */
  14200. FMPY f64 = f64, f21
  14201. FMPY f65 = f65, f21
  14202. FMPY f66 = f66, f21
  14203. FMPY f67 = f67, f21
  14204. ;;
  /* Write-back from the highest position downwards. Both pointers are first
     advanced by 24 * SIZE; AOFFSET then takes the f112, f96, f80 and f64
     groups and AOFFSET2 the f120, f104, f88 and f72 groups. Each group of
     four stores advances 3 * SIZE and steps back 11 * SIZE (net -8 * SIZE);
     the final group steps back only 3 * SIZE, so both pointers return to
     their entry values.
     NOTE(review): AOFFSET2 is set up outside this chunk; presumably it is
     AOFFSET + 4 * SIZE -- confirm. */
  14205. adds AOFFSET = 24 * SIZE, AOFFSET
  14206. adds AOFFSET2 = 24 * SIZE, AOFFSET2
  14207. ;;
  14208. STFD [AOFFSET] = f112, SIZE
  14209. STFD [AOFFSET2] = f120, SIZE
  14210. ;;
  14211. STFD [AOFFSET] = f113, SIZE
  14212. STFD [AOFFSET2] = f121, SIZE
  14213. ;;
  14214. STFD [AOFFSET] = f114, SIZE
  14215. STFD [AOFFSET2] = f122, SIZE
  14216. ;;
  14217. STFD [AOFFSET] = f115, - 11 * SIZE
  14218. STFD [AOFFSET2] = f123, - 11 * SIZE
  14219. ;;
  14220. STFD [AOFFSET] = f96, SIZE
  14221. STFD [AOFFSET2] = f104, SIZE
  14222. ;;
  14223. STFD [AOFFSET] = f97, SIZE
  14224. STFD [AOFFSET2] = f105, SIZE
  14225. ;;
  14226. STFD [AOFFSET] = f98, SIZE
  14227. STFD [AOFFSET2] = f106, SIZE
  14228. ;;
  14229. STFD [AOFFSET] = f99, - 11 * SIZE
  14230. STFD [AOFFSET2] = f107, - 11 * SIZE
  14231. ;;
  14232. STFD [AOFFSET] = f80, SIZE
  14233. STFD [AOFFSET2] = f88, SIZE
  14234. ;;
  14235. STFD [AOFFSET] = f81, SIZE
  14236. STFD [AOFFSET2] = f89, SIZE
  14237. ;;
  14238. STFD [AOFFSET] = f82, SIZE
  14239. STFD [AOFFSET2] = f90, SIZE
  14240. ;;
  14241. STFD [AOFFSET] = f83, - 11 * SIZE
  14242. STFD [AOFFSET2] = f91, - 11 * SIZE
  14243. ;;
  14244. STFD [AOFFSET] = f64, SIZE
  14245. STFD [AOFFSET2] = f72, SIZE
  14246. ;;
  14247. STFD [AOFFSET] = f65, SIZE
  14248. STFD [AOFFSET2] = f73, SIZE
  14249. ;;
  14250. STFD [AOFFSET] = f66, SIZE
  14251. STFD [AOFFSET2] = f74, SIZE
  14252. ;;
  14253. STFD [AOFFSET] = f67, - 3 * SIZE
  14254. STFD [AOFFSET2] = f75, - 3 * SIZE
  14255. ;;
  14256. #endif
  /* Common tail for all four variants: write the 32 results to the eight
     output pointers C1..C8 (four consecutive stores per pointer: f64-f67 to
     C1, f72-f75 to C2, ..., f120-f123 to C8) and clear the first register of
     each group by copying f0, the IA-64 constant-zero FP register.
     Without LN each pointer ends 4 * SIZE past where it started. With LN the
     pointer is first moved back by 4 * SIZE (C3..C8 here, C1/C2 inside the LN
     section) and the fourth store steps back 3 * SIZE, so it ends on the
     first of the four elements just written.
     Pointer and counter bookkeeping for the following code is slotted into
     the same bundles; see the notes next to each piece.
     NOTE(review): several bundles list fewer instructions than their template
     has slots (for example the closing .mmb bundle shows no branch); check
     against the upstream file whether lines were lost when this text was
     extracted. */
  14257. { .mmf
  14258. STFD [C1 ] = f64, SIZE
  14259. mov f64 = f0
  14260. }
  14261. ;;
  14262. { .mmi
  14263. STFD [C1 ] = f65, SIZE
  14264. }
  14265. ;;
  14266. { .mmi
  14267. STFD [C1 ] = f66, SIZE
  14268. #ifdef LN
  14269. adds C3 = -4 * SIZE, C3
  14270. #else
  14271. nop __LINE__
  14272. #endif
  14273. }
  14274. ;;
  14275. { .mmi
  14276. #ifndef LN
  14277. STFD [C1 ] = f67, SIZE
  14278. #else
  14279. STFD [C1 ] = f67, - 3 * SIZE
  14280. #endif
  14281. }
  14282. ;;
  14283. { .mmf
  14284. STFD [C2 ] = f72, SIZE
  14285. mov f72 = f0
  14286. }
  14287. ;;
  14288. { .mmi
  14289. STFD [C2 ] = f73, SIZE
  14290. #ifdef LN
  14291. adds C4 = -4 * SIZE, C4
  14292. #else
  14293. nop __LINE__
  14294. #endif
  14295. }
  14296. ;;
  14297. { .mmi
  14298. STFD [C2 ] = f74, SIZE
  14299. }
  14300. ;;
  14301. { .mmi
  14302. #ifndef LN
  14303. STFD [C2 ] = f75, SIZE
  14304. #else
  14305. STFD [C2 ] = f75, - 3 * SIZE
  14306. #endif
  14307. #ifdef LN
  14308. adds C5 = -4 * SIZE, C5
  14309. #else
  14310. nop __LINE__
  14311. #endif
  14312. }
  14313. ;;
  14314. { .mmf
  14315. STFD [C3 ] = f80, SIZE
  14316. mov f80 = f0
  14317. }
  14318. ;;
  14319. { .mmi
  14320. STFD [C3 ] = f81, SIZE
  14321. }
  14322. ;;
  14323. { .mmi
  14324. STFD [C3 ] = f82, SIZE
  14325. #ifdef LN
  14326. adds C6 = -4 * SIZE, C6
  14327. #else
  14328. nop __LINE__
  14329. #endif
  14330. }
  14331. ;;
  14332. { .mmi
  14333. #ifndef LN
  14334. STFD [C3 ] = f83, SIZE
  14335. #else
  14336. STFD [C3 ] = f83, - 3 * SIZE
  14337. #endif
  14338. }
  14339. ;;
  14340. { .mmf
  14341. STFD [C4 ] = f88, SIZE
  14342. mov f88 = f0
  14343. }
  14344. ;;
  /* Note the order: C8 receives its LN adjustment here, C7 a little later. */
  14345. { .mmi
  14346. STFD [C4 ] = f89, SIZE
  14347. #ifdef LN
  14348. adds C8 = -4 * SIZE, C8
  14349. #else
  14350. nop __LINE__
  14351. #endif
  14352. }
  14353. ;;
  14354. { .mmi
  14355. STFD [C4 ] = f90, SIZE
  14356. }
  14357. ;;
  14358. { .mmi
  14359. #ifndef LN
  14360. STFD [C4 ] = f91, SIZE
  14361. #else
  14362. STFD [C4 ] = f91, - 3 * SIZE
  14363. #endif
  14364. nop __LINE__
  14365. }
  14366. ;;
  14367. { .mmf
  14368. STFD [C5 ] = f96, SIZE
  14369. mov f96 = f0
  14370. }
  14371. ;;
  14372. { .mmi
  14373. STFD [C5 ] = f97, SIZE
  14374. nop __LINE__
  14375. }
  14376. ;;
  14377. { .mmi
  14378. STFD [C5 ] = f98, SIZE
  14379. #ifdef LN
  14380. adds C7 = -4 * SIZE, C7
  14381. #else
  14382. nop __LINE__
  14383. #endif
  14384. }
  14385. ;;
  14386. { .mmi
  14387. #ifndef LN
  14388. STFD [C5 ] = f99, SIZE
  14389. #else
  14390. STFD [C5 ] = f99, - 3 * SIZE
  14391. #endif
  14392. }
  14393. ;;
  14394. { .mmf
  14395. STFD [C6 ] = f104, SIZE
  14396. mov f104 = f0
  14397. }
  14398. ;;
  /* r2 = K << BASE_SHIFT (shladd computes (src << count) + addend, and the
     addend here is r0, the constant-zero integer register). */
  14399. { .mmi
  14400. STFD [C6 ] = f105, SIZE
  14401. shladd r2 = K, BASE_SHIFT, r0
  14402. }
  14403. ;;
  /* L = K - KK, used by the LT/RN pointer updates further down. */
  14404. { .mmi
  14405. STFD [C6 ] = f106, SIZE
  14406. sub L = K, KK
  14407. }
  14408. ;;
  /* RT only: AORIG += r2 << 2, i.e. (K << BASE_SHIFT) * 4. */
  14409. { .mmi
  14410. #ifndef LN
  14411. STFD [C6 ] = f107, SIZE
  14412. #else
  14413. STFD [C6 ] = f107, - 3 * SIZE
  14414. #endif
  14415. #ifdef RT
  14416. shladd AORIG = r2, 2, AORIG
  14417. #else
  14418. nop __LINE__
  14419. #endif
  14420. }
  14421. ;;
  14422. { .mmf
  14423. STFD [C7 ] = f112, SIZE
  14424. mov f112 = f0
  14425. }
  14426. ;;
  /* LT/RN only: L = (K - KK) << BASE_SHIFT, then AOFFSET += L * 4 and
     BOFFSET += L * 8 in the next bundles. */
  14427. { .mmi
  14428. STFD [C7 ] = f113, SIZE
  14429. #if defined(LT) || defined(RN)
  14430. shladd L = L, BASE_SHIFT, r0
  14431. #else
  14432. nop __LINE__
  14433. #endif
  14434. }
  14435. ;;
  14436. { .mmi
  14437. STFD [C7 ] = f114, SIZE
  14438. #if defined(LT) || defined(RN)
  14439. shladd AOFFSET = L, 2, AOFFSET
  14440. #else
  14441. nop __LINE__
  14442. #endif
  14443. }
  14444. ;;
  14445. { .mmi
  14446. #ifndef LN
  14447. STFD [C7 ] = f115, SIZE
  14448. #else
  14449. STFD [C7 ] = f115, - 3 * SIZE
  14450. #endif
  14451. #if defined(LT) || defined(RN)
  14452. shladd BOFFSET = L, 3, BOFFSET
  14453. #else
  14454. nop __LINE__
  14455. #endif
  14456. }
  14457. ;;
  14458. { .mmf
  14459. STFD [C8 ] = f120, SIZE
  14460. mov f120 = f0
  14461. }
  14462. ;;
  /* KK moves by 4 per pass: forward for LT, backward for LN, unchanged for
     RN and RT. */
  14463. { .mmi
  14464. STFD [C8 ] = f121, SIZE
  14465. #ifdef LT
  14466. adds KK = 4, KK
  14467. #elif defined LN
  14468. adds KK = -4, KK
  14469. #else
  14470. nop __LINE__
  14471. #endif
  14472. }
  14473. ;;
  /* Reload L from the updated KK: L = KK for LT/RN, L = K - KK for LN/RT. */
  14474. { .mmi
  14475. STFD [C8 ] = f122, SIZE
  14476. #if defined(LT) || defined(RN)
  14477. mov L = KK
  14478. #else
  14479. sub L = K, KK
  14480. #endif
  14481. }
  14482. ;;
  14483. { .mmb
  14484. #ifndef LN
  14485. STFD [C8 ] = f123, SIZE
  14486. #else
  14487. STFD [C8 ] = f123, - 3 * SIZE
  14488. #endif
  14489. }
  14490. ;;
  // .L030: set-up for the tile that is processed when bit 1 of M is set.
  // If that bit is clear, control goes straight to .L040.
  // NOTE(review): presumably this is the 2-row remainder of M -- confirm.
  14491. .align 8
  14492. .L030:
  14493. { .mib
  // L = number of k-steps for the update loop: KK (LT/RN) or K - KK (LN/RT).
  14494. #if defined(LT) || defined(RN)
  14495. mov L = KK
  14496. #else
  14497. sub L = K, KK
  14498. #endif
  14499. tbit.z p6, p0 = M, 1
  14500. (p6) br.cond.dptk .L040
  14501. }
  14502. ;;
  // p7 = (L != 0) guards every preload below.
  // r2 = K << (1 + BASE_SHIFT), r3 = KK << BASE_SHIFT.
  14503. { .mmi
  14504. cmp.ne p7, p0 = r0, L
  14505. adds BOFFSET = 0 * SIZE, B
  14506. shl r2 = K, 1 + BASE_SHIFT
  14507. }
  14508. { .mmi
  14509. shladd r3 = KK, BASE_SHIFT, r0
  14510. nop __LINE__
  14511. nop __LINE__
  14512. }
  14513. ;;
  14514. #if defined(LT) || defined(RN)
  // LT/RN: BOFFSET stays at B and AOFFSET is used as it arrives.
  14515. { .mmf
  14516. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  14517. setf.d f73 = r0
  14518. mov f65 = f0
  14519. }
  14520. ;;
  14521. #else
  // LN/RT: BOFFSET = B + 8 * r3; for LN, AORIG is first moved back by r2;
  // then AOFFSET = AORIG + 2 * r3.
  14522. { .mfi
  14523. shladd BOFFSET = r3, 3, B
  14524. mov f65 = f0
  14525. #ifdef LN
  14526. sub AORIG = AORIG, r2
  14527. #else
  14528. nop __LINE__
  14529. #endif
  14530. }
  14531. ;;
  14532. { .mfi
  14533. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  14534. mov f73 = f0
  14535. shladd AOFFSET = r3, 1, AORIG
  14536. }
  14537. ;;
  14538. #endif
  // Clear the second-row accumulators f65, f73, ..., f121 (f64, f72, ...,
  // f120 are not cleared in this section). p3 starts out true.
  14539. { .mfi
  14540. setf.d f105 = r0
  14541. mov f81 = f0
  14542. adds L = 1, L
  14543. }
  14544. { .mfi
  14545. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  14546. mov f89 = f0
  14547. cmp.eq p3, p0 = r0, r0
  14548. }
  14549. ;;
  // p12 = bit 0 of (L + 1) is clear; tbit.z and shr sit in the same
  // instruction group, so tbit.z tests the value before the shift.
  14550. { .mfi
  14551. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  14552. mov f113 = f0
  14553. tbit.z p12, p0 = L, 0
  14554. }
  14555. { .mfi
  14556. setf.d f97 = r0
  14557. mov f121 = f0
  14558. shr L = L, 1
  14559. }
  14560. ;;
  // L = ((L + 1) >> 1) - 1 becomes the loop counter; -1 means no k-steps.
  14561. { .mmf
  14562. (p7) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  14563. adds L = -1, L
  14564. }
  14565. ;;
  14566. { .mmf
  14567. (p7) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  14568. cmp.eq p6, p0 = -1, L
  14569. }
  14570. ;;
  // Preload the first pair from A, arm ar.lc, and skip the loop if empty.
  14571. { .mib
  14572. (p7) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  14573. mov ar.lc = L
  14574. (p6) br.cond.dpnt .L038
  14575. }
  14576. ;;
  // .L032: counted update loop (br.cloop on ar.lc); each trip covers up to
  // two k-steps.
  //   step 1 (unconditional): A pair f32/f33 times B values f48..f55
  //   step 2 (under p3):      A pair f40/f41 times B values f56..f63
  // f32/f40 feed f64, f72, ..., f120; f33/f41 feed f65, f73, ..., f121.
  // p4 = (L != 0) gates the reloads of f32/f33 and f48..f55 for the next trip.
  // NOTE(review): PREB is prefetched here but is not initialised in the
  // visible .L030 set-up; presumably it carries over from earlier code.
  14577. .L032:
  14578. { .mfb
  14579. lfetch.nt1 [PREA], 4 * SIZE
  14580. FMA f64 = f32, f48, f64 // A1 * B1
  14581. nop __LINE__
  14582. }
  // When p12 is set, p3 becomes (L != 0), so step 2 is skipped on the trip
  // where L has reached 0.
  14583. { .mfi
  14584. nop __LINE__
  14585. FMA f72 = f32, f49, f72 // A1 * B2
  14586. (p12) cmp.ne p3, p0 = 0, L
  14587. }
  14588. ;;
  14589. { .mfi
  14590. lfetch.nt1 [PREB], 16 * SIZE
  14591. FMA f80 = f32, f50, f80 // A1 * B3
  14592. cmp.ne p4, p5 = 0, L
  14593. }
  14594. { .mfb
  14595. nop __LINE__
  14596. FMA f88 = f32, f51, f88 // A1 * B4
  14597. nop __LINE__
  14598. }
  14599. ;;
  14600. { .mfb
  14601. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  14602. FMA f96 = f32, f52, f96 // A1 * B5
  14603. nop __LINE__
  14604. }
  14605. { .mfb
  14606. nop __LINE__
  14607. FMA f104 = f32, f53, f104 // A1 * B6
  14608. nop __LINE__
  14609. }
  14610. ;;
  14611. { .mfb
  14612. (p3) LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  14613. FMA f112 = f32, f54, f112 // A1 * B7
  14614. nop __LINE__
  14615. }
  14616. { .mfb
  14617. nop __LINE__
  14618. FMA f120 = f32, f55, f120 // A1 * B8
  14619. nop __LINE__
  14620. }
  14621. ;;
  14622. { .mfb
  14623. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  14624. FMA f65 = f33, f48, f65 // A2 * B1
  14625. nop __LINE__
  14626. }
  14627. { .mfb
  14628. nop __LINE__
  14629. FMA f73 = f33, f49, f73 // A2 * B2
  14630. nop __LINE__
  14631. }
  14632. ;;
  14633. { .mfb
  14634. (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  14635. FMA f81 = f33, f50, f81 // A2 * B3
  14636. nop __LINE__
  14637. }
  14638. { .mfb
  14639. nop __LINE__
  14640. FMA f89 = f33, f51, f89 // A2 * B4
  14641. nop __LINE__
  14642. }
  14643. ;;
  14644. { .mfb
  14645. (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  14646. FMA f97 = f33, f52, f97 // A2 * B5
  14647. nop __LINE__
  14648. }
  14649. { .mfb
  14650. nop __LINE__
  14651. FMA f105 = f33, f53, f105 // A2 * B6
  14652. nop __LINE__
  14653. }
  14654. ;;
  14655. { .mfb
  14656. nop __LINE__
  14657. FMA f113 = f33, f54, f113 // A2 * B7
  14658. nop __LINE__
  14659. }
  14660. { .mfb
  14661. nop __LINE__
  14662. FMA f121 = f33, f55, f121 // A2 * B8
  14663. nop __LINE__
  14664. }
  14665. ;;
  // Step 2 starts here; the (p4) loads refill step-1 operands for the next trip.
  14666. { .mfb
  14667. (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  14668. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  14669. nop __LINE__
  14670. }
  14671. { .mfb
  14672. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  14673. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  14674. nop __LINE__
  14675. }
  14676. ;;
  14677. { .mfb
  14678. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  14679. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  14680. nop __LINE__
  14681. }
  14682. { .mfb
  14683. nop __LINE__
  14684. (p3) FMA f88 = f40, f59, f88 // A1 * B4
  14685. nop __LINE__
  14686. }
  14687. ;;
  14688. { .mfb
  14689. (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  14690. (p3) FMA f96 = f40, f60, f96 // A1 * B5
  14691. nop __LINE__
  14692. }
  14693. { .mfb
  14694. nop __LINE__
  14695. (p3) FMA f104 = f40, f61, f104 // A1 * B6
  14696. nop __LINE__
  14697. }
  14698. ;;
  14699. { .mfb
  14700. nop __LINE__
  14701. (p3) FMA f112 = f40, f62, f112 // A1 * B7
  14702. nop __LINE__
  14703. }
  14704. { .mfb
  14705. nop __LINE__
  14706. (p3) FMA f120 = f40, f63, f120 // A1 * B8
  14707. nop __LINE__
  14708. }
  14709. ;;
  14710. { .mfb
  14711. (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  14712. (p3) FMA f65 = f41, f56, f65 // A2 * B1
  14713. nop __LINE__
  14714. }
  // NOTE(review): the next bundle lists only two instructions for the .mfb
  // template, unlike its neighbours which spell out the leading nop.
  14715. { .mfb
  14716. (p3) FMA f73 = f41, f57, f73 // A2 * B2
  14717. nop __LINE__
  14718. }
  14719. { .mfb
  14720. nop __LINE__
  14721. (p3) FMA f81 = f41, f58, f81 // A2 * B3
  14722. nop __LINE__
  14723. }
  14724. { .mfb
  14725. nop __LINE__
  14726. (p3) FMA f89 = f41, f59, f89 // A2 * B4
  14727. nop __LINE__
  14728. }
  14729. ;;
  14730. { .mfb
  14731. nop __LINE__
  14732. (p3) FMA f97 = f41, f60, f97 // A2 * B5
  14733. nop __LINE__
  14734. }
  14735. { .mfb
  14736. nop __LINE__
  14737. (p3) FMA f105 = f41, f61, f105 // A2 * B6
  14738. nop __LINE__
  14739. }
  14740. ;;
  // L is decremented alongside ar.lc so the p3/p4 tests above can see when
  // the last trip has been reached.
  14741. { .mfi
  14742. nop __LINE__
  14743. (p3) FMA f113 = f41, f62, f113 // A2 * B7
  14744. adds L = -1, L
  14745. }
  14746. { .mfb
  14747. nop __LINE__
  14748. (p3) FMA f121 = f41, f63, f121 // A2 * B8
  14749. br.cloop.sptk.few .L032
  14750. }
  14751. ;;
  // .L038: after the update loop. Re-point AOFFSET/BOFFSET (LN/RT only),
  // load 16 right-hand-side values into f32..f47 and replace each accumulator
  // by (loaded value - accumulator).
  14752. .L038:
  14753. #if defined(LN) || defined(RT)
  // r2 = (KK - 2) for LN or (KK - 8) for RT, shifted by BASE_SHIFT;
  // AOFFSET = AORIG + 2 * r2 and BOFFSET = B + 8 * r2.
  14754. #ifdef LN
  14755. adds r2 = -2, KK
  14756. #else
  14757. adds r2 = -8, KK
  14758. #endif
  14759. ;;
  14760. shladd r2 = r2, BASE_SHIFT, r0
  14761. ;;
  14762. shladd AOFFSET = r2, 1, AORIG
  14763. shladd BOFFSET = r2, 3, B
  14764. ;;
  14765. #endif
  // Secondary store pointers, 4 * SIZE past the primary ones.
  14766. adds AOFFSET2 = 4 * SIZE, AOFFSET
  14767. adds BOFFSET2 = 4 * SIZE, BOFFSET
  14768. ;;
  14769. #if defined(LN) || defined(LT)
  // LN/LT: the 16 values come from BOFFSET; the final -14 * SIZE undoes the
  // seven post-increments, so BOFFSET ends where it started.
  14770. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  14771. ;;
  14772. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  14773. ;;
  14774. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  14775. ;;
  14776. LDFPD f38, f39 = [BOFFSET], 2 * SIZE
  14777. ;;
  14778. LDFPD f40, f41 = [BOFFSET], 2 * SIZE
  14779. ;;
  14780. LDFPD f42, f43 = [BOFFSET], 2 * SIZE
  14781. ;;
  14782. LDFPD f44, f45 = [BOFFSET], 2 * SIZE
  14783. ;;
  14784. LDFPD f46, f47 = [BOFFSET]
  14785. adds BOFFSET = -14 * SIZE, BOFFSET
  14786. ;;
  // Here f32..f39 pair with f64, f72, ..., f120 and f40..f47 pair with
  // f65, f73, ..., f121.
  14787. { .mfi
  14788. FSUB f64 = f32, f64
  14789. nop __LINE__
  14790. }
  14791. { .mfi
  14792. nop __LINE__
  14793. FSUB f72 = f33, f72
  14794. nop __LINE__
  14795. }
  14796. ;;
  14797. { .mfi
  14798. FSUB f80 = f34, f80
  14799. nop __LINE__
  14800. }
  14801. { .mfi
  14802. nop __LINE__
  14803. FSUB f88 = f35, f88
  14804. nop __LINE__
  14805. }
  14806. ;;
  14807. { .mfi
  14808. FSUB f96 = f36, f96
  14809. nop __LINE__
  14810. }
  14811. { .mfi
  14812. nop __LINE__
  14813. FSUB f104 = f37, f104
  14814. nop __LINE__
  14815. }
  14816. ;;
  14817. { .mfi
  14818. FSUB f112 = f38, f112
  14819. nop __LINE__
  14820. }
  14821. { .mfi
  14822. nop __LINE__
  14823. FSUB f120 = f39, f120
  14824. nop __LINE__
  14825. }
  14826. ;;
  14827. { .mfi
  14828. FSUB f65 = f40, f65
  14829. nop __LINE__
  14830. }
  14831. { .mfi
  14832. nop __LINE__
  14833. FSUB f73 = f41, f73
  14834. nop __LINE__
  14835. }
  14836. ;;
  14837. { .mfi
  14838. FSUB f81 = f42, f81
  14839. nop __LINE__
  14840. }
  14841. { .mfi
  14842. nop __LINE__
  14843. FSUB f89 = f43, f89
  14844. nop __LINE__
  14845. }
  14846. ;;
  14847. { .mfi
  14848. FSUB f97 = f44, f97
  14849. nop __LINE__
  14850. }
  14851. { .mfi
  14852. nop __LINE__
  14853. FSUB f105 = f45, f105
  14854. nop __LINE__
  14855. }
  14856. ;;
  14857. { .mfi
  14858. FSUB f113 = f46, f113
  14859. }
  14860. { .mfi
  14861. nop __LINE__
  14862. FSUB f121 = f47, f121
  14863. nop __LINE__
  14864. }
  14865. ;;
  14866. #else
  // RN/RT: the 16 values come from AOFFSET instead (pointer likewise
  // restored), and consecutive values pair with f64, f65, f72, f73, ...
  14867. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  14868. ;;
  14869. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  14870. ;;
  14871. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  14872. ;;
  14873. LDFPD f38, f39 = [AOFFSET], 2 * SIZE
  14874. ;;
  14875. LDFPD f40, f41 = [AOFFSET], 2 * SIZE
  14876. ;;
  14877. LDFPD f42, f43 = [AOFFSET], 2 * SIZE
  14878. ;;
  14879. LDFPD f44, f45 = [AOFFSET], 2 * SIZE
  14880. ;;
  14881. LDFPD f46, f47 = [AOFFSET]
  14882. adds AOFFSET = -14 * SIZE, AOFFSET
  14883. ;;
  14884. FSUB f64 = f32, f64
  14885. FSUB f65 = f33, f65
  14886. FSUB f72 = f34, f72
  14887. FSUB f73 = f35, f73
  14888. FSUB f80 = f36, f80
  14889. FSUB f81 = f37, f81
  14890. FSUB f88 = f38, f88
  14891. FSUB f89 = f39, f89
  14892. ;;
  14893. FSUB f96 = f40, f96
  14894. FSUB f97 = f41, f97
  14895. ;;
  14896. FSUB f104 = f42, f104
  14897. FSUB f105 = f43, f105
  14898. ;;
  14899. FSUB f112 = f44, f112
  14900. FSUB f113 = f45, f113
  14901. ;;
  14902. FSUB f120 = f46, f120
  14903. FSUB f121 = f47, f121
  14904. ;;
  14905. #endif
  // LN variant of the 2-row solve: the second row (f65, f73, ..., f121) is
  // finished first, then eliminated from the first row (f64, f72, ..., f120).
  14906. #ifdef LN
  // Coefficients, in SIZE units from AOFFSET: f33 = [2], f32 = [3], f34 = [0].
  // AOFFSET is back at its entry value after these loads.
  // NOTE(review): f32 and f34 are applied with FMPY, so they are presumably
  // stored as reciprocals of the diagonal -- confirm with the packing code.
  14907. adds AOFFSET = 2 * SIZE, AOFFSET
  14908. ;;
  14909. LDFPD f33, f32 = [AOFFSET]
  14910. adds AOFFSET = - 2 * SIZE, AOFFSET
  14911. ;;
  14912. LDFD f34 = [AOFFSET]
  14913. ;;
  14914. FMPY f65 = f65, f32
  14915. FMPY f97 = f97, f32
  14916. FMPY f73 = f73, f32
  14917. FMPY f105 = f105, f32
  14918. FMPY f81 = f81, f32
  14919. FMPY f113 = f113, f32
  14920. FMPY f89 = f89, f32
  14921. FMPY f121 = f121, f32
  14922. ;;
  14923. FNMA f64 = f65, f33, f64
  14924. FNMA f96 = f97, f33, f96
  14925. FNMA f72 = f73, f33, f72
  14926. FNMA f104 = f105, f33, f104
  14927. FNMA f80 = f81, f33, f80
  14928. FNMA f112 = f113, f33, f112
  14929. FNMA f88 = f89, f33, f88
  14930. FNMA f120 = f121, f33, f120
  14931. ;;
  14932. FMPY f64 = f64, f34
  14933. FMPY f96 = f96, f34
  14934. FMPY f72 = f72, f34
  14935. FMPY f104 = f104, f34
  14936. FMPY f80 = f80, f34
  14937. FMPY f112 = f112, f34
  14938. FMPY f88 = f88, f34
  14939. FMPY f120 = f120, f34
  14940. ;;
  // Write the results back through BOFFSET/BOFFSET2: the second row goes to
  // the upper eight slots first, then -11 * SIZE steps back so the first row
  // lands in the lower eight; both pointers finish where they started.
  14941. adds BOFFSET = 8 * SIZE, BOFFSET
  14942. adds BOFFSET2 = 8 * SIZE, BOFFSET2
  14943. ;;
  14944. { .mfi
  14945. STFD [BOFFSET] = f65, SIZE
  14946. }
  14947. { .mfi
  14948. STFD [BOFFSET2] = f97, SIZE
  14949. }
  14950. ;;
  14951. { .mfi
  14952. STFD [BOFFSET] = f73, SIZE
  14953. }
  14954. { .mfi
  14955. STFD [BOFFSET2] = f105, SIZE
  14956. }
  14957. ;;
  14958. { .mfi
  14959. STFD [BOFFSET] = f81, SIZE
  14960. }
  14961. { .mfi
  14962. STFD [BOFFSET2] = f113, SIZE
  14963. }
  14964. ;;
  14965. { .mfi
  14966. STFD [BOFFSET] = f89, - 11 * SIZE
  14967. }
  14968. { .mfi
  14969. STFD [BOFFSET2] = f121, - 11 * SIZE
  14970. }
  14971. ;;
  // C1..C8 are each moved back by 2 * SIZE before the C stores that follow
  // this #ifdef region.
  14972. { .mmi
  14973. STFD [BOFFSET] = f64, SIZE
  14974. STFD [BOFFSET2] = f96, SIZE
  14975. adds C1 = -2 * SIZE, C1
  14976. }
  14977. ;;
  14978. { .mmi
  14979. STFD [BOFFSET] = f72, SIZE
  14980. STFD [BOFFSET2] = f104, SIZE
  14981. adds C2 = -2 * SIZE, C2
  14982. }
  14983. ;;
  14984. { .mmi
  14985. STFD [BOFFSET] = f80, SIZE
  14986. STFD [BOFFSET2] = f112, SIZE
  14987. nop __LINE__
  14988. }
  14989. ;;
  14990. { .mmi
  14991. STFD [BOFFSET] = f88, - 3 * SIZE
  14992. STFD [BOFFSET2] = f120, - 3 * SIZE
  14993. }
  14994. ;;
  14995. adds C3 = -2 * SIZE, C3
  14996. adds C4 = -2 * SIZE, C4
  14997. adds C5 = -2 * SIZE, C5
  14998. adds C6 = -2 * SIZE, C6
  14999. adds C7 = -2 * SIZE, C7
  15000. adds C8 = -2 * SIZE, C8
  15001. ;;
  15002. #endif
  // LT variant of the 2-row solve: the first row (f64, f72, ..., f120) is
  // finished first, then eliminated from the second row (f65, f73, ..., f121).
  15003. #ifdef LT
  // Coefficients, in SIZE units from AOFFSET: f32 = [0], f33 = [1], f34 = [3].
  // The -3 * SIZE post-increment puts AOFFSET back at its entry value.
  // NOTE(review): f32 and f34 are applied with FMPY, so they are presumably
  // stored as reciprocals of the diagonal -- confirm with the packing code.
  15004. LDFPD f32, f33 = [AOFFSET]
  15005. adds AOFFSET = 3 * SIZE, AOFFSET
  15006. ;;
  15007. LDFD f34 = [AOFFSET], - 3 * SIZE
  15008. ;;
  15009. { .mfi
  15010. FMPY f64 = f64, f32
  15011. nop __LINE__
  15012. }
  15013. { .mfi
  15014. nop __LINE__
  15015. FMPY f96 = f96, f32
  15016. nop __LINE__
  15017. }
  15018. ;;
  15019. { .mfi
  15020. FMPY f72 = f72, f32
  15021. nop __LINE__
  15022. }
  15023. { .mfi
  15024. nop __LINE__
  15025. FMPY f104 = f104, f32
  15026. nop __LINE__
  15027. }
  15028. ;;
  15029. { .mfi
  15030. FMPY f80 = f80, f32
  15031. }
  15032. { .mfi
  15033. nop __LINE__
  15034. FMPY f112 = f112, f32
  15035. nop __LINE__
  15036. }
  15037. ;;
  15038. { .mfi
  15039. FMPY f88 = f88, f32
  15040. nop __LINE__
  15041. }
  15042. { .mfi
  15043. nop __LINE__
  15044. FMPY f120 = f120, f32
  15045. nop __LINE__
  15046. }
  15047. ;;
  // second row -= first row * f33
  15048. { .mfi
  15049. FNMA f65 = f64, f33, f65
  15050. nop __LINE__
  15051. }
  15052. { .mfi
  15053. nop __LINE__
  15054. FNMA f97 = f96, f33, f97
  15055. nop __LINE__
  15056. }
  15057. ;;
  15058. { .mfi
  15059. FNMA f73 = f72, f33, f73
  15060. nop __LINE__
  15061. }
  15062. { .mfi
  15063. nop __LINE__
  15064. FNMA f105 = f104, f33, f105
  15065. nop __LINE__
  15066. }
  15067. ;;
  15068. { .mfi
  15069. FNMA f81 = f80, f33, f81
  15070. }
  15071. { .mfi
  15072. nop __LINE__
  15073. FNMA f113 = f112, f33, f113
  15074. nop __LINE__
  15075. }
  15076. ;;
  15077. { .mfi
  15078. FNMA f89 = f88, f33, f89
  15079. nop __LINE__
  15080. }
  15081. { .mfi
  15082. nop __LINE__
  15083. FNMA f121 = f120, f33, f121
  15084. nop __LINE__
  15085. }
  15086. ;;
  15087. FMPY f65 = f65, f34
  15088. FMPY f97 = f97, f34
  15089. FMPY f73 = f73, f34
  15090. FMPY f105 = f105, f34
  15091. FMPY f81 = f81, f34
  15092. FMPY f113 = f113, f34
  15093. FMPY f89 = f89, f34
  15094. FMPY f121 = f121, f34
  15095. ;;
  // Write the results back through BOFFSET/BOFFSET2: the first row fills the
  // lower eight slots, the 5 * SIZE post-increment skips to the upper eight
  // for the second row, and -11 * SIZE returns both pointers to their start.
  15096. { .mfi
  15097. STFD [BOFFSET] = f64, SIZE
  15098. }
  15099. { .mfi
  15100. STFD [BOFFSET2] = f96, SIZE
  15101. }
  15102. ;;
  15103. { .mfi
  15104. STFD [BOFFSET] = f72, SIZE
  15105. }
  15106. { .mfi
  15107. STFD [BOFFSET2] = f104, SIZE
  15108. }
  15109. ;;
  15110. { .mfi
  15111. STFD [BOFFSET] = f80, SIZE
  15112. }
  15113. { .mfi
  15114. STFD [BOFFSET2] = f112, SIZE
  15115. }
  15116. ;;
  15117. { .mfi
  15118. STFD [BOFFSET] = f88, 5 * SIZE
  15119. }
  15120. { .mfi
  15121. STFD [BOFFSET2] = f120, 5 * SIZE
  15122. }
  15123. ;;
  15124. { .mfi
  15125. STFD [BOFFSET] = f65, SIZE
  15126. }
  15127. { .mfi
  15128. STFD [BOFFSET2] = f97, SIZE
  15129. }
  15130. ;;
  15131. { .mfi
  15132. STFD [BOFFSET] = f73, SIZE
  15133. }
  15134. { .mfi
  15135. STFD [BOFFSET2] = f105, SIZE
  15136. }
  15137. ;;
  15138. { .mfi
  15139. STFD [BOFFSET] = f81, SIZE
  15140. }
  15141. { .mfi
  15142. STFD [BOFFSET2] = f113, SIZE
  15143. }
  15144. ;;
  15145. { .mfi
  15146. STFD [BOFFSET] = f89, -11 * SIZE
  15147. }
  15148. { .mfi
  15149. STFD [BOFFSET2] = f121, -11 * SIZE
  15150. }
  15151. #endif
  // RN variant: forward sweep over the eight column pairs (f64/f65,
  // f72/f73, ..., f120/f121) using coefficients read from BOFFSET.
  15152. #ifdef RN
  // Offsets read, in SIZE units from BOFFSET: 0-7, 9-15, 18-23, 27-31,
  // 36-39, 45-47, 54-55, 63 -- row i of an 8 x 8 array starting at its
  // diagonal entry. The final -63 * SIZE restores BOFFSET.
  // NOTE(review): the diagonal entries (f32, f40, f47, f53, f58, f16, f19,
  // f21) are applied with FMPY, so they are presumably stored as reciprocals
  // -- confirm with the packing code.
  15153. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  15154. ;;
  15155. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  15156. ;;
  15157. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  15158. ;;
  15159. LDFPD f38, f39 = [BOFFSET]
  15160. adds BOFFSET = 3 * SIZE, BOFFSET
  15161. ;;
  15162. LDFD f40 = [BOFFSET], 1 * SIZE
  15163. ;;
  15164. LDFPD f41, f42 = [BOFFSET], 2 * SIZE
  15165. ;;
  15166. LDFPD f43, f44 = [BOFFSET], 2 * SIZE
  15167. ;;
  15168. LDFPD f45, f46 = [BOFFSET]
  15169. adds BOFFSET = 4 * SIZE, BOFFSET
  15170. ;;
  15171. LDFPD f47, f48 = [BOFFSET], 2 * SIZE
  15172. ;;
  15173. LDFPD f49, f50 = [BOFFSET], 2 * SIZE
  15174. ;;
  15175. LDFPD f51, f52 = [BOFFSET]
  15176. adds BOFFSET = 5 * SIZE, BOFFSET
  15177. ;;
  15178. LDFD f53 = [BOFFSET], 1 * SIZE
  15179. ;;
  15180. LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  15181. ;;
  15182. LDFPD f56, f57 = [BOFFSET]
  15183. adds BOFFSET = 6 * SIZE, BOFFSET
  15184. ;;
  15185. LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  15186. ;;
  15187. LDFPD f60, f61 = [BOFFSET]
  15188. adds BOFFSET = 7 * SIZE, BOFFSET
  15189. ;;
  15190. LDFD f16 = [BOFFSET], 1 * SIZE
  15191. ;;
  15192. LDFPD f17, f18 = [BOFFSET]
  15193. adds BOFFSET = 8 * SIZE, BOFFSET
  15194. ;;
  15195. LDFPD f19, f20 = [BOFFSET]
  15196. adds BOFFSET = 9 * SIZE, BOFFSET
  15197. ;;
  15198. LDFD f21 = [BOFFSET]
  15199. adds BOFFSET = -63 * SIZE, BOFFSET
  15200. ;;
  // Column 1 (f64/f65): scale, then eliminate from columns 2..8.
  15201. FMPY f64 = f64, f32
  15202. FMPY f65 = f65, f32
  15203. ;;
  15204. FNMA f72 = f64, f33, f72
  15205. FNMA f73 = f65, f33, f73
  15206. ;;
  15207. FNMA f80 = f64, f34, f80
  15208. FNMA f81 = f65, f34, f81
  15209. ;;
  15210. FNMA f88 = f64, f35, f88
  15211. FNMA f89 = f65, f35, f89
  15212. ;;
  15213. FNMA f96 = f64, f36, f96
  15214. FNMA f97 = f65, f36, f97
  15215. ;;
  15216. FNMA f104 = f64, f37, f104
  15217. FNMA f105 = f65, f37, f105
  15218. ;;
  15219. FNMA f112 = f64, f38, f112
  15220. FNMA f113 = f65, f38, f113
  15221. ;;
  15222. FNMA f120 = f64, f39, f120
  15223. FNMA f121 = f65, f39, f121
  15224. ;;
  // Column 2 (f72/f73).
  15225. FMPY f72 = f72, f40
  15226. FMPY f73 = f73, f40
  15227. ;;
  15228. FNMA f80 = f72, f41, f80
  15229. FNMA f81 = f73, f41, f81
  15230. ;;
  15231. FNMA f88 = f72, f42, f88
  15232. FNMA f89 = f73, f42, f89
  15233. ;;
  15234. FNMA f96 = f72, f43, f96
  15235. FNMA f97 = f73, f43, f97
  15236. ;;
  15237. FNMA f104 = f72, f44, f104
  15238. FNMA f105 = f73, f44, f105
  15239. ;;
  15240. FNMA f112 = f72, f45, f112
  15241. FNMA f113 = f73, f45, f113
  15242. ;;
  15243. FNMA f120 = f72, f46, f120
  15244. FNMA f121 = f73, f46, f121
  15245. ;;
  // Column 3 (f80/f81).
  15246. FMPY f80 = f80, f47
  15247. FMPY f81 = f81, f47
  15248. ;;
  15249. FNMA f88 = f80, f48, f88
  15250. FNMA f89 = f81, f48, f89
  15251. ;;
  15252. FNMA f96 = f80, f49, f96
  15253. FNMA f97 = f81, f49, f97
  15254. ;;
  15255. FNMA f104 = f80, f50, f104
  15256. FNMA f105 = f81, f50, f105
  15257. ;;
  15258. FNMA f112 = f80, f51, f112
  15259. FNMA f113 = f81, f51, f113
  15260. ;;
  15261. FNMA f120 = f80, f52, f120
  15262. FNMA f121 = f81, f52, f121
  15263. ;;
  // Column 4 (f88/f89).
  15264. FMPY f88 = f88, f53
  15265. FMPY f89 = f89, f53
  15266. ;;
  15267. FNMA f96 = f88, f54, f96
  15268. FNMA f97 = f89, f54, f97
  15269. ;;
  15270. FNMA f104 = f88, f55, f104
  15271. FNMA f105 = f89, f55, f105
  15272. ;;
  15273. FNMA f112 = f88, f56, f112
  15274. FNMA f113 = f89, f56, f113
  15275. ;;
  15276. FNMA f120 = f88, f57, f120
  15277. FNMA f121 = f89, f57, f121
  15278. ;;
  // Column 5 (f96/f97).
  15279. FMPY f96 = f96, f58
  15280. FMPY f97 = f97, f58
  15281. ;;
  15282. FNMA f104 = f96, f59, f104
  15283. FNMA f105 = f97, f59, f105
  15284. ;;
  15285. FNMA f112 = f96, f60, f112
  15286. FNMA f113 = f97, f60, f113
  15287. ;;
  15288. FNMA f120 = f96, f61, f120
  15289. FNMA f121 = f97, f61, f121
  15290. ;;
  // Column 6 (f104/f105).
  15291. FMPY f104 = f104, f16
  15292. FMPY f105 = f105, f16
  15293. ;;
  15294. FNMA f112 = f104, f17, f112
  15295. FNMA f113 = f105, f17, f113
  15296. ;;
  15297. FNMA f120 = f104, f18, f120
  15298. FNMA f121 = f105, f18, f121
  15299. ;;
  // Column 7 (f112/f113), then column 8 (f120/f121).
  15300. FMPY f112 = f112, f19
  15301. FMPY f113 = f113, f19
  15302. ;;
  15303. FNMA f120 = f112, f20, f120
  15304. FNMA f121 = f113, f20, f121
  15305. ;;
  15306. FMPY f120 = f120, f21
  15307. FMPY f121 = f121, f21
  15308. ;;
  // Write the 16 results back through AOFFSET/AOFFSET2 in the order they
  // were loaded; both pointers end at the values they had before the stores.
  15309. STFD [AOFFSET] = f64, SIZE
  15310. STFD [AOFFSET2] = f80, SIZE
  15311. ;;
  15312. STFD [AOFFSET] = f65, SIZE
  15313. STFD [AOFFSET2] = f81, SIZE
  15314. ;;
  15315. STFD [AOFFSET] = f72, SIZE
  15316. STFD [AOFFSET2] = f88, SIZE
  15317. ;;
  15318. STFD [AOFFSET] = f73, 5 * SIZE
  15319. STFD [AOFFSET2] = f89, 5 * SIZE
  15320. ;;
  15321. STFD [AOFFSET] = f96, SIZE
  15322. STFD [AOFFSET2] = f112, SIZE
  15323. ;;
  15324. STFD [AOFFSET] = f97, SIZE
  15325. STFD [AOFFSET2] = f113, SIZE
  15326. ;;
  15327. STFD [AOFFSET] = f104, SIZE
  15328. STFD [AOFFSET2] = f120, SIZE
  15329. ;;
  15330. STFD [AOFFSET] = f105, -11 * SIZE
  15331. STFD [AOFFSET2] = f121, - 11 * SIZE
  15332. ;;
  15333. #endif
  // RT variant: backward sweep over the eight column pairs, starting with
  // f120/f121 and ending with f64/f65, using coefficients read from BOFFSET.
  15334. #ifdef RT
  // Coefficients are read walking backwards from offset 63 (SIZE units) down
  // to 0, leaving BOFFSET at its entry value. The values applied with FMPY
  // sit at offsets 63, 54, 45, 36, 27, 18, 9 and 0 (f32, f40, f47, f53, f58,
  // f16, f19, f21).
  // NOTE(review): those are presumably reciprocals of the diagonal -- confirm
  // with the packing code.
  15335. adds BOFFSET = 62 * SIZE, BOFFSET
  15336. ;;
  15337. LDFPD f33, f32 = [BOFFSET]
  15338. adds BOFFSET = - 2 * SIZE, BOFFSET
  15339. ;;
  15340. LDFPD f35, f34 = [BOFFSET]
  15341. adds BOFFSET = - 2 * SIZE, BOFFSET
  15342. ;;
  15343. LDFPD f37, f36 = [BOFFSET]
  15344. adds BOFFSET = - 2 * SIZE, BOFFSET
  15345. ;;
  15346. LDFPD f39, f38 = [BOFFSET]
  15347. adds BOFFSET = - 2 * SIZE, BOFFSET
  15348. ;;
  15349. LDFD f40 = [BOFFSET], -2 * SIZE
  15350. ;;
  15351. LDFPD f42, f41 = [BOFFSET]
  15352. adds BOFFSET = - 2 * SIZE, BOFFSET
  15353. ;;
  15354. LDFPD f44, f43 = [BOFFSET]
  15355. adds BOFFSET = - 2 * SIZE, BOFFSET
  15356. ;;
  15357. LDFPD f46, f45 = [BOFFSET]
  15358. adds BOFFSET = - 4 * SIZE, BOFFSET
  15359. ;;
  15360. LDFPD f48, f47 = [BOFFSET]
  15361. adds BOFFSET = - 2 * SIZE, BOFFSET
  15362. ;;
  15363. LDFPD f50, f49 = [BOFFSET]
  15364. adds BOFFSET = - 2 * SIZE, BOFFSET
  15365. ;;
  15366. LDFPD f52, f51 = [BOFFSET]
  15367. adds BOFFSET = - 4 * SIZE, BOFFSET
  15368. ;;
  15369. LDFD f53 = [BOFFSET], -2 * SIZE
  15370. ;;
  15371. LDFPD f55, f54 = [BOFFSET]
  15372. adds BOFFSET = - 2 * SIZE, BOFFSET
  15373. ;;
  15374. LDFPD f57, f56 = [BOFFSET]
  15375. adds BOFFSET = - 6 * SIZE, BOFFSET
  15376. ;;
  15377. LDFPD f59, f58 = [BOFFSET]
  15378. adds BOFFSET = - 2 * SIZE, BOFFSET
  15379. ;;
  15380. LDFPD f61, f60 = [BOFFSET]
  15381. adds BOFFSET = - 6 * SIZE, BOFFSET
  15382. ;;
  15383. LDFD f16 = [BOFFSET], -2 * SIZE
  15384. ;;
  15385. LDFPD f18, f17 = [BOFFSET]
  15386. adds BOFFSET = - 8 * SIZE, BOFFSET
  15387. ;;
  15388. LDFPD f20, f19 = [BOFFSET]
  15389. adds BOFFSET = - 8 * SIZE, BOFFSET
  15390. ;;
  15391. LDFD f21 = [BOFFSET]
  15392. ;;
  // Column 8 (f120/f121): scale, then eliminate from columns 7..1.
  15393. FMPY f120 = f120, f32
  15394. FMPY f121 = f121, f32
  15395. ;;
  15396. FNMA f112 = f120, f33, f112
  15397. FNMA f113 = f121, f33, f113
  15398. ;;
  15399. FNMA f104 = f120, f34, f104
  15400. FNMA f105 = f121, f34, f105
  15401. ;;
  15402. FNMA f96 = f120, f35, f96
  15403. FNMA f97 = f121, f35, f97
  15404. ;;
  15405. FNMA f88 = f120, f36, f88
  15406. FNMA f89 = f121, f36, f89
  15407. ;;
  15408. FNMA f80 = f120, f37, f80
  15409. FNMA f81 = f121, f37, f81
  15410. ;;
  15411. FNMA f72 = f120, f38, f72
  15412. FNMA f73 = f121, f38, f73
  15413. ;;
  15414. FNMA f64 = f120, f39, f64
  15415. FNMA f65 = f121, f39, f65
  15416. ;;
  // Column 7 (f112/f113).
  15417. FMPY f112 = f112, f40
  15418. FMPY f113 = f113, f40
  15419. ;;
  15420. FNMA f104 = f112, f41, f104
  15421. FNMA f105 = f113, f41, f105
  15422. ;;
  15423. FNMA f96 = f112, f42, f96
  15424. FNMA f97 = f113, f42, f97
  15425. ;;
  15426. FNMA f88 = f112, f43, f88
  15427. FNMA f89 = f113, f43, f89
  15428. ;;
  15429. FNMA f80 = f112, f44, f80
  15430. FNMA f81 = f113, f44, f81
  15431. ;;
  15432. FNMA f72 = f112, f45, f72
  15433. FNMA f73 = f113, f45, f73
  15434. ;;
  15435. FNMA f64 = f112, f46, f64
  15436. FNMA f65 = f113, f46, f65
  15437. ;;
  // Column 6 (f104/f105).
  15438. FMPY f104 = f104, f47
  15439. FMPY f105 = f105, f47
  15440. ;;
  15441. FNMA f96 = f104, f48, f96
  15442. FNMA f97 = f105, f48, f97
  15443. ;;
  15444. FNMA f88 = f104, f49, f88
  15445. FNMA f89 = f105, f49, f89
  15446. ;;
  15447. FNMA f80 = f104, f50, f80
  15448. FNMA f81 = f105, f50, f81
  15449. ;;
  15450. FNMA f72 = f104, f51, f72
  15451. FNMA f73 = f105, f51, f73
  15452. ;;
  15453. FNMA f64 = f104, f52, f64
  15454. FNMA f65 = f105, f52, f65
  15455. ;;
  // Column 5 (f96/f97).
  15456. FMPY f96 = f96, f53
  15457. FMPY f97 = f97, f53
  15458. ;;
  15459. FNMA f88 = f96, f54, f88
  15460. FNMA f89 = f97, f54, f89
  15461. ;;
  15462. FNMA f80 = f96, f55, f80
  15463. FNMA f81 = f97, f55, f81
  15464. ;;
  15465. FNMA f72 = f96, f56, f72
  15466. FNMA f73 = f97, f56, f73
  15467. ;;
  15468. FNMA f64 = f96, f57, f64
  15469. FNMA f65 = f97, f57, f65
  15470. ;;
  // Column 4 (f88/f89).
  15471. FMPY f88 = f88, f58
  15472. FMPY f89 = f89, f58
  15473. ;;
  15474. FNMA f80 = f88, f59, f80
  15475. FNMA f81 = f89, f59, f81
  15476. ;;
  15477. FNMA f72 = f88, f60, f72
  15478. FNMA f73 = f89, f60, f73
  15479. ;;
  15480. FNMA f64 = f88, f61, f64
  15481. FNMA f65 = f89, f61, f65
  15482. ;;
  // Column 3 (f80/f81).
  15483. FMPY f80 = f80, f16
  15484. FMPY f81 = f81, f16
  15485. ;;
  15486. FNMA f72 = f80, f17, f72
  15487. FNMA f73 = f81, f17, f73
  15488. ;;
  15489. FNMA f64 = f80, f18, f64
  15490. FNMA f65 = f81, f18, f65
  15491. ;;
  // Column 2 (f72/f73), then column 1 (f64/f65).
  15492. FMPY f72 = f72, f19
  15493. FMPY f73 = f73, f19
  15494. ;;
  15495. FNMA f64 = f72, f20, f64
  15496. FNMA f65 = f73, f20, f65
  15497. ;;
  15498. FMPY f64 = f64, f21
  15499. FMPY f65 = f65, f21
  15500. ;;
  // Write the results back through AOFFSET/AOFFSET2: the upper eight slots
  // first, then -11 * SIZE steps back to fill the lower eight; the final
  // -3 * SIZE leaves both pointers where they were before the +8 bump.
  15501. adds AOFFSET = 8 * SIZE, AOFFSET
  15502. adds AOFFSET2 = 8 * SIZE, AOFFSET2
  15503. ;;
  15504. STFD [AOFFSET] = f96, SIZE
  15505. STFD [AOFFSET2] = f112, SIZE
  15506. ;;
  15507. STFD [AOFFSET] = f97, SIZE
  15508. STFD [AOFFSET2] = f113, SIZE
  15509. ;;
  15510. STFD [AOFFSET] = f104, SIZE
  15511. STFD [AOFFSET2] = f120, SIZE
  15512. ;;
  15513. STFD [AOFFSET] = f105, - 11 * SIZE
  15514. STFD [AOFFSET2] = f121, - 11 * SIZE
  15515. ;;
  15516. STFD [AOFFSET] = f64, SIZE
  15517. STFD [AOFFSET2] = f80, SIZE
  15518. ;;
  15519. STFD [AOFFSET] = f65, SIZE
  15520. STFD [AOFFSET2] = f81, SIZE
  15521. ;;
  15522. STFD [AOFFSET] = f72, SIZE
  15523. STFD [AOFFSET2] = f88, SIZE
  15524. ;;
  15525. STFD [AOFFSET] = f73, - 3 * SIZE
  15526. STFD [AOFFSET2] = f89, - 3 * SIZE
  15527. ;;
  15528. #endif
  // Store the solved pairs through C1..C8 (two values per pointer) and clear
  // the first-row accumulators (f64, f72, ..., f120) straight after each is
  // written. Without LN every pointer advances by 2 * SIZE; with LN the
  // second store post-decrements, so the pointer ends where it was on entry
  // to this section.
  15529. STFD [C1 ] = f64, SIZE
  15530. mov f64 = f0
  15531. ;;
  15532. #ifndef LN
  15533. STFD [C1 ] = f65, SIZE
  15534. #else
  15535. STFD [C1 ] = f65, -SIZE
  15536. #endif
  15537. ;;
  15538. STFD [C2 ] = f72, SIZE
  15539. mov f72 = f0
  15540. ;;
  15541. #ifndef LN
  15542. STFD [C2 ] = f73, SIZE
  15543. #else
  15544. STFD [C2 ] = f73, -SIZE
  15545. #endif
  15546. ;;
  15547. STFD [C3 ] = f80, SIZE
  15548. mov f80 = f0
  15549. ;;
  15550. #ifndef LN
  15551. STFD [C3 ] = f81, SIZE
  15552. #else
  15553. STFD [C3 ] = f81, - SIZE
  15554. #endif
  15555. ;;
  15556. STFD [C4 ] = f88, SIZE
  15557. mov f88 = f0
  15558. ;;
  15559. #ifndef LN
  15560. STFD [C4 ] = f89, SIZE
  15561. #else
  15562. STFD [C4 ] = f89, -SIZE
  15563. #endif
  15564. ;;
  15565. STFD [C5 ] = f96, SIZE
  15566. mov f96 = f0
  15567. ;;
  15568. #ifndef LN
  15569. STFD [C5 ] = f97, SIZE
  15570. #else
  15571. STFD [C5 ] = f97, -SIZE
  15572. #endif
  15573. ;;
  15574. STFD [C6 ] = f104, SIZE
  15575. mov f104 = f0
  15576. ;;
  15577. #ifndef LN
  15578. STFD [C6 ] = f105, SIZE
  15579. #else
  15580. STFD [C6 ] = f105, -SIZE
  15581. #endif
  15582. ;;
  // Pointer bookkeeping: r2 = K << BASE_SHIFT and L = K - KK.
  //   RT:    AORIG   += 2 * r2
  //   LT/RN: AOFFSET += 2 * (L << BASE_SHIFT), BOFFSET += 8 * (L << BASE_SHIFT)
  15583. shladd r2 = K, BASE_SHIFT, r0
  15584. ;;
  15585. sub L = K, KK
  15586. ;;
  15587. #ifdef RT
  15588. shladd AORIG = r2, 1, AORIG
  15589. #else
  15590. nop __LINE__
  15591. #endif
  15592. ;;
  15593. STFD [C7 ] = f112, SIZE
  15594. mov f112 = f0
  15595. ;;
  15596. { .mmi
  15597. #ifndef LN
  15598. STFD [C7 ] = f113, SIZE
  15599. #else
  15600. STFD [C7 ] = f113, -SIZE
  15601. #endif
  15602. #if defined(LT) || defined(RN)
  15603. shladd L = L, BASE_SHIFT, r0
  15604. #else
  15605. nop __LINE__
  15606. #endif
  15607. }
  15608. ;;
  15609. { .mmi
  15610. #if defined(LT) || defined(RN)
  15611. shladd AOFFSET = L, 1, AOFFSET
  15612. #else
  15613. nop __LINE__
  15614. #endif
  15615. }
  15616. ;;
  15617. { .mmi
  15618. #if defined(LT) || defined(RN)
  15619. shladd BOFFSET = L, 3, BOFFSET
  15620. #else
  15621. nop __LINE__
  15622. #endif
  15623. }
  15624. ;;
  15625. { .mmf
  15626. STFD [C8 ] = f120, SIZE
  15627. mov f120 = f0
  15628. }
  15629. ;;
  // KK moves by the two rows just handled: +2 for LT, -2 for LN, unchanged
  // for RN/RT.
  15630. { .mmi
  15631. #ifndef LN
  15632. STFD [C8 ] = f121, SIZE
  15633. #else
  15634. STFD [C8 ] = f121, -SIZE
  15635. #endif
  15636. #ifdef LT
  15637. adds KK = 2, KK
  15638. #elif defined LN
  15639. adds KK = -2, KK
  15640. #else
  15641. nop __LINE__
  15642. #endif
  15643. }
  15644. ;;
  // L is recomputed from the updated KK; the first bundle at .L040 computes
  // the same value again.
  15645. { .mmi
  15646. #if defined(LT) || defined(RN)
  15647. mov L = KK
  15648. #else
  15649. sub L = K, KK
  15650. #endif
  15651. }
  15652. ;;
  // .L040: set-up for the tile that is processed when bit 0 of M is set.
  // If that bit is clear, control goes straight to .L049.
  // NOTE(review): presumably this is the final single row of M -- confirm.
  15653. .align 8
  15654. .L040:
  15655. { .mib
  // L = number of k-steps for the update loop: KK (LT/RN) or K - KK (LN/RT).
  15656. #if defined(LT) || defined(RN)
  15657. mov L = KK
  15658. #else
  15659. sub L = K, KK
  15660. #endif
  15661. tbit.z p6, p0 = M, 0
  15662. (p6) br.cond.dptk .L049
  15663. }
  15664. ;;
  // p7 = (L != 0) guards the preloads; r2 = K << BASE_SHIFT and
  // r3 = KK << BASE_SHIFT.
  15665. { .mmi
  15666. cmp.ne p7, p0 = r0, L
  15667. adds BOFFSET = 0 * SIZE, B
  15668. shl r2 = K, 0 + BASE_SHIFT
  15669. }
  15670. { .mmi
  15671. shladd r3 = KK, BASE_SHIFT, r0
  15672. nop __LINE__
  15673. nop __LINE__
  15674. }
  15675. ;;
  15676. #if defined(LT) || defined(RN)
  15677. { .mmf
  15678. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  15679. }
  15680. ;;
  15681. #else
  // LN/RT: BOFFSET = B + 8 * r3; for LN, AORIG is first moved back by r2;
  // then AOFFSET = AORIG + r3.
  15682. { .mfi
  15683. shladd BOFFSET = r3, 3, B
  15684. #ifdef LN
  15685. sub AORIG = AORIG, r2
  15686. #else
  15687. nop __LINE__
  15688. #endif
  15689. }
  15690. ;;
  15691. { .mfi
  15692. (p7) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  15693. add AOFFSET = r3, AORIG
  15694. }
  15695. ;;
  15696. #endif
  // No accumulators are cleared in this section. p3 starts out true.
  15697. { .mmi
  15698. adds L = 1, L
  15699. adds PREA = (PREFETCHSIZE + 8) * SIZE, AOFFSET
  15700. cmp.eq p3, p0 = r0, r0
  15701. }
  15702. ;;
  // p12 = bit 0 of (L + 1) is clear; tbit.z and shr share an instruction
  // group, so tbit.z tests the value before the shift.
  15703. { .mii
  15704. (p7) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  15705. tbit.z p12, p0 = L, 0
  15706. shr L = L, 1
  15707. }
  15708. ;;
  // L = ((L + 1) >> 1) - 1 becomes the loop counter; -1 means no k-steps.
  15709. { .mmi
  15710. (p7) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  15711. adds L = -1, L
  15712. }
  15713. ;;
  15714. { .mmi
  15715. (p7) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  15716. cmp.eq p6, p0 = -1, L
  15717. }
  15718. ;;
  // Preload a single value from A, arm ar.lc, and skip the loop if empty.
  15719. { .mib
  15720. (p7) LDFD f32 = [AOFFSET], 1 * SIZE
  15721. mov ar.lc = L
  15722. (p6) br.cond.dpnt .L048
  15723. }
  15724. ;;
  // .L042: accumulation loop for one row against eight columns, unrolled
  // over two k-steps per trip.
  //   first half : A value in f32, B values in f48..f55
  //   second half: A value in f40, B values in f56..f63 (predicated on p3)
  // Products are accumulated into f64, f72, f80, f88, f96, f104, f112, f120.
  // Loads for the second half (p3) and for the next trip (p4) are
  // interleaved with the FMAs; the trip count lives in ar.lc (br.cloop),
  // while L is decremented in parallel only to drive p3/p4.
  15725. .L042:
  15726. { .mfb
  15727. lfetch.nt1 [PREB], 16 * SIZE
  15728. FMA f64 = f32, f48, f64 // A1 * B1
  15729. nop __LINE__
  15730. }
  15731. { .mfb
  // If p12 is set (odd number of k-steps), p3 becomes (L != 0), which
  // disables the second half on the final trip (L == 0).
  15732. (p12) cmp.ne p3, p0 = 0, L
  15733. FMA f72 = f32, f49, f72 // A1 * B2
  15734. nop __LINE__
  15735. }
  15736. ;;
  15737. { .mfi
  15738. (p3) LDFD f40 = [AOFFSET], 1 * SIZE
  15739. FMA f80 = f32, f50, f80 // A1 * B3
  // p4 = (L != 0): another trip follows, so its operands get preloaded.
  15740. cmp.ne p4, p5 = 0, L
  15741. }
  15742. { .mfb
  15743. (p3) LDFPD f56, f57 = [BOFFSET], 2 * SIZE
  15744. FMA f88 = f32, f51, f88 // A1 * B4
  15745. nop __LINE__
  15746. }
  15747. ;;
  15748. { .mfb
  15749. (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  15750. FMA f96 = f32, f52, f96 // A1 * B5
  15751. nop __LINE__
  15752. }
  15753. { .mfb
  15754. nop __LINE__
  15755. FMA f104 = f32, f53, f104 // A1 * B6
  15756. nop __LINE__
  15757. }
  15758. ;;
  15759. { .mfb
  15760. (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE
  15761. FMA f112 = f32, f54, f112 // A1 * B7
  15762. nop __LINE__
  15763. }
  15764. { .mfb
  15765. nop __LINE__
  15766. FMA f120 = f32, f55, f120 // A1 * B8
  15767. nop __LINE__
  15768. }
  15769. ;;
  // Second k-step of this trip (skipped when p3 is clear).
  15770. { .mfb
  15771. (p4) LDFD f32 = [AOFFSET], 1 * SIZE
  15772. (p3) FMA f64 = f40, f56, f64 // A1 * B1
  15773. nop __LINE__
  15774. }
  15775. { .mfb
  15776. (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE
  15777. (p3) FMA f72 = f40, f57, f72 // A1 * B2
  15778. nop __LINE__
  15779. }
  15780. ;;
  15781. { .mfb
  15782. (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE
  15783. (p3) FMA f80 = f40, f58, f80 // A1 * B3
  15784. nop __LINE__
  15785. }
  15786. { .mfb
  15787. nop __LINE__
  15788. (p3) FMA f88 = f40, f59, f88 // A1 * B4
  15789. nop __LINE__
  15790. }
  15791. ;;
  15792. { .mfb
  15793. (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE
  15794. (p3) FMA f96 = f40, f60, f96 // A1 * B5
  15795. nop __LINE__
  15796. }
  15797. { .mfb
  15798. nop __LINE__
  15799. (p3) FMA f104 = f40, f61, f104 // A1 * B6
  15800. nop __LINE__
  15801. }
  15802. ;;
  15803. { .mfi
  15804. (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE
  15805. (p3) FMA f112 = f40, f62, f112 // A1 * B7
  15806. adds L = -1, L
  15807. }
  15808. { .mmb
  15809. nop __LINE__
  15810. nop __LINE__
  15811. nop __LINE__
  15812. }
  15813. ;;
  15814. { .mfb
  15815. (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  15816. (p3) FMA f120 = f40, f63, f120 // A1 * B8
  15817. nop __LINE__
  15818. }
  15819. { .mmb
  15820. nop __LINE__
  15821. nop __LINE__
  15822. br.cloop.sptk.few .L042
  15823. }
  15824. ;;
  // .L048: after the accumulation loop.  Re-aims AOFFSET/BOFFSET at the data
  // to be solved, loads the eight right-hand-side values into f32..f39 and
  // replaces each accumulator with (loaded value, accumulator) passed through
  // FSUB.  NOTE(review): FSUB is a macro defined outside this chunk;
  // presumably it yields loaded - accumulated -- confirm.
  15825. .L048:
  15826. #if defined(LN) || defined(RT)
  // LN: r2 = KK - 1 ; RT: r2 = KK - 8 ; then scaled by the element size.
  15827. #ifdef LN
  15828. adds r2 = -1, KK
  15829. #else
  15830. adds r2 = -8, KK
  15831. #endif
  15832. ;;
  15833. shladd r2 = r2, BASE_SHIFT, r0
  15834. ;;
  // AOFFSET = AORIG + r2 ; BOFFSET = B + 8 * r2.
  15835. add AOFFSET = r2, AORIG
  15836. shladd BOFFSET = r2, 3, B
  15837. ;;
  15838. #endif
  // Secondary pointers four elements ahead, so stores can go out in pairs.
  15839. adds AOFFSET2 = 4 * SIZE, AOFFSET
  15840. adds BOFFSET2 = 4 * SIZE, BOFFSET
  15841. ;;
  15842. #if defined(LN) || defined(LT)
  // LN/LT: the eight values come from BOFFSET; the post-increments total
  // +6 * SIZE and are undone by the explicit -6 * SIZE below.
  15843. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  15844. ;;
  15845. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  15846. ;;
  15847. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  15848. ;;
  15849. LDFPD f38, f39 = [BOFFSET]
  15850. adds BOFFSET = -6 * SIZE, BOFFSET
  15851. ;;
  15852. { .mfi
  15853. FSUB f64 = f32, f64
  15854. nop __LINE__
  15855. }
  15856. { .mfi
  15857. nop __LINE__
  15858. FSUB f72 = f33, f72
  15859. nop __LINE__
  15860. }
  15861. ;;
  15862. { .mfi
  15863. FSUB f80 = f34, f80
  15864. nop __LINE__
  15865. }
  15866. { .mfi
  15867. nop __LINE__
  15868. FSUB f88 = f35, f88
  15869. nop __LINE__
  15870. }
  15871. ;;
  15872. { .mfi
  15873. FSUB f96 = f36, f96
  15874. nop __LINE__
  15875. }
  15876. { .mfi
  15877. nop __LINE__
  15878. FSUB f104 = f37, f104
  15879. nop __LINE__
  15880. }
  15881. ;;
  15882. { .mfi
  15883. FSUB f112 = f38, f112
  15884. nop __LINE__
  15885. }
  15886. { .mfi
  15887. nop __LINE__
  15888. FSUB f120 = f39, f120
  15889. nop __LINE__
  15890. }
  15891. ;;
  15892. #else
  // RN/RT: the eight values come from AOFFSET instead; same net-zero
  // pointer movement.
  15893. LDFPD f32, f33 = [AOFFSET], 2 * SIZE
  15894. ;;
  15895. LDFPD f34, f35 = [AOFFSET], 2 * SIZE
  15896. ;;
  15897. LDFPD f36, f37 = [AOFFSET], 2 * SIZE
  15898. ;;
  15899. LDFPD f38, f39 = [AOFFSET]
  15900. adds AOFFSET = -6 * SIZE, AOFFSET
  15901. ;;
  15902. FSUB f64 = f32, f64
  15903. FSUB f72 = f33, f72
  15904. FSUB f80 = f34, f80
  15905. FSUB f88 = f35, f88
  15906. FSUB f96 = f36, f96
  15907. FSUB f104 = f37, f104
  15908. FSUB f112 = f38, f112
  15909. FSUB f120 = f39, f120
  15910. ;;
  15911. #endif
  // LN variant: scale all eight values by the single factor loaded from
  // [AOFFSET], write them back through BOFFSET/BOFFSET2, and move C1..C8
  // back by one element so the plain (non-incrementing) stores further down
  // hit the right slot.
  // NOTE(review): multiplying rather than dividing suggests the factor is
  // stored pre-inverted -- confirm against the packing routine.
  15912. #ifdef LN
  15913. LDFD f32 = [AOFFSET]
  15914. ;;
  15915. FMPY f64 = f64, f32
  15916. FMPY f96 = f96, f32
  15917. FMPY f72 = f72, f32
  15918. FMPY f104 = f104, f32
  15919. FMPY f80 = f80, f32
  15920. FMPY f112 = f112, f32
  15921. FMPY f88 = f88, f32
  15922. FMPY f120 = f120, f32
  15923. ;;
  // BOFFSET receives f64, f72, f80, f88; BOFFSET2 (four elements ahead)
  // receives f96, f104, f112, f120.
  15924. { .mmi
  15925. STFD [BOFFSET] = f64, SIZE
  15926. STFD [BOFFSET2] = f96, SIZE
  15927. adds C1 = -1 * SIZE, C1
  15928. }
  15929. ;;
  15930. { .mmi
  15931. STFD [BOFFSET] = f72, SIZE
  15932. STFD [BOFFSET2] = f104, SIZE
  15933. adds C2 = -1 * SIZE, C2
  15934. }
  15935. ;;
  15936. { .mmi
  15937. STFD [BOFFSET] = f80, SIZE
  15938. STFD [BOFFSET2] = f112, SIZE
  15939. nop __LINE__
  15940. }
  15941. ;;
  // The final post-increment of -3 * SIZE returns both pointers to where
  // they started.
  15942. { .mmi
  15943. STFD [BOFFSET] = f88, - 3 * SIZE
  15944. STFD [BOFFSET2] = f120, - 3 * SIZE
  15945. }
  15946. ;;
  15947. adds C3 = -1 * SIZE, C3
  15948. adds C4 = -1 * SIZE, C4
  15949. adds C5 = -1 * SIZE, C5
  15950. adds C6 = -1 * SIZE, C6
  15951. adds C7 = -1 * SIZE, C7
  15952. adds C8 = -1 * SIZE, C8
  15953. ;;
  15954. #endif
  // LT variant: scale all eight values by the single factor loaded from
  // [AOFFSET] and write them back through BOFFSET/BOFFSET2.  Unlike the LN
  // variant, C1..C8 are not adjusted here.
  // NOTE(review): multiplying rather than dividing suggests the factor is
  // stored pre-inverted -- confirm against the packing routine.
  15955. #ifdef LT
  15956. LDFD f32 = [AOFFSET]
  15957. ;;
  15958. { .mfi
  15959. FMPY f64 = f64, f32
  15960. nop __LINE__
  15961. }
  15962. { .mfi
  15963. nop __LINE__
  15964. FMPY f96 = f96, f32
  15965. nop __LINE__
  15966. }
  15967. ;;
  15968. { .mfi
  15969. FMPY f72 = f72, f32
  15970. nop __LINE__
  15971. }
  15972. { .mfi
  15973. nop __LINE__
  15974. FMPY f104 = f104, f32
  15975. nop __LINE__
  15976. }
  15977. ;;
  15978. { .mfi
  15979. FMPY f80 = f80, f32
  15980. }
  15981. { .mfi
  15982. nop __LINE__
  15983. FMPY f112 = f112, f32
  15984. nop __LINE__
  15985. }
  15986. ;;
  15987. { .mfi
  15988. FMPY f88 = f88, f32
  15989. nop __LINE__
  15990. }
  15991. { .mfi
  15992. nop __LINE__
  15993. FMPY f120 = f120, f32
  15994. nop __LINE__
  15995. }
  15996. ;;
  // BOFFSET receives f64, f72, f80, f88; BOFFSET2 receives f96, f104, f112,
  // f120.  The last store of each stream post-increments by -3 * SIZE,
  // restoring the pointer.
  15997. { .mfi
  15998. STFD [BOFFSET] = f64, SIZE
  15999. }
  16000. { .mfi
  16001. STFD [BOFFSET2] = f96, SIZE
  16002. }
  16003. ;;
  16004. { .mfi
  16005. STFD [BOFFSET] = f72, SIZE
  16006. }
  16007. { .mfi
  16008. STFD [BOFFSET2] = f104, SIZE
  16009. }
  16010. ;;
  16011. { .mfi
  16012. STFD [BOFFSET] = f80, SIZE
  16013. }
  16014. { .mfi
  16015. STFD [BOFFSET2] = f112, SIZE
  16016. }
  16017. ;;
  16018. { .mfi
  16019. STFD [BOFFSET] = f88, -3 * SIZE
  16020. }
  16021. { .mfi
  16022. STFD [BOFFSET2] = f120, -3 * SIZE
  16023. }
  16024. ;;
  16025. #endif
  // RN variant: forward substitution of the eight values against an 8x8
  // block read from BOFFSET.  Only the part from each diagonal entry to the
  // end of its group of eight is loaded (element offsets relative to the
  // starting BOFFSET):
  //   f32..f39 =  0.. 7    f40..f46 =  9..15    f47..f52 = 18..23
  //   f53..f57 = 27..31    f58..f61 = 36..39    f16..f18 = 45..47
  //   f19..f20 = 54..55    f21      = 63
  // The explicit "adds" after each group skip the unused entries, and the
  // final -63 * SIZE puts BOFFSET back at its starting value.
  // NOTE(review): the diagonal entries are applied with FMPY, so they are
  // presumably stored pre-inverted -- confirm against the packing routine.
  16026. #ifdef RN
  16027. LDFPD f32, f33 = [BOFFSET], 2 * SIZE
  16028. ;;
  16029. LDFPD f34, f35 = [BOFFSET], 2 * SIZE
  16030. ;;
  16031. LDFPD f36, f37 = [BOFFSET], 2 * SIZE
  16032. ;;
  16033. LDFPD f38, f39 = [BOFFSET]
  16034. adds BOFFSET = 3 * SIZE, BOFFSET
  16035. ;;
  16036. LDFD f40 = [BOFFSET], 1 * SIZE
  16037. ;;
  16038. LDFPD f41, f42 = [BOFFSET], 2 * SIZE
  16039. ;;
  16040. LDFPD f43, f44 = [BOFFSET], 2 * SIZE
  16041. ;;
  16042. LDFPD f45, f46 = [BOFFSET]
  16043. adds BOFFSET = 4 * SIZE, BOFFSET
  16044. ;;
  16045. LDFPD f47, f48 = [BOFFSET], 2 * SIZE
  16046. ;;
  16047. LDFPD f49, f50 = [BOFFSET], 2 * SIZE
  16048. ;;
  16049. LDFPD f51, f52 = [BOFFSET]
  16050. adds BOFFSET = 5 * SIZE, BOFFSET
  16051. ;;
  16052. LDFD f53 = [BOFFSET], 1 * SIZE
  16053. ;;
  16054. LDFPD f54, f55 = [BOFFSET], 2 * SIZE
  16055. ;;
  16056. LDFPD f56, f57 = [BOFFSET]
  16057. adds BOFFSET = 6 * SIZE, BOFFSET
  16058. ;;
  16059. LDFPD f58, f59 = [BOFFSET], 2 * SIZE
  16060. ;;
  16061. LDFPD f60, f61 = [BOFFSET]
  16062. adds BOFFSET = 7 * SIZE, BOFFSET
  16063. ;;
  16064. LDFD f16 = [BOFFSET], 1 * SIZE
  16065. ;;
  16066. LDFPD f17, f18 = [BOFFSET]
  16067. adds BOFFSET = 8 * SIZE, BOFFSET
  16068. ;;
  16069. LDFPD f19, f20 = [BOFFSET]
  16070. adds BOFFSET = 9 * SIZE, BOFFSET
  16071. ;;
  16072. LDFD f21 = [BOFFSET]
  16073. adds BOFFSET = -63 * SIZE, BOFFSET
  16074. ;;
  // Column 1: scale f64 by its diagonal, then eliminate it from f72..f120.
  16075. FMPY f64 = f64, f32
  16076. ;;
  16077. FNMA f72 = f64, f33, f72
  16078. ;;
  16079. FNMA f80 = f64, f34, f80
  16080. ;;
  16081. FNMA f88 = f64, f35, f88
  16082. ;;
  16083. FNMA f96 = f64, f36, f96
  16084. ;;
  16085. FNMA f104 = f64, f37, f104
  16086. ;;
  16087. FNMA f112 = f64, f38, f112
  16088. ;;
  16089. FNMA f120 = f64, f39, f120
  16090. ;;
  // Column 2.
  16091. FMPY f72 = f72, f40
  16092. ;;
  16093. FNMA f80 = f72, f41, f80
  16094. ;;
  16095. FNMA f88 = f72, f42, f88
  16096. ;;
  16097. FNMA f96 = f72, f43, f96
  16098. ;;
  16099. FNMA f104 = f72, f44, f104
  16100. ;;
  16101. FNMA f112 = f72, f45, f112
  16102. ;;
  16103. FNMA f120 = f72, f46, f120
  16104. ;;
  // Column 3.
  16105. FMPY f80 = f80, f47
  16106. ;;
  16107. FNMA f88 = f80, f48, f88
  16108. ;;
  16109. FNMA f96 = f80, f49, f96
  16110. ;;
  16111. FNMA f104 = f80, f50, f104
  16112. ;;
  16113. FNMA f112 = f80, f51, f112
  16114. ;;
  16115. FNMA f120 = f80, f52, f120
  16116. ;;
  // Column 4.
  16117. FMPY f88 = f88, f53
  16118. ;;
  16119. FNMA f96 = f88, f54, f96
  16120. ;;
  16121. FNMA f104 = f88, f55, f104
  16122. ;;
  16123. FNMA f112 = f88, f56, f112
  16124. ;;
  16125. FNMA f120 = f88, f57, f120
  16126. ;;
  // Column 5.
  16127. FMPY f96 = f96, f58
  16128. ;;
  16129. FNMA f104 = f96, f59, f104
  16130. ;;
  16131. FNMA f112 = f96, f60, f112
  16132. ;;
  16133. FNMA f120 = f96, f61, f120
  16134. ;;
  // Column 6.
  16135. FMPY f104 = f104, f16
  16136. ;;
  16137. FNMA f112 = f104, f17, f112
  16138. ;;
  16139. FNMA f120 = f104, f18, f120
  16140. ;;
  // Column 7.
  16141. FMPY f112 = f112, f19
  16142. ;;
  16143. FNMA f120 = f112, f20, f120
  16144. ;;
  // Column 8.
  16145. FMPY f120 = f120, f21
  16146. ;;
  // Write the solved values back through AOFFSET/AOFFSET2; the closing
  // -3 * SIZE post-increments restore both pointers.
  16147. STFD [AOFFSET] = f64, SIZE
  16148. STFD [AOFFSET2] = f96, SIZE
  16149. ;;
  16150. STFD [AOFFSET] = f72, SIZE
  16151. STFD [AOFFSET2] = f104, SIZE
  16152. ;;
  16153. STFD [AOFFSET] = f80, SIZE
  16154. STFD [AOFFSET2] = f112, SIZE
  16155. ;;
  16156. STFD [AOFFSET] = f88, -3 * SIZE
  16157. STFD [AOFFSET2] = f120, - 3 * SIZE
  16158. ;;
  16159. #endif
  // RT variant: backward substitution of the eight values against an 8x8
  // block read from BOFFSET, walking from the last entry towards the first.
  // LDFPD loads two adjacent elements, so the register pairs are written in
  // swapped order to keep the higher-offset element in the lower-numbered
  // register.  Element offsets relative to the starting BOFFSET:
  //   f32..f39 = 63..56    f40..f46 = 54..48    f47..f52 = 45..40
  //   f53..f57 = 36..32    f58..f61 = 27..24    f16..f18 = 18..16
  //   f19..f20 =  9.. 8    f21      =  0
  // BOFFSET ends up back at its starting value after the last load.
  // NOTE(review): the diagonal entries are applied with FMPY, so they are
  // presumably stored pre-inverted -- confirm against the packing routine.
  16160. #ifdef RT
  16161. adds BOFFSET = 62 * SIZE, BOFFSET
  16162. ;;
  16163. LDFPD f33, f32 = [BOFFSET]
  16164. adds BOFFSET = - 2 * SIZE, BOFFSET
  16165. ;;
  16166. LDFPD f35, f34 = [BOFFSET]
  16167. adds BOFFSET = - 2 * SIZE, BOFFSET
  16168. ;;
  16169. LDFPD f37, f36 = [BOFFSET]
  16170. adds BOFFSET = - 2 * SIZE, BOFFSET
  16171. ;;
  16172. LDFPD f39, f38 = [BOFFSET]
  16173. adds BOFFSET = - 2 * SIZE, BOFFSET
  16174. ;;
  16175. LDFD f40 = [BOFFSET], -2 * SIZE
  16176. ;;
  16177. LDFPD f42, f41 = [BOFFSET]
  16178. adds BOFFSET = - 2 * SIZE, BOFFSET
  16179. ;;
  16180. LDFPD f44, f43 = [BOFFSET]
  16181. adds BOFFSET = - 2 * SIZE, BOFFSET
  16182. ;;
  16183. LDFPD f46, f45 = [BOFFSET]
  16184. adds BOFFSET = - 4 * SIZE, BOFFSET
  16185. ;;
  16186. LDFPD f48, f47 = [BOFFSET]
  16187. adds BOFFSET = - 2 * SIZE, BOFFSET
  16188. ;;
  16189. LDFPD f50, f49 = [BOFFSET]
  16190. adds BOFFSET = - 2 * SIZE, BOFFSET
  16191. ;;
  16192. LDFPD f52, f51 = [BOFFSET]
  16193. adds BOFFSET = - 4 * SIZE, BOFFSET
  16194. ;;
  16195. LDFD f53 = [BOFFSET], -2 * SIZE
  16196. ;;
  16197. LDFPD f55, f54 = [BOFFSET]
  16198. adds BOFFSET = - 2 * SIZE, BOFFSET
  16199. ;;
  16200. LDFPD f57, f56 = [BOFFSET]
  16201. adds BOFFSET = - 6 * SIZE, BOFFSET
  16202. ;;
  16203. LDFPD f59, f58 = [BOFFSET]
  16204. adds BOFFSET = - 2 * SIZE, BOFFSET
  16205. ;;
  16206. LDFPD f61, f60 = [BOFFSET]
  16207. adds BOFFSET = - 6 * SIZE, BOFFSET
  16208. ;;
  16209. LDFD f16 = [BOFFSET], -2 * SIZE
  16210. ;;
  16211. LDFPD f18, f17 = [BOFFSET]
  16212. adds BOFFSET = - 8 * SIZE, BOFFSET
  16213. ;;
  16214. LDFPD f20, f19 = [BOFFSET]
  16215. adds BOFFSET = - 8 * SIZE, BOFFSET
  16216. ;;
  16217. LDFD f21 = [BOFFSET]
  16218. ;;
  // Column 8: scale f120 by its diagonal, then eliminate it from f112..f64.
  16219. FMPY f120 = f120, f32
  16220. ;;
  16221. FNMA f112 = f120, f33, f112
  16222. ;;
  16223. FNMA f104 = f120, f34, f104
  16224. ;;
  16225. FNMA f96 = f120, f35, f96
  16226. ;;
  16227. FNMA f88 = f120, f36, f88
  16228. ;;
  16229. FNMA f80 = f120, f37, f80
  16230. ;;
  16231. FNMA f72 = f120, f38, f72
  16232. ;;
  16233. FNMA f64 = f120, f39, f64
  16234. ;;
  // Column 7.
  16235. FMPY f112 = f112, f40
  16236. ;;
  16237. FNMA f104 = f112, f41, f104
  16238. ;;
  16239. FNMA f96 = f112, f42, f96
  16240. ;;
  16241. FNMA f88 = f112, f43, f88
  16242. ;;
  16243. FNMA f80 = f112, f44, f80
  16244. ;;
  16245. FNMA f72 = f112, f45, f72
  16246. ;;
  16247. FNMA f64 = f112, f46, f64
  16248. ;;
  // Column 6.
  16249. FMPY f104 = f104, f47
  16250. ;;
  16251. FNMA f96 = f104, f48, f96
  16252. ;;
  16253. FNMA f88 = f104, f49, f88
  16254. ;;
  16255. FNMA f80 = f104, f50, f80
  16256. ;;
  16257. FNMA f72 = f104, f51, f72
  16258. ;;
  16259. FNMA f64 = f104, f52, f64
  16260. ;;
  // Column 5.
  16261. FMPY f96 = f96, f53
  16262. ;;
  16263. FNMA f88 = f96, f54, f88
  16264. ;;
  16265. FNMA f80 = f96, f55, f80
  16266. ;;
  16267. FNMA f72 = f96, f56, f72
  16268. ;;
  16269. FNMA f64 = f96, f57, f64
  16270. ;;
  // Column 4.
  16271. FMPY f88 = f88, f58
  16272. ;;
  16273. FNMA f80 = f88, f59, f80
  16274. ;;
  16275. FNMA f72 = f88, f60, f72
  16276. ;;
  16277. FNMA f64 = f88, f61, f64
  16278. ;;
  // Column 3.
  16279. FMPY f80 = f80, f16
  16280. ;;
  16281. FNMA f72 = f80, f17, f72
  16282. ;;
  16283. FNMA f64 = f80, f18, f64
  16284. ;;
  // Column 2.
  16285. FMPY f72 = f72, f19
  16286. ;;
  16287. FNMA f64 = f72, f20, f64
  16288. ;;
  // Column 1.
  16289. FMPY f64 = f64, f21
  16290. ;;
  // Write the solved values back through AOFFSET/AOFFSET2; the closing
  // -3 * SIZE post-increments restore both pointers.
  16291. STFD [AOFFSET] = f64, SIZE
  16292. STFD [AOFFSET2] = f96, SIZE
  16293. ;;
  16294. STFD [AOFFSET] = f72, SIZE
  16295. STFD [AOFFSET2] = f104, SIZE
  16296. ;;
  16297. STFD [AOFFSET] = f80, SIZE
  16298. STFD [AOFFSET2] = f112, SIZE
  16299. ;;
  16300. STFD [AOFFSET] = f88, - 3 * SIZE
  16301. STFD [AOFFSET2] = f120, - 3 * SIZE
  16302. ;;
  16303. #endif
  // Store the eight solved values, one per column pointer C1..C8.
  // Non-LN builds post-increment each pointer by SIZE; LN builds store
  // without post-increment because the LN solve above has already moved
  // C1..C8 back by one element.
  16304. #ifndef LN
  16305. STFD [C1 ] = f64, SIZE
  16306. #else
  16307. STFD [C1 ] = f64
  16308. #endif
  16309. #ifndef LN
  16310. STFD [C2 ] = f72, SIZE
  16311. #else
  16312. STFD [C2 ] = f72
  16313. #endif
  16314. #ifndef LN
  16315. STFD [C3 ] = f80, SIZE
  16316. #else
  16317. STFD [C3 ] = f80
  16318. #endif
  16319. #ifndef LN
  16320. STFD [C4 ] = f88, SIZE
  16321. #else
  16322. STFD [C4 ] = f88
  16323. #endif
  16324. #ifndef LN
  16325. STFD [C5 ] = f96, SIZE
  16326. #else
  16327. STFD [C5 ] = f96
  16328. #endif
  16329. #ifndef LN
  16330. STFD [C6 ] = f104, SIZE
  16331. #else
  16332. STFD [C6 ] = f104
  16333. #endif
  16334. #ifndef LN
  16335. STFD [C7 ] = f112, SIZE
  16336. #else
  16337. STFD [C7 ] = f112
  16338. #endif
  16339. #ifndef LN
  16340. STFD [C8 ] = f120, SIZE
  16341. #else
  16342. STFD [C8 ] = f120
  16343. #endif
  16344. ;;
  // Reset the eight accumulators to zero (f0) and advance the working
  // pointers and KK past the row that was just solved.
  16345. mov f64 = f0
  16346. mov f72 = f0
  16347. mov f80 = f0
  16348. mov f88 = f0
  16349. mov f96 = f0
  16350. mov f104 = f0
  16351. mov f112 = f0
  16352. mov f120 = f0
  16353. ;;
  // r2 = K << BASE_SHIFT ; L = K - KK (the k-range not consumed above).
  16354. shladd r2 = K, BASE_SHIFT, r0
  16355. ;;
  16356. sub L = K, KK
  16357. ;;
  // RT only: AORIG moves forward by r2.
  16358. #ifdef RT
  16359. add AORIG = r2, AORIG
  16360. #else
  16361. nop __LINE__
  16362. #endif
  16363. ;;
  // LT/RN only: scale L by the element size, then skip the remaining
  // L elements of A and 8 * L elements of B.
  16364. #if defined(LT) || defined(RN)
  16365. shladd L = L, BASE_SHIFT, r0
  16366. #else
  16367. nop __LINE__
  16368. #endif
  16369. ;;
  16370. #if defined(LT) || defined(RN)
  16371. add AOFFSET = L, AOFFSET
  16372. #else
  16373. nop __LINE__
  16374. #endif
  16375. ;;
  16376. #if defined(LT) || defined(RN)
  16377. shladd BOFFSET = L, 3, BOFFSET
  16378. #else
  16379. nop __LINE__
  16380. #endif
  16381. ;;
  // One row was processed: KK moves forward for LT, backward for LN, and
  // is left alone for RN/RT.
  16382. #ifdef LT
  16383. adds KK = 1, KK
  16384. #elif defined LN
  16385. adds KK = -1, KK
  16386. #else
  16387. nop __LINE__
  16388. #endif
  16389. ;;
  // Recompute L from the updated KK, using the same rule as at .L040.
  16390. #if defined(LT) || defined(RN)
  16391. mov L = KK
  16392. #else
  16393. sub L = K, KK
  16394. #endif
  16395. ;;
  16396. .align 8
  // .L049: end of the current eight-column pass.  Moves B to the next
  // panel, steps KK by the eight columns just handled (RN/RT), rewinds
  // AOFFSET to A, and loops back to .L010 while J is still positive.
  16397. .L049:
  16398. #ifdef LN
  // LN: B += 8 * (K << BASE_SHIFT).
  16399. shladd KK8 = K, BASE_SHIFT, r0
  16400. ;;
  16401. shladd B = KK8, 3, B
  16402. #endif
  16403. #if defined(LT) || defined(RN)
  // LT/RN: B simply takes over the position BOFFSET has reached.
  16404. mov B = BOFFSET
  16405. #endif
  16406. #ifdef RN
  16407. adds KK = 8, KK
  16408. #endif
  16409. #ifdef RT
  16410. adds KK = -8, KK
  16411. #endif
  16412. ;;
  16413. { .mmi
  16414. mov AOFFSET = A
  16415. }
  16416. ;;
  16417. { .mmb
  16418. nop __LINE__
  // p6 = (0 < J): there are more passes to run.
  16419. cmp.lt p6, p0 = 0, J
  16420. (p6) br.cond.dptk .L010
  16421. }
  16422. ;;
  16423. .align 8
  // .L999: common exit.  Sets r8 to zero (r8 is the integer return register
  // in the IA-64 calling convention), reloads f16..f21 from the stack, and
  // restores ar.lc, the predicate registers and ar.pfs before returning.
  // NOTE(review): the matching spills and the ARLC/PR/ARPFS saves are
  // expected in the prologue, which is outside this chunk -- confirm.
  16424. .L999:
  16425. mov r8 = r0
  // r9 = SP + 16: SP and r9 walk the save area in alternating 16-byte
  // slots, each stepping by 32 bytes per load.
  16426. adds r9 = 1 * 16, SP
  16427. ;;
  16428. ldf.fill f16 = [SP], 32
  16429. ldf.fill f17 = [r9], 32
  16430. ;;
  16431. ldf.fill f18 = [SP], 32
  16432. ldf.fill f19 = [r9], 32
  16433. ;;
  16434. ldf.fill f20 = [SP], 32
  16435. ldf.fill f21 = [r9], 32
  16436. ;;
  16437. mov ar.lc = ARLC
  16438. ;;
  // Mask -1 restores every predicate register from PR.
  16439. mov pr = PR, -1
  16440. ;;
  16441. mov ar.pfs = ARPFS
  16442. ;;
  16443. br.ret.sptk.many b0
  16444. EPILOGUE