You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

index.html 117 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451
  1. <!doctype html>
  2. <html lang="en" class="no-js">
  3. <head>
  4. <meta charset="utf-8">
  5. <meta name="viewport" content="width=device-width,initial-scale=1">
  6. <link rel="prev" href="../NativeLibraryConfig/">
  7. <link rel="next" href="../ChatSession/">
  8. <link rel="icon" href="../../media/icon128.png">
  9. <meta name="generator" content="mkdocs-1.4.3, mkdocs-material-9.1.20">
  10. <title>Use executors - LLamaSharp Documentation</title>
  11. <link rel="stylesheet" href="../../assets/stylesheets/main.eebd395e.min.css">
  12. <link rel="stylesheet" href="../../assets/stylesheets/palette.ecc896b0.min.css">
  13. <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  14. <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Fira+Sans:300,300i,400,400i,700,700i%7CFira+Mono:400,400i,700,700i&display=fallback">
  15. <style>:root{--md-text-font:"Fira Sans";--md-code-font:"Fira Mono"}</style>
  16. <link rel="stylesheet" href="../../css/extra.css?v=14">
  17. <script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
  18. </head>
  19. <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="white" data-md-color-accent="red">
  20. <script>var palette=__md_get("__palette");if(palette&&"object"==typeof palette.color)for(var key of Object.keys(palette.color))document.body.setAttribute("data-md-color-"+key,palette.color[key])</script>
  21. <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
  22. <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
  23. <label class="md-overlay" for="__drawer"></label>
  24. <div data-md-component="skip">
  25. <a href="#llamasharp-executors" class="md-skip">
  26. Skip to content
  27. </a>
  28. </div>
  29. <div data-md-component="announce">
  30. </div>
  31. <div data-md-color-scheme="default" data-md-component="outdated" hidden>
  32. </div>
  33. <header class="md-header md-header--shadow" data-md-component="header">
  34. <nav class="md-header__inner md-grid" aria-label="Header">
  35. <a href="../.." title="LLamaSharp Documentation" class="md-header__button md-logo" aria-label="LLamaSharp Documentation" data-md-component="logo">
  36. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 9h5.5L13 3.5V9M6 2h8l6 6v12a2 2 0 0 1-2 2H6a2 2 0 0 1-2-2V4c0-1.11.89-2 2-2m9 16v-2H6v2h9m3-4v-2H6v2h12Z"/></svg>
  37. </a>
  38. <label class="md-header__button md-icon" for="__drawer">
  39. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
  40. </label>
  41. <div class="md-header__title" data-md-component="header-title">
  42. <div class="md-header__ellipsis">
  43. <div class="md-header__topic">
  44. <span class="md-ellipsis">
  45. LLamaSharp Documentation
  46. </span>
  47. </div>
  48. <div class="md-header__topic" data-md-component="header-topic">
  49. <span class="md-ellipsis">
  50. Use executors
  51. </span>
  52. </div>
  53. </div>
  54. </div>
  55. <form class="md-header__option" data-md-component="palette">
  56. <input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="white" data-md-color-accent="red" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
  57. <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
  58. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 7a5 5 0 0 1 5 5 5 5 0 0 1-5 5 5 5 0 0 1-5-5 5 5 0 0 1 5-5m0 2a3 3 0 0 0-3 3 3 3 0 0 0 3 3 3 3 0 0 0 3-3 3 3 0 0 0-3-3m0-7 2.39 3.42C13.65 5.15 12.84 5 12 5c-.84 0-1.65.15-2.39.42L12 2M3.34 7l4.16-.35A7.2 7.2 0 0 0 5.94 8.5c-.44.74-.69 1.5-.83 2.29L3.34 7m.02 10 1.76-3.77a7.131 7.131 0 0 0 2.38 4.14L3.36 17M20.65 7l-1.77 3.79a7.023 7.023 0 0 0-2.38-4.15l4.15.36m-.01 10-4.14.36c.59-.51 1.12-1.14 1.54-1.86.42-.73.69-1.5.83-2.29L20.64 17M12 22l-2.41-3.44c.74.27 1.55.44 2.41.44.82 0 1.63-.17 2.37-.44L12 22Z"/></svg>
  59. </label>
  60. <input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="blue" data-md-color-accent="blue" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_2">
  61. <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
  62. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m17.75 4.09-2.53 1.94.91 3.06-2.63-1.81-2.63 1.81.91-3.06-2.53-1.94L12.44 4l1.06-3 1.06 3 3.19.09m3.5 6.91-1.64 1.25.59 1.98-1.7-1.17-1.7 1.17.59-1.98L15.75 11l2.06-.05L18.5 9l.69 1.95 2.06.05m-2.28 4.95c.83-.08 1.72 1.1 1.19 1.85-.32.45-.66.87-1.08 1.27C15.17 23 8.84 23 4.94 19.07c-3.91-3.9-3.91-10.24 0-14.14.4-.4.82-.76 1.27-1.08.75-.53 1.93.36 1.85 1.19-.27 2.86.69 5.83 2.89 8.02a9.96 9.96 0 0 0 8.02 2.89m-1.64 2.02a12.08 12.08 0 0 1-7.8-3.47c-2.17-2.19-3.33-5-3.49-7.82-2.81 3.14-2.7 7.96.31 10.98 3.02 3.01 7.84 3.12 10.98.31Z"/></svg>
  63. </label>
  64. </form>
  65. <label class="md-header__button md-icon" for="__search">
  66. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
  67. </label>
  68. <div class="md-search" data-md-component="search" role="dialog">
  69. <label class="md-search__overlay" for="__search"></label>
  70. <div class="md-search__inner" role="search">
  71. <form class="md-search__form" name="search">
  72. <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
  73. <label class="md-search__icon md-icon" for="__search">
  74. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
  75. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
  76. </label>
  77. <nav class="md-search__options" aria-label="Search">
  78. <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
  79. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
  80. </button>
  81. </nav>
  82. </form>
  83. <div class="md-search__output">
  84. <div class="md-search__scrollwrap" data-md-scrollfix>
  85. <div class="md-search-result" data-md-component="search-result">
  86. <div class="md-search-result__meta">
  87. Initializing search
  88. </div>
  89. <ol class="md-search-result__list" role="presentation"></ol>
  90. </div>
  91. </div>
  92. </div>
  93. </div>
  94. </div>
  95. </nav>
  96. </header>
  97. <div class="md-container" data-md-component="container">
  98. <main class="md-main" data-md-component="main">
  99. <div class="md-main__inner md-grid">
  100. <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
  101. <div class="md-sidebar__scrollwrap">
  102. <div class="md-sidebar__inner">
  103. <nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
  104. <label class="md-nav__title" for="__drawer">
  105. <a href="../.." title="LLamaSharp Documentation" class="md-nav__button md-logo" aria-label="LLamaSharp Documentation" data-md-component="logo">
  106. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 9h5.5L13 3.5V9M6 2h8l6 6v12a2 2 0 0 1-2 2H6a2 2 0 0 1-2-2V4c0-1.11.89-2 2-2m9 16v-2H6v2h9m3-4v-2H6v2h12Z"/></svg>
  107. </a>
  108. LLamaSharp Documentation
  109. </label>
  110. <ul class="md-nav__list" data-md-scrollfix>
  111. <li class="md-nav__item">
  112. <a href="../.." class="md-nav__link">
  113. Overview
  114. </a>
  115. </li>
  116. <li class="md-nav__item">
  117. <a href="../../QuickStart/" class="md-nav__link">
  118. Quick Start
  119. </a>
  120. </li>
  121. <li class="md-nav__item">
  122. <a href="../../Architecture/" class="md-nav__link">
  123. Architecture
  124. </a>
  125. </li>
  126. <li class="md-nav__item">
  127. <a href="../../FAQ/" class="md-nav__link">
  128. FAQ
  129. </a>
  130. </li>
  131. <li class="md-nav__item">
  132. <a href="../../ContributingGuide/" class="md-nav__link">
  133. Contributing Guide
  134. </a>
  135. </li>
  136. <li class="md-nav__item md-nav__item--active md-nav__item--nested">
  137. <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" checked>
  138. <label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
  139. Tutorials
  140. <span class="md-nav__icon md-icon"></span>
  141. </label>
  142. <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="true">
  143. <label class="md-nav__title" for="__nav_6">
  144. <span class="md-nav__icon md-icon"></span>
  145. Tutorials
  146. </label>
  147. <ul class="md-nav__list" data-md-scrollfix>
  148. <li class="md-nav__item">
  149. <a href="../NativeLibraryConfig/" class="md-nav__link">
  150. Customize the native library loading
  151. </a>
  152. </li>
  153. <li class="md-nav__item md-nav__item--active">
  154. <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
  155. <label class="md-nav__link md-nav__link--active" for="__toc">
  156. Use executors
  157. <span class="md-nav__icon md-icon"></span>
  158. </label>
  159. <a href="./" class="md-nav__link md-nav__link--active">
  160. Use executors
  161. </a>
  162. <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
  163. <label class="md-nav__title" for="__toc">
  164. <span class="md-nav__icon md-icon"></span>
  165. Table of contents
  166. </label>
  167. <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
  168. <li class="md-nav__item">
  169. <a href="#text-to-text-apis-of-the-executors" class="md-nav__link">
  170. Text-to-Text APIs of the executors
  171. </a>
  172. </li>
  173. <li class="md-nav__item">
  174. <a href="#interactiveexecutor--instructexecutor" class="md-nav__link">
  175. InteractiveExecutor &amp; InstructExecutor
  176. </a>
  177. </li>
  178. <li class="md-nav__item">
  179. <a href="#statelessexecutor" class="md-nav__link">
  180. StatelessExecutor.
  181. </a>
  182. </li>
  183. <li class="md-nav__item">
  184. <a href="#batchedexecutor" class="md-nav__link">
  185. BatchedExecutor
  186. </a>
  187. </li>
  188. <li class="md-nav__item">
  189. <a href="#inference-parameters" class="md-nav__link">
  190. Inference parameters
  191. </a>
  192. </li>
  193. <li class="md-nav__item">
  194. <a href="#save-and-load-executor-state" class="md-nav__link">
  195. Save and load executor state
  196. </a>
  197. </li>
  198. </ul>
  199. </nav>
  200. </li>
  201. <li class="md-nav__item">
  202. <a href="../ChatSession/" class="md-nav__link">
  203. Use ChatSession
  204. </a>
  205. </li>
  206. <li class="md-nav__item">
  207. <a href="../UnderstandLLamaContext/" class="md-nav__link">
  208. Understand LLamaContext
  209. </a>
  210. </li>
  211. <li class="md-nav__item">
  212. <a href="../GetEmbeddings/" class="md-nav__link">
  213. Get embeddings
  214. </a>
  215. </li>
  216. <li class="md-nav__item">
  217. <a href="../Quantization/" class="md-nav__link">
  218. Quantize the model
  219. </a>
  220. </li>
  221. </ul>
  222. </nav>
  223. </li>
  224. <li class="md-nav__item md-nav__item--nested">
  225. <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" >
  226. <label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
  227. Integrations
  228. <span class="md-nav__icon md-icon"></span>
  229. </label>
  230. <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
  231. <label class="md-nav__title" for="__nav_7">
  232. <span class="md-nav__icon md-icon"></span>
  233. Integrations
  234. </label>
  235. <ul class="md-nav__list" data-md-scrollfix>
  236. <li class="md-nav__item">
  237. <a href="../../Integrations/semantic-kernel/" class="md-nav__link">
  238. semantic-kernel integration
  239. </a>
  240. </li>
  241. <li class="md-nav__item">
  242. <a href="../../Integrations/kernel-memory/" class="md-nav__link">
  243. kernel-memory integration
  244. </a>
  245. </li>
  246. <li class="md-nav__item">
  247. <a href="../../Integrations/BotSharp.md" class="md-nav__link">
  248. BotSharp integration
  249. </a>
  250. </li>
  251. <li class="md-nav__item">
  252. <a href="../../Integrations/Langchain.md" class="md-nav__link">
  253. Langchain integration
  254. </a>
  255. </li>
  256. </ul>
  257. </nav>
  258. </li>
  259. <li class="md-nav__item md-nav__item--nested">
  260. <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" >
  261. <label class="md-nav__link" for="__nav_8" id="__nav_8_label" tabindex="0">
  262. Examples
  263. <span class="md-nav__icon md-icon"></span>
  264. </label>
  265. <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="false">
  266. <label class="md-nav__title" for="__nav_8">
  267. <span class="md-nav__icon md-icon"></span>
  268. Examples
  269. </label>
  270. <ul class="md-nav__list" data-md-scrollfix>
  271. <li class="md-nav__item">
  272. <a href="../../Examples/BatchedExecutorFork/" class="md-nav__link">
  273. Batched executor - multi-output to one input
  274. </a>
  275. </li>
  276. <li class="md-nav__item">
  277. <a href="../../Examples/BatchedExecutorGuidance/" class="md-nav__link">
  278. Batched executor - basic guidance
  279. </a>
  280. </li>
  281. <li class="md-nav__item">
  282. <a href="../../Examples/BatchedExecutorRewind/" class="md-nav__link">
  283. Batched executor - rewinding to an earlier state
  284. </a>
  285. </li>
  286. <li class="md-nav__item">
  287. <a href="../../Examples/ChatChineseGB2312/" class="md-nav__link">
  288. Chinese LLM - with GB2312 encoding
  289. </a>
  290. </li>
  291. <li class="md-nav__item">
  292. <a href="../../Examples/ChatSessionStripRoleName/" class="md-nav__link">
  293. ChatSession - stripping role names
  294. </a>
  295. </li>
  296. <li class="md-nav__item">
  297. <a href="../../Examples/ChatSessionWithHistory/" class="md-nav__link">
  298. ChatSession - with history
  299. </a>
  300. </li>
  301. <li class="md-nav__item">
  302. <a href="../../Examples/ChatSessionWithRestart/" class="md-nav__link">
  303. ChatSession - restarting
  304. </a>
  305. </li>
  306. <li class="md-nav__item">
  307. <a href="../../Examples/ChatSessionWithRoleName/" class="md-nav__link">
  308. ChatSession - Basic
  309. </a>
  310. </li>
  311. <li class="md-nav__item">
  312. <a href="../../Examples/CodingAssistant/" class="md-nav__link">
  313. Coding assistant
  314. </a>
  315. </li>
  316. <li class="md-nav__item">
  317. <a href="../../Examples/GetEmbeddings/" class="md-nav__link">
  318. Get embeddings
  319. </a>
  320. </li>
  321. <li class="md-nav__item">
  322. <a href="../../Examples/GrammarJsonResponse/" class="md-nav__link">
  323. Grammar - json response
  324. </a>
  325. </li>
  326. <li class="md-nav__item">
  327. <a href="../../Examples/InstructModeExecute/" class="md-nav__link">
  328. Instruct executor - basic
  329. </a>
  330. </li>
  331. <li class="md-nav__item">
  332. <a href="../../Examples/InteractiveModeExecute/" class="md-nav__link">
  333. Interactive executor - basic
  334. </a>
  335. </li>
  336. <li class="md-nav__item">
  337. <a href="../../Examples/KernelMemory/" class="md-nav__link">
  338. Kernel memory integration - basic
  339. </a>
  340. </li>
  341. <li class="md-nav__item">
  342. <a href="../../Examples/KernelMemorySaveAndLoad/" class="md-nav__link">
  343. Kernel-memory - save &amp; load
  344. </a>
  345. </li>
  346. <li class="md-nav__item">
  347. <a href="../../Examples/LLavaInteractiveModeExecute/" class="md-nav__link">
  348. LLaVA - basic
  349. </a>
  350. </li>
  351. <li class="md-nav__item">
  352. <a href="../../Examples/LoadAndSaveSession/" class="md-nav__link">
  353. ChatSession - load &amp; save
  354. </a>
  355. </li>
  356. <li class="md-nav__item">
  357. <a href="../../Examples/LoadAndSaveState/" class="md-nav__link">
  358. Executor - save/load state
  359. </a>
  360. </li>
  361. <li class="md-nav__item">
  362. <a href="../../Examples/QuantizeModel/" class="md-nav__link">
  363. Quantization
  364. </a>
  365. </li>
  366. <li class="md-nav__item">
  367. <a href="../../Examples/SemanticKernelChat/" class="md-nav__link">
  368. Semantic-kernel - chat
  369. </a>
  370. </li>
  371. <li class="md-nav__item">
  372. <a href="../../Examples/SemanticKernelMemory/" class="md-nav__link">
  373. Semantic-kernel - with kernel-memory
  374. </a>
  375. </li>
  376. <li class="md-nav__item">
  377. <a href="../../Examples/SemanticKernelPrompt/" class="md-nav__link">
  378. Semantic-kernel - basic
  379. </a>
  380. </li>
  381. <li class="md-nav__item">
  382. <a href="../../Examples/StatelessModeExecute/" class="md-nav__link">
  383. Stateless executor
  384. </a>
  385. </li>
  386. <li class="md-nav__item">
  387. <a href="../../Examples/TalkToYourself/" class="md-nav__link">
  388. Talk to yourself
  389. </a>
  390. </li>
  391. </ul>
  392. </nav>
  393. </li>
  394. <li class="md-nav__item md-nav__item--nested">
  395. <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_9" >
  396. <label class="md-nav__link" for="__nav_9" id="__nav_9_label" tabindex="0">
  397. API Reference
  398. <span class="md-nav__icon md-icon"></span>
  399. </label>
  400. <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_9_label" aria-expanded="false">
  401. <label class="md-nav__title" for="__nav_9">
  402. <span class="md-nav__icon md-icon"></span>
  403. API Reference
  404. </label>
  405. <ul class="md-nav__list" data-md-scrollfix>
  406. <li class="md-nav__item">
  407. <a href="../../xmldocs/" class="md-nav__link">
  408. index
  409. </a>
  410. </li>
  411. <li class="md-nav__item">
  412. <a href="../../xmldocs/llama.abstractions.adaptercollection/" class="md-nav__link">
  413. llama.abstractions.adaptercollection
  414. </a>
  415. </li>
  416. <li class="md-nav__item">
  417. <a href="../../xmldocs/llama.abstractions.icontextparams/" class="md-nav__link">
  418. llama.abstractions.icontextparams
  419. </a>
  420. </li>
  421. <li class="md-nav__item">
  422. <a href="../../xmldocs/llama.abstractions.ihistorytransform/" class="md-nav__link">
  423. llama.abstractions.ihistorytransform
  424. </a>
  425. </li>
  426. <li class="md-nav__item">
  427. <a href="../../xmldocs/llama.abstractions.iinferenceparams/" class="md-nav__link">
  428. llama.abstractions.iinferenceparams
  429. </a>
  430. </li>
  431. <li class="md-nav__item">
  432. <a href="../../xmldocs/llama.abstractions.illamaexecutor/" class="md-nav__link">
  433. llama.abstractions.illamaexecutor
  434. </a>
  435. </li>
  436. <li class="md-nav__item">
  437. <a href="../../xmldocs/llama.abstractions.illamaparams/" class="md-nav__link">
  438. llama.abstractions.illamaparams
  439. </a>
  440. </li>
  441. <li class="md-nav__item">
  442. <a href="../../xmldocs/llama.abstractions.imodelparams/" class="md-nav__link">
  443. llama.abstractions.imodelparams
  444. </a>
  445. </li>
  446. <li class="md-nav__item">
  447. <a href="../../xmldocs/llama.abstractions.itextstreamtransform/" class="md-nav__link">
  448. llama.abstractions.itextstreamtransform
  449. </a>
  450. </li>
  451. <li class="md-nav__item">
  452. <a href="../../xmldocs/llama.abstractions.itexttransform/" class="md-nav__link">
  453. llama.abstractions.itexttransform
  454. </a>
  455. </li>
  456. <li class="md-nav__item">
  457. <a href="../../xmldocs/llama.abstractions.loraadapter/" class="md-nav__link">
  458. llama.abstractions.loraadapter
  459. </a>
  460. </li>
  461. <li class="md-nav__item">
  462. <a href="../../xmldocs/llama.abstractions.metadataoverride/" class="md-nav__link">
  463. llama.abstractions.metadataoverride
  464. </a>
  465. </li>
  466. <li class="md-nav__item">
  467. <a href="../../xmldocs/llama.abstractions.metadataoverrideconverter/" class="md-nav__link">
  468. llama.abstractions.metadataoverrideconverter
  469. </a>
  470. </li>
  471. <li class="md-nav__item">
  472. <a href="../../xmldocs/llama.abstractions.tensorsplitscollection/" class="md-nav__link">
  473. llama.abstractions.tensorsplitscollection
  474. </a>
  475. </li>
  476. <li class="md-nav__item">
  477. <a href="../../xmldocs/llama.abstractions.tensorsplitscollectionconverter/" class="md-nav__link">
  478. llama.abstractions.tensorsplitscollectionconverter
  479. </a>
  480. </li>
  481. <li class="md-nav__item">
  482. <a href="../../xmldocs/llama.antipromptprocessor/" class="md-nav__link">
  483. llama.antipromptprocessor
  484. </a>
  485. </li>
  486. <li class="md-nav__item">
  487. <a href="../../xmldocs/llama.batched.alreadypromptedconversationexception/" class="md-nav__link">
  488. llama.batched.alreadypromptedconversationexception
  489. </a>
  490. </li>
  491. <li class="md-nav__item">
  492. <a href="../../xmldocs/llama.batched.batchedexecutor/" class="md-nav__link">
  493. llama.batched.batchedexecutor
  494. </a>
  495. </li>
  496. <li class="md-nav__item">
  497. <a href="../../xmldocs/llama.batched.cannotforkwhilerequiresinferenceexception/" class="md-nav__link">
  498. llama.batched.cannotforkwhilerequiresinferenceexception
  499. </a>
  500. </li>
  501. <li class="md-nav__item">
  502. <a href="../../xmldocs/llama.batched.cannotmodifywhilerequiresinferenceexception/" class="md-nav__link">
  503. llama.batched.cannotmodifywhilerequiresinferenceexception
  504. </a>
  505. </li>
  506. <li class="md-nav__item">
  507. <a href="../../xmldocs/llama.batched.cannotsamplerequiresinferenceexception/" class="md-nav__link">
  508. llama.batched.cannotsamplerequiresinferenceexception
  509. </a>
  510. </li>
  511. <li class="md-nav__item">
  512. <a href="../../xmldocs/llama.batched.cannotsamplerequirespromptexception/" class="md-nav__link">
  513. llama.batched.cannotsamplerequirespromptexception
  514. </a>
  515. </li>
  516. <li class="md-nav__item">
  517. <a href="../../xmldocs/llama.batched.conversation/" class="md-nav__link">
  518. llama.batched.conversation
  519. </a>
  520. </li>
  521. <li class="md-nav__item">
  522. <a href="../../xmldocs/llama.batched.conversationextensions/" class="md-nav__link">
  523. llama.batched.conversationextensions
  524. </a>
  525. </li>
  526. <li class="md-nav__item">
  527. <a href="../../xmldocs/llama.batched.experimentalbatchedexecutorexception/" class="md-nav__link">
  528. llama.batched.experimentalbatchedexecutorexception
  529. </a>
  530. </li>
  531. <li class="md-nav__item">
  532. <a href="../../xmldocs/llama.chatsession-1/" class="md-nav__link">
  533. llama.chatsession-1
  534. </a>
  535. </li>
  536. <li class="md-nav__item">
  537. <a href="../../xmldocs/llama.chatsession/" class="md-nav__link">
  538. llama.chatsession
  539. </a>
  540. </li>
  541. <li class="md-nav__item">
  542. <a href="../../xmldocs/llama.common.authorrole/" class="md-nav__link">
  543. llama.common.authorrole
  544. </a>
  545. </li>
  546. <li class="md-nav__item">
  547. <a href="../../xmldocs/llama.common.chathistory/" class="md-nav__link">
  548. llama.common.chathistory
  549. </a>
  550. </li>
  551. <li class="md-nav__item">
  552. <a href="../../xmldocs/llama.common.fixedsizequeue-1/" class="md-nav__link">
  553. llama.common.fixedsizequeue-1
  554. </a>
  555. </li>
  556. <li class="md-nav__item">
  557. <a href="../../xmldocs/llama.common.inferenceparams/" class="md-nav__link">
  558. llama.common.inferenceparams
  559. </a>
  560. </li>
  561. <li class="md-nav__item">
  562. <a href="../../xmldocs/llama.common.mirostattype/" class="md-nav__link">
  563. llama.common.mirostattype
  564. </a>
  565. </li>
  566. <li class="md-nav__item">
  567. <a href="../../xmldocs/llama.common.modelparams/" class="md-nav__link">
  568. llama.common.modelparams
  569. </a>
  570. </li>
  571. <li class="md-nav__item">
  572. <a href="../../xmldocs/llama.exceptions.grammarexpectedname/" class="md-nav__link">
  573. llama.exceptions.grammarexpectedname
  574. </a>
  575. </li>
  576. <li class="md-nav__item">
  577. <a href="../../xmldocs/llama.exceptions.grammarexpectednext/" class="md-nav__link">
  578. llama.exceptions.grammarexpectednext
  579. </a>
  580. </li>
  581. <li class="md-nav__item">
  582. <a href="../../xmldocs/llama.exceptions.grammarexpectedprevious/" class="md-nav__link">
  583. llama.exceptions.grammarexpectedprevious
  584. </a>
  585. </li>
  586. <li class="md-nav__item">
  587. <a href="../../xmldocs/llama.exceptions.grammarformatexception/" class="md-nav__link">
  588. llama.exceptions.grammarformatexception
  589. </a>
  590. </li>
  591. <li class="md-nav__item">
  592. <a href="../../xmldocs/llama.exceptions.grammarunexpectedcharaltelement/" class="md-nav__link">
  593. llama.exceptions.grammarunexpectedcharaltelement
  594. </a>
  595. </li>
  596. <li class="md-nav__item">
  597. <a href="../../xmldocs/llama.exceptions.grammarunexpectedcharrngelement/" class="md-nav__link">
  598. llama.exceptions.grammarunexpectedcharrngelement
  599. </a>
  600. </li>
  601. <li class="md-nav__item">
  602. <a href="../../xmldocs/llama.exceptions.grammarunexpectedendelement/" class="md-nav__link">
  603. llama.exceptions.grammarunexpectedendelement
  604. </a>
  605. </li>
  606. <li class="md-nav__item">
  607. <a href="../../xmldocs/llama.exceptions.grammarunexpectedendofinput/" class="md-nav__link">
  608. llama.exceptions.grammarunexpectedendofinput
  609. </a>
  610. </li>
  611. <li class="md-nav__item">
  612. <a href="../../xmldocs/llama.exceptions.grammarunexpectedhexcharscount/" class="md-nav__link">
  613. llama.exceptions.grammarunexpectedhexcharscount
  614. </a>
  615. </li>
  616. <li class="md-nav__item">
  617. <a href="../../xmldocs/llama.exceptions.grammarunknownescapecharacter/" class="md-nav__link">
  618. llama.exceptions.grammarunknownescapecharacter
  619. </a>
  620. </li>
  621. <li class="md-nav__item">
  622. <a href="../../xmldocs/llama.exceptions.llamadecodeerror/" class="md-nav__link">
  623. llama.exceptions.llamadecodeerror
  624. </a>
  625. </li>
  626. <li class="md-nav__item">
  627. <a href="../../xmldocs/llama.exceptions.loadweightsfailedexception/" class="md-nav__link">
  628. llama.exceptions.loadweightsfailedexception
  629. </a>
  630. </li>
  631. <li class="md-nav__item">
  632. <a href="../../xmldocs/llama.exceptions.runtimeerror/" class="md-nav__link">
  633. llama.exceptions.runtimeerror
  634. </a>
  635. </li>
  636. <li class="md-nav__item">
  637. <a href="../../xmldocs/llama.extensions.icontextparamsextensions/" class="md-nav__link">
  638. llama.extensions.icontextparamsextensions
  639. </a>
  640. </li>
  641. <li class="md-nav__item">
  642. <a href="../../xmldocs/llama.extensions.imodelparamsextensions/" class="md-nav__link">
  643. llama.extensions.imodelparamsextensions
  644. </a>
  645. </li>
  646. <li class="md-nav__item">
  647. <a href="../../xmldocs/llama.grammars.grammar/" class="md-nav__link">
  648. llama.grammars.grammar
  649. </a>
  650. </li>
  651. <li class="md-nav__item">
  652. <a href="../../xmldocs/llama.grammars.grammarrule/" class="md-nav__link">
  653. llama.grammars.grammarrule
  654. </a>
  655. </li>
  656. <li class="md-nav__item">
  657. <a href="../../xmldocs/llama.ichatmodel/" class="md-nav__link">
  658. llama.ichatmodel
  659. </a>
  660. </li>
  661. <li class="md-nav__item">
  662. <a href="../../xmldocs/llama.llamacache/" class="md-nav__link">
  663. llama.llamacache
  664. </a>
  665. </li>
  666. <li class="md-nav__item">
  667. <a href="../../xmldocs/llama.llamaembedder/" class="md-nav__link">
  668. llama.llamaembedder
  669. </a>
  670. </li>
  671. <li class="md-nav__item">
  672. <a href="../../xmldocs/llama.llamamodel/" class="md-nav__link">
  673. llama.llamamodel
  674. </a>
  675. </li>
  676. <li class="md-nav__item">
  677. <a href="../../xmldocs/llama.llamamodelv1/" class="md-nav__link">
  678. llama.llamamodelv1
  679. </a>
  680. </li>
  681. <li class="md-nav__item">
  682. <a href="../../xmldocs/llama.llamaparams/" class="md-nav__link">
  683. llama.llamaparams
  684. </a>
  685. </li>
  686. <li class="md-nav__item">
  687. <a href="../../xmldocs/llama.llamaquantizer/" class="md-nav__link">
  688. llama.llamaquantizer
  689. </a>
  690. </li>
  691. <li class="md-nav__item">
  692. <a href="../../xmldocs/llama.llamastate/" class="md-nav__link">
  693. llama.llamastate
  694. </a>
  695. </li>
  696. <li class="md-nav__item">
  697. <a href="../../xmldocs/llama.llamatransforms/" class="md-nav__link">
  698. llama.llamatransforms
  699. </a>
  700. </li>
  701. <li class="md-nav__item">
  702. <a href="../../xmldocs/llama.llavaweights/" class="md-nav__link">
  703. llama.llavaweights
  704. </a>
  705. </li>
  706. <li class="md-nav__item">
  707. <a href="../../xmldocs/llama.native.decoderesult/" class="md-nav__link">
  708. llama.native.decoderesult
  709. </a>
  710. </li>
  711. <li class="md-nav__item">
  712. <a href="../../xmldocs/llama.native.ggmltype/" class="md-nav__link">
  713. llama.native.ggmltype
  714. </a>
  715. </li>
  716. <li class="md-nav__item">
  717. <a href="../../xmldocs/llama.native.gpusplitmode/" class="md-nav__link">
  718. llama.native.gpusplitmode
  719. </a>
  720. </li>
  721. <li class="md-nav__item">
  722. <a href="../../xmldocs/llama.native.llamabatch/" class="md-nav__link">
  723. llama.native.llamabatch
  724. </a>
  725. </li>
  726. <li class="md-nav__item">
  727. <a href="../../xmldocs/llama.native.llamabeamsstate/" class="md-nav__link">
  728. llama.native.llamabeamsstate
  729. </a>
  730. </li>
  731. <li class="md-nav__item">
  732. <a href="../../xmldocs/llama.native.llamabeamview/" class="md-nav__link">
  733. llama.native.llamabeamview
  734. </a>
  735. </li>
  736. <li class="md-nav__item">
  737. <a href="../../xmldocs/llama.native.llamachatmessage/" class="md-nav__link">
  738. llama.native.llamachatmessage
  739. </a>
  740. </li>
  741. <li class="md-nav__item">
  742. <a href="../../xmldocs/llama.native.llamacontextparams/" class="md-nav__link">
  743. llama.native.llamacontextparams
  744. </a>
  745. </li>
  746. <li class="md-nav__item">
  747. <a href="../../xmldocs/llama.native.llamaftype/" class="md-nav__link">
  748. llama.native.llamaftype
  749. </a>
  750. </li>
  751. <li class="md-nav__item">
  752. <a href="../../xmldocs/llama.native.llamagrammarelement/" class="md-nav__link">
  753. llama.native.llamagrammarelement
  754. </a>
  755. </li>
  756. <li class="md-nav__item">
  757. <a href="../../xmldocs/llama.native.llamagrammarelementtype/" class="md-nav__link">
  758. llama.native.llamagrammarelementtype
  759. </a>
  760. </li>
  761. <li class="md-nav__item">
  762. <a href="../../xmldocs/llama.native.llamakvcacheview/" class="md-nav__link">
  763. llama.native.llamakvcacheview
  764. </a>
  765. </li>
  766. <li class="md-nav__item">
  767. <a href="../../xmldocs/llama.native.llamakvcacheviewcell/" class="md-nav__link">
  768. llama.native.llamakvcacheviewcell
  769. </a>
  770. </li>
  771. <li class="md-nav__item">
  772. <a href="../../xmldocs/llama.native.llamakvcacheviewsafehandle/" class="md-nav__link">
  773. llama.native.llamakvcacheviewsafehandle
  774. </a>
  775. </li>
  776. <li class="md-nav__item">
  777. <a href="../../xmldocs/llama.native.llamaloglevel/" class="md-nav__link">
  778. llama.native.llamaloglevel
  779. </a>
  780. </li>
  781. <li class="md-nav__item">
  782. <a href="../../xmldocs/llama.native.llamamodelkvoverridetype/" class="md-nav__link">
  783. llama.native.llamamodelkvoverridetype
  784. </a>
  785. </li>
  786. <li class="md-nav__item">
  787. <a href="../../xmldocs/llama.native.llamamodelmetadataoverride/" class="md-nav__link">
  788. llama.native.llamamodelmetadataoverride
  789. </a>
  790. </li>
  791. <li class="md-nav__item">
  792. <a href="../../xmldocs/llama.native.llamamodelparams/" class="md-nav__link">
  793. llama.native.llamamodelparams
  794. </a>
  795. </li>
  796. <li class="md-nav__item">
  797. <a href="../../xmldocs/llama.native.llamamodelquantizeparams/" class="md-nav__link">
  798. llama.native.llamamodelquantizeparams
  799. </a>
  800. </li>
  801. <li class="md-nav__item">
  802. <a href="../../xmldocs/llama.native.llamanativebatch/" class="md-nav__link">
  803. llama.native.llamanativebatch
  804. </a>
  805. </li>
  806. <li class="md-nav__item">
  807. <a href="../../xmldocs/llama.native.llamapoolingtype/" class="md-nav__link">
  808. llama.native.llamapoolingtype
  809. </a>
  810. </li>
  811. <li class="md-nav__item">
  812. <a href="../../xmldocs/llama.native.llamapos/" class="md-nav__link">
  813. llama.native.llamapos
  814. </a>
  815. </li>
  816. <li class="md-nav__item">
  817. <a href="../../xmldocs/llama.native.llamaropetype/" class="md-nav__link">
  818. llama.native.llamaropetype
  819. </a>
  820. </li>
  821. <li class="md-nav__item">
  822. <a href="../../xmldocs/llama.native.llamaseqid/" class="md-nav__link">
  823. llama.native.llamaseqid
  824. </a>
  825. </li>
  826. <li class="md-nav__item">
  827. <a href="../../xmldocs/llama.native.llamatoken/" class="md-nav__link">
  828. llama.native.llamatoken
  829. </a>
  830. </li>
  831. <li class="md-nav__item">
  832. <a href="../../xmldocs/llama.native.llamatokendata/" class="md-nav__link">
  833. llama.native.llamatokendata
  834. </a>
  835. </li>
  836. <li class="md-nav__item">
  837. <a href="../../xmldocs/llama.native.llamatokendataarray/" class="md-nav__link">
  838. llama.native.llamatokendataarray
  839. </a>
  840. </li>
  841. <li class="md-nav__item">
  842. <a href="../../xmldocs/llama.native.llamatokendataarraynative/" class="md-nav__link">
  843. llama.native.llamatokendataarraynative
  844. </a>
  845. </li>
  846. <li class="md-nav__item">
  847. <a href="../../xmldocs/llama.native.llamatokentype/" class="md-nav__link">
  848. llama.native.llamatokentype
  849. </a>
  850. </li>
  851. <li class="md-nav__item">
  852. <a href="../../xmldocs/llama.native.llamavocabtype/" class="md-nav__link">
  853. llama.native.llamavocabtype
  854. </a>
  855. </li>
  856. <li class="md-nav__item">
  857. <a href="../../xmldocs/llama.native.llavaimageembed/" class="md-nav__link">
  858. llama.native.llavaimageembed
  859. </a>
  860. </li>
  861. <li class="md-nav__item">
  862. <a href="../../xmldocs/llama.native.nativeapi/" class="md-nav__link">
  863. llama.native.nativeapi
  864. </a>
  865. </li>
  866. <li class="md-nav__item">
  867. <a href="../../xmldocs/llama.native.nativelibraryconfig/" class="md-nav__link">
  868. llama.native.nativelibraryconfig
  869. </a>
  870. </li>
  871. <li class="md-nav__item">
  872. <a href="../../xmldocs/llama.native.ropescalingtype/" class="md-nav__link">
  873. llama.native.ropescalingtype
  874. </a>
  875. </li>
  876. <li class="md-nav__item">
  877. <a href="../../xmldocs/llama.native.safellamacontexthandle/" class="md-nav__link">
  878. llama.native.safellamacontexthandle
  879. </a>
  880. </li>
  881. <li class="md-nav__item">
  882. <a href="../../xmldocs/llama.native.safellamagrammarhandle/" class="md-nav__link">
  883. llama.native.safellamagrammarhandle
  884. </a>
  885. </li>
  886. <li class="md-nav__item">
  887. <a href="../../xmldocs/llama.native.safellamahandlebase/" class="md-nav__link">
  888. llama.native.safellamahandlebase
  889. </a>
  890. </li>
  891. <li class="md-nav__item">
  892. <a href="../../xmldocs/llama.native.safellamamodelhandle/" class="md-nav__link">
  893. llama.native.safellamamodelhandle
  894. </a>
  895. </li>
  896. <li class="md-nav__item">
  897. <a href="../../xmldocs/llama.native.safellavaimageembedhandle/" class="md-nav__link">
  898. llama.native.safellavaimageembedhandle
  899. </a>
  900. </li>
  901. <li class="md-nav__item">
  902. <a href="../../xmldocs/llama.native.safellavamodelhandle/" class="md-nav__link">
  903. llama.native.safellavamodelhandle
  904. </a>
  905. </li>
  906. <li class="md-nav__item">
  907. <a href="../../xmldocs/llama.quantizer/" class="md-nav__link">
  908. llama.quantizer
  909. </a>
  910. </li>
  911. <li class="md-nav__item">
  912. <a href="../../xmldocs/llama.sampling.basesamplingpipeline/" class="md-nav__link">
  913. llama.sampling.basesamplingpipeline
  914. </a>
  915. </li>
  916. <li class="md-nav__item">
  917. <a href="../../xmldocs/llama.sampling.defaultsamplingpipeline/" class="md-nav__link">
  918. llama.sampling.defaultsamplingpipeline
  919. </a>
  920. </li>
  921. <li class="md-nav__item">
  922. <a href="../../xmldocs/llama.sampling.greedysamplingpipeline/" class="md-nav__link">
  923. llama.sampling.greedysamplingpipeline
  924. </a>
  925. </li>
  926. <li class="md-nav__item">
  927. <a href="../../xmldocs/llama.sampling.isamplingpipeline/" class="md-nav__link">
  928. llama.sampling.isamplingpipeline
  929. </a>
  930. </li>
  931. <li class="md-nav__item">
  932. <a href="../../xmldocs/llama.sampling.isamplingpipelineextensions/" class="md-nav__link">
  933. llama.sampling.isamplingpipelineextensions
  934. </a>
  935. </li>
  936. <li class="md-nav__item">
  937. <a href="../../xmldocs/llama.sampling.mirostate2samplingpipeline/" class="md-nav__link">
  938. llama.sampling.mirostate2samplingpipeline
  939. </a>
  940. </li>
  941. <li class="md-nav__item">
  942. <a href="../../xmldocs/llama.sampling.mirostatesamplingpipeline/" class="md-nav__link">
  943. llama.sampling.mirostatesamplingpipeline
  944. </a>
  945. </li>
  946. <li class="md-nav__item">
  947. <a href="../../xmldocs/llama.sessionstate/" class="md-nav__link">
  948. llama.sessionstate
  949. </a>
  950. </li>
  951. <li class="md-nav__item">
  952. <a href="../../xmldocs/llama.streamingtokendecoder/" class="md-nav__link">
  953. llama.streamingtokendecoder
  954. </a>
  955. </li>
  956. <li class="md-nav__item">
  957. <a href="../../xmldocs/llama.types.chatcompletion/" class="md-nav__link">
  958. llama.types.chatcompletion
  959. </a>
  960. </li>
  961. <li class="md-nav__item">
  962. <a href="../../xmldocs/llama.types.chatcompletionchoice/" class="md-nav__link">
  963. llama.types.chatcompletionchoice
  964. </a>
  965. </li>
  966. <li class="md-nav__item">
  967. <a href="../../xmldocs/llama.types.chatcompletionchunk/" class="md-nav__link">
  968. llama.types.chatcompletionchunk
  969. </a>
  970. </li>
  971. <li class="md-nav__item">
  972. <a href="../../xmldocs/llama.types.chatcompletionchunkchoice/" class="md-nav__link">
  973. llama.types.chatcompletionchunkchoice
  974. </a>
  975. </li>
  976. <li class="md-nav__item">
  977. <a href="../../xmldocs/llama.types.chatcompletionchunkdelta/" class="md-nav__link">
  978. llama.types.chatcompletionchunkdelta
  979. </a>
  980. </li>
  981. <li class="md-nav__item">
  982. <a href="../../xmldocs/llama.types.chatcompletionmessage/" class="md-nav__link">
  983. llama.types.chatcompletionmessage
  984. </a>
  985. </li>
  986. <li class="md-nav__item">
  987. <a href="../../xmldocs/llama.types.chatmessagerecord/" class="md-nav__link">
  988. llama.types.chatmessagerecord
  989. </a>
  990. </li>
  991. <li class="md-nav__item">
  992. <a href="../../xmldocs/llama.types.chatrole/" class="md-nav__link">
  993. llama.types.chatrole
  994. </a>
  995. </li>
  996. <li class="md-nav__item">
  997. <a href="../../xmldocs/llama.types.completion/" class="md-nav__link">
  998. llama.types.completion
  999. </a>
  1000. </li>
  1001. <li class="md-nav__item">
  1002. <a href="../../xmldocs/llama.types.completionchoice/" class="md-nav__link">
  1003. llama.types.completionchoice
  1004. </a>
  1005. </li>
  1006. <li class="md-nav__item">
  1007. <a href="../../xmldocs/llama.types.completionchunk/" class="md-nav__link">
  1008. llama.types.completionchunk
  1009. </a>
  1010. </li>
  1011. <li class="md-nav__item">
  1012. <a href="../../xmldocs/llama.types.completionlogprobs/" class="md-nav__link">
  1013. llama.types.completionlogprobs
  1014. </a>
  1015. </li>
  1016. <li class="md-nav__item">
  1017. <a href="../../xmldocs/llama.types.completionusage/" class="md-nav__link">
  1018. llama.types.completionusage
  1019. </a>
  1020. </li>
  1021. <li class="md-nav__item">
  1022. <a href="../../xmldocs/llama.types.embedding/" class="md-nav__link">
  1023. llama.types.embedding
  1024. </a>
  1025. </li>
  1026. <li class="md-nav__item">
  1027. <a href="../../xmldocs/llama.types.embeddingdata/" class="md-nav__link">
  1028. llama.types.embeddingdata
  1029. </a>
  1030. </li>
  1031. <li class="md-nav__item">
  1032. <a href="../../xmldocs/llama.types.embeddingusage/" class="md-nav__link">
  1033. llama.types.embeddingusage
  1034. </a>
  1035. </li>
  1036. <li class="md-nav__item">
  1037. <a href="../../xmldocs/logger/" class="md-nav__link">
  1038. logger
  1039. </a>
  1040. </li>
  1041. </ul>
  1042. </nav>
  1043. </li>
  1044. </ul>
  1045. </nav>
  1046. </div>
  1047. </div>
  1048. </div>
  1049. <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
  1050. <div class="md-sidebar__scrollwrap">
  1051. <div class="md-sidebar__inner">
  1052. <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
  1053. <label class="md-nav__title" for="__toc">
  1054. <span class="md-nav__icon md-icon"></span>
  1055. Table of contents
  1056. </label>
  1057. <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
  1058. <li class="md-nav__item">
  1059. <a href="#text-to-text-apis-of-the-executors" class="md-nav__link">
  1060. Text-to-Text APIs of the executors
  1061. </a>
  1062. </li>
  1063. <li class="md-nav__item">
  1064. <a href="#interactiveexecutor--instructexecutor" class="md-nav__link">
  1065. InteractiveExecutor &amp; InstructExecutor
  1066. </a>
  1067. </li>
  1068. <li class="md-nav__item">
  1069. <a href="#statelessexecutor" class="md-nav__link">
  1070. StatelessExecutor.
  1071. </a>
  1072. </li>
  1073. <li class="md-nav__item">
  1074. <a href="#batchedexecutor" class="md-nav__link">
  1075. BatchedExecutor
  1076. </a>
  1077. </li>
  1078. <li class="md-nav__item">
  1079. <a href="#inference-parameters" class="md-nav__link">
  1080. Inference parameters
  1081. </a>
  1082. </li>
  1083. <li class="md-nav__item">
  1084. <a href="#save-and-load-executor-state" class="md-nav__link">
  1085. Save and load executor state
  1086. </a>
  1087. </li>
  1088. </ul>
  1089. </nav>
  1090. </div>
  1091. </div>
  1092. </div>
  1093. <div class="md-content" data-md-component="content">
  1094. <article class="md-content__inner md-typeset">
  1095. <h1 id="llamasharp-executors">LLamaSharp executors<a class="headerlink" href="#llamasharp-executors" title="Permanent link"></a></h1>
  1096. <p>A LLamaSharp executor defines the behavior of the model when it is called. Currently, there are four kinds of executors: <code>InteractiveExecutor</code>, <code>InstructExecutor</code>, <code>StatelessExecutor</code> and <code>BatchedExecutor</code>.</p>
  1097. <p>In short, <code>InteractiveExecutor</code> is suitable for continuously getting answers to your questions from the LLM. <code>InstructExecutor</code> lets the LLM execute your instructions, such as "continue writing". <code>StatelessExecutor</code> is best for one-time jobs because the previous inference has no impact on the current inference. <code>BatchedExecutor</code> can accept multiple inputs and generate multiple outputs for different sessions at the same time, significantly improving the throughput of the program.</p>
  1098. <h2 id="text-to-text-apis-of-the-executors">Text-to-Text APIs of the executors<a class="headerlink" href="#text-to-text-apis-of-the-executors" title="Permanent link"></a></h2>
  1099. <p>All the executors implement the interface <code>ILLamaExecutor</code>, which provides two APIs to execute text-to-text tasks.</p>
  1100. <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 1</span>
  1101. <span class="normal"> 2</span>
  1102. <span class="normal"> 3</span>
  1103. <span class="normal"> 4</span>
  1104. <span class="normal"> 5</span>
  1105. <span class="normal"> 6</span>
  1106. <span class="normal"> 7</span>
  1107. <span class="normal"> 8</span>
  1108. <span class="normal"> 9</span>
  1109. <span class="normal">10</span>
  1110. <span class="normal">11</span>
  1111. <span class="normal">12</span>
  1112. <span class="normal">13</span>
  1113. <span class="normal">14</span>
  1114. <span class="normal">15</span>
  1115. <span class="normal">16</span>
  1116. <span class="normal">17</span>
  1117. <span class="normal">18</span>
  1118. <span class="normal">19</span>
  1119. <span class="normal">20</span>
  1120. <span class="normal">21</span>
  1121. <span class="normal">22</span>
  1122. <span class="normal">23</span>
  1123. <span class="normal">24</span>
  1124. <span class="normal">25</span>
  1125. <span class="normal">26</span>
  1126. <span class="normal">27</span>
  1127. <span class="normal">28</span>
  1128. <span class="normal">29</span>
  1129. <span class="normal">30</span>
  1130. <span class="normal">31</span>
  1131. <span class="normal">32</span>
  1132. <span class="normal">33</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">public</span><span class="w"> </span><span class="k">interface</span><span class="w"> </span><span class="n">ILLamaExecutor</span>
  1133. <span class="p">{</span>
  1134. <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
  1135. <span class="w"> </span><span class="c1">/// The loaded context for this executor.</span>
  1136. <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
  1137. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">LLamaContext</span><span class="w"> </span><span class="n">Context</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
  1138. <span class="w"> </span><span class="c1">// LLava Section</span>
  1139. <span class="w"> </span><span class="c1">//</span>
  1140. <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
  1141. <span class="w"> </span><span class="c1">/// Identify if it&#39;s a multi-modal model and there is an image to process.</span>
  1142. <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
  1143. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">bool</span><span class="w"> </span><span class="n">IsMultiModal</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
  1144. <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
  1145. <span class="w"> </span><span class="c1">/// Multi-Modal Projections / Clip Model weights</span>
  1146. <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
  1147. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">LLavaWeights</span><span class="o">?</span><span class="w"> </span><span class="n">ClipModel</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span>
  1148. <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
  1149. <span class="w"> </span><span class="c1">/// List of images: Image filename and path (jpeg images).</span>
  1150. <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
  1151. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">List</span><span class="o">&lt;</span><span class="kt">string</span><span class="o">&gt;</span><span class="w"> </span><span class="n">ImagePaths</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
  1152. <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
  1153. <span class="w"> </span><span class="c1">/// Asynchronously infers a response from the model.</span>
  1154. <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
  1155. <span class="w"> </span><span class="c1">/// &lt;param name=&quot;text&quot;&gt;Your prompt&lt;/param&gt;</span>
  1156. <span class="w"> </span><span class="c1">/// &lt;param name=&quot;inferenceParams&quot;&gt;Any additional parameters&lt;/param&gt;</span>
  1157. <span class="w"> </span><span class="c1">/// &lt;param name=&quot;token&quot;&gt;A cancellation token.&lt;/param&gt;</span>
  1158. <span class="w"> </span><span class="c1">/// &lt;returns&gt;&lt;/returns&gt;</span>
  1159. <span class="w"> </span><span class="n">IAsyncEnumerable</span><span class="o">&lt;</span><span class="kt">string</span><span class="o">&gt;</span><span class="w"> </span><span class="n">InferAsync</span><span class="p">(</span><span class="kt">string</span><span class="w"> </span><span class="n">text</span><span class="p">,</span><span class="w"> </span><span class="n">IInferenceParams</span><span class="o">?</span><span class="w"> </span><span class="n">inferenceParams</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">null</span><span class="p">,</span><span class="w"> </span><span class="n">CancellationToken</span><span class="w"> </span><span class="n">token</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">default</span><span class="p">);</span>
  1160. <span class="p">}</span>
  1161. </code></pre></div></td></tr></table></div>
  1162. <p>Both APIs return a <strong>yield enumerable</strong>. Therefore, when receiving the output, you can directly use <code>foreach</code> (or <code>await foreach</code> for an <code>IAsyncEnumerable</code>) to act on each piece of text in order as it is generated, instead of waiting for the whole process to complete.</p>
  1163. <h2 id="interactiveexecutor--instructexecutor">InteractiveExecutor &amp; InstructExecutor<a class="headerlink" href="#interactiveexecutor--instructexecutor" title="Permanent link"></a></h2>
  1164. <p>Both of them generate the response by "completing the prompt". For example, if you input <code>Long long ago, there was a fox who wanted to make friends with humans. One day</code>, then the LLM will continue to write the story.</p>
  1165. <p>Under interactive mode, you play the role of the user and the LLM plays the role of the assistant, which helps you with your question or request.</p>
  1166. <p>Under instruct mode, you give the LLM some instructions and it follows them.</p>
  1167. <p>Though the two modes sound similar, their results can differ a lot depending on your prompt. For example, the <code>chat-with-bob</code> prompt performs well under interactive mode, while the <code>alpaca</code> prompt does well under instruct mode.</p>
  1168. <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
  1169. <span class="normal">2</span>
  1170. <span class="normal">3</span>
  1171. <span class="normal">4</span>
  1172. <span class="normal">5</span>
  1173. <span class="normal">6</span>
  1174. <span class="normal">7</span>
  1175. <span class="normal">8</span>
  1176. <span class="normal">9</span></pre></div></td><td class="code"><div><pre><span></span><code>// chat-with-bob
  1177. Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User&#39;s requests immediately and with precision.
  1178. User: Hello, Bob.
  1179. Bob: Hello. How may I help you today?
  1180. User: Please tell me the largest city in Europe.
  1181. Bob: Sure. The largest city in Europe is Moscow, the capital of Russia.
  1182. User:
  1183. </code></pre></div></td></tr></table></div>
  1184. <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
  1185. <span class="normal">2</span>
  1186. <span class="normal">3</span></pre></div></td><td class="code"><div><pre><span></span><code>// alpaca
  1187. Below is an instruction that describes a task. Write a response that appropriately completes the request.
  1188. </code></pre></div></td></tr></table></div>
  1189. <p>Therefore, please modify the prompt correspondingly when switching from one mode to the other.</p>
  1190. <h2 id="statelessexecutor">StatelessExecutor<a class="headerlink" href="#statelessexecutor" title="Permanent link"></a></h2>
  1191. <p>Despite the differences between interactive mode and instruct mode, both of them are stateful. That is, your previous questions/instructions will affect the current response from the LLM. In contrast, the stateless executor does not have such a "memory". No matter how many times you talk to it, it only considers what you say this time. It is very useful when you want a clean context that is not affected by previous inputs.</p>
  1192. <p>Since the stateless executor has no memory of earlier conversations, you need to pass your question together with the whole prompt in every call to get a good answer.</p>
  1193. <p>For example, if you feed <code>Q: Who is Trump? A:</code> to the stateless executor, it may give the following answer with the anti-prompt <code>Q:</code>.</p>
  1194. <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
  1195. <span class="normal">2</span>
  1196. <span class="normal">3</span>
  1197. <span class="normal">4</span>
  1198. <span class="normal">5</span></pre></div></td><td class="code"><div><pre><span></span><code>Donald J. Trump, born June 14, 1946, is an American businessman, television personality, politician and the 45th President of the United States (2017-2021). # Anexo:Torneo de Hamburgo 2022 (individual masculino)
  1199. ## Presentación previa
  1200. * Defensor del título: Daniil Medvédev
  1201. </code></pre></div></td></tr></table></div>
  1202. <p>It seems that things went well at first. However, after answering the question itself, the LLM began to talk about other things until the answer reached the token count limit. The reason for this strange behavior is that the anti-prompt was never matched: with this input, the LLM cannot tell that it is expected to append the string "Q:" at the end of the response.</p>
  1203. <p>As an improvement, let's take the following text as the input:</p>
  1204. <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>Q: What is the capital of the USA? A: Washington. Q: What is the sum of 1 and 2? A: 3. Q: Who is Trump? A:
  1205. </code></pre></div></td></tr></table></div>
  1206. <p>Then, I got the following answer with the anti-prompt <code>Q:</code>.</p>
  1207. <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>45th president of the United States.
  1208. </code></pre></div></td></tr></table></div>
  1209. <p>This time, because the prompt repeats the same pattern of <code>Q: xxx? A: xxx.</code>, the LLM outputs the anti-prompt we want after its answer, which tells the executor where to stop the generation.</p>
  1210. <h2 id="batchedexecutor">BatchedExecutor<a class="headerlink" href="#batchedexecutor" title="Permanent link"></a></h2>
  1211. <p>Unlike the other executors, <code>BatchedExecutor</code> can accept multiple inputs from different sessions and generate outputs for them at the same time. Here is an example of how to use it.</p>
  1212. <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 1</span>
  1213. <span class="normal"> 2</span>
  1214. <span class="normal"> 3</span>
  1215. <span class="normal"> 4</span>
  1216. <span class="normal"> 5</span>
  1217. <span class="normal"> 6</span>
  1218. <span class="normal"> 7</span>
  1219. <span class="normal"> 8</span>
  1220. <span class="normal"> 9</span>
  1221. <span class="normal"> 10</span>
  1222. <span class="normal"> 11</span>
  1223. <span class="normal"> 12</span>
  1224. <span class="normal"> 13</span>
  1225. <span class="normal"> 14</span>
  1226. <span class="normal"> 15</span>
  1227. <span class="normal"> 16</span>
  1228. <span class="normal"> 17</span>
  1229. <span class="normal"> 18</span>
  1230. <span class="normal"> 19</span>
  1231. <span class="normal"> 20</span>
  1232. <span class="normal"> 21</span>
  1233. <span class="normal"> 22</span>
  1234. <span class="normal"> 23</span>
  1235. <span class="normal"> 24</span>
  1236. <span class="normal"> 25</span>
  1237. <span class="normal"> 26</span>
  1238. <span class="normal"> 27</span>
  1239. <span class="normal"> 28</span>
  1240. <span class="normal"> 29</span>
  1241. <span class="normal"> 30</span>
  1242. <span class="normal"> 31</span>
  1243. <span class="normal"> 32</span>
  1244. <span class="normal"> 33</span>
  1245. <span class="normal"> 34</span>
  1246. <span class="normal"> 35</span>
  1247. <span class="normal"> 36</span>
  1248. <span class="normal"> 37</span>
  1249. <span class="normal"> 38</span>
  1250. <span class="normal"> 39</span>
  1251. <span class="normal"> 40</span>
  1252. <span class="normal"> 41</span>
  1253. <span class="normal"> 42</span>
  1254. <span class="normal"> 43</span>
  1255. <span class="normal"> 44</span>
  1256. <span class="normal"> 45</span>
  1257. <span class="normal"> 46</span>
  1258. <span class="normal"> 47</span>
  1259. <span class="normal"> 48</span>
  1260. <span class="normal"> 49</span>
  1261. <span class="normal"> 50</span>
  1262. <span class="normal"> 51</span>
  1263. <span class="normal"> 52</span>
  1264. <span class="normal"> 53</span>
  1265. <span class="normal"> 54</span>
  1266. <span class="normal"> 55</span>
  1267. <span class="normal"> 56</span>
  1268. <span class="normal"> 57</span>
  1269. <span class="normal"> 58</span>
  1270. <span class="normal"> 59</span>
  1271. <span class="normal"> 60</span>
  1272. <span class="normal"> 61</span>
  1273. <span class="normal"> 62</span>
  1274. <span class="normal"> 63</span>
  1275. <span class="normal"> 64</span>
  1276. <span class="normal"> 65</span>
  1277. <span class="normal"> 66</span>
  1278. <span class="normal"> 67</span>
  1279. <span class="normal"> 68</span>
  1280. <span class="normal"> 69</span>
  1281. <span class="normal"> 70</span>
  1282. <span class="normal"> 71</span>
  1283. <span class="normal"> 72</span>
  1284. <span class="normal"> 73</span>
  1285. <span class="normal"> 74</span>
  1286. <span class="normal"> 75</span>
  1287. <span class="normal"> 76</span>
  1288. <span class="normal"> 77</span>
  1289. <span class="normal"> 78</span>
  1290. <span class="normal"> 79</span>
  1291. <span class="normal"> 80</span>
  1292. <span class="normal"> 81</span>
  1293. <span class="normal"> 82</span>
  1294. <span class="normal"> 83</span>
  1295. <span class="normal"> 84</span>
  1296. <span class="normal"> 85</span>
  1297. <span class="normal"> 86</span>
  1298. <span class="normal"> 87</span>
  1299. <span class="normal"> 88</span>
  1300. <span class="normal"> 89</span>
  1301. <span class="normal"> 90</span>
  1302. <span class="normal"> 91</span>
  1303. <span class="normal"> 92</span>
  1304. <span class="normal"> 93</span>
  1305. <span class="normal"> 94</span>
  1306. <span class="normal"> 95</span>
  1307. <span class="normal"> 96</span>
  1308. <span class="normal"> 97</span>
  1309. <span class="normal"> 98</span>
  1310. <span class="normal"> 99</span>
  1311. <span class="normal">100</span>
  1312. <span class="normal">101</span>
  1313. <span class="normal">102</span>
  1314. <span class="normal">103</span>
  1315. <span class="normal">104</span>
  1316. <span class="normal">105</span>
  1317. <span class="normal">106</span>
  1318. <span class="normal">107</span>
  1319. <span class="normal">108</span>
  1320. <span class="normal">109</span>
  1321. <span class="normal">110</span>
  1322. <span class="normal">111</span>
  1323. <span class="normal">112</span>
  1324. <span class="normal">113</span>
  1325. <span class="normal">114</span>
  1326. <span class="normal">115</span>
  1327. <span class="normal">116</span>
  1328. <span class="normal">117</span>
  1329. <span class="normal">118</span>
  1330. <span class="normal">119</span>
  1331. <span class="normal">120</span>
  1332. <span class="normal">121</span>
  1333. <span class="normal">122</span>
  1334. <span class="normal">123</span>
  1335. <span class="normal">124</span>
  1336. <span class="normal">125</span>
  1337. <span class="normal">126</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">using</span><span class="w"> </span><span class="nn">LLama.Batched</span><span class="p">;</span>
  1338. <span class="k">using</span><span class="w"> </span><span class="nn">LLama.Common</span><span class="p">;</span>
  1339. <span class="k">using</span><span class="w"> </span><span class="nn">LLama.Native</span><span class="p">;</span>
  1340. <span class="k">using</span><span class="w"> </span><span class="nn">LLama.Sampling</span><span class="p">;</span>
  1341. <span class="k">using</span><span class="w"> </span><span class="nn">Spectre.Console</span><span class="p">;</span>
  1342. <span class="k">namespace</span><span class="w"> </span><span class="nn">LLama.Examples.Examples</span><span class="p">;</span>
  1343. <span class="c1">/// &lt;summary&gt;</span>
  1344. <span class="c1">/// This demonstrates using a batch to generate two sequences and then using one</span>
  1345. <span class="c1">/// sequence as the negative guidance (&quot;classifier free guidance&quot;) for the other.</span>
  1346. <span class="c1">/// &lt;/summary&gt;</span>
  1347. <span class="k">public</span><span class="w"> </span><span class="k">class</span><span class="w"> </span><span class="nc">BatchedExecutorGuidance</span>
  1348. <span class="p">{</span>
  1349. <span class="w"> </span><span class="k">private</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">n_len</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">32</span><span class="p">;</span>
  1350. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="k">static</span><span class="w"> </span><span class="k">async</span><span class="w"> </span><span class="n">Task</span><span class="w"> </span><span class="nf">Run</span><span class="p">()</span>
  1351. <span class="w"> </span><span class="p">{</span>
  1352. <span class="w"> </span><span class="kt">string</span><span class="w"> </span><span class="n">modelPath</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">UserSettings</span><span class="p">.</span><span class="n">GetModelPath</span><span class="p">();</span>
  1353. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">parameters</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ModelParams</span><span class="p">(</span><span class="n">modelPath</span><span class="p">);</span>
  1354. <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="nn">var</span><span class="w"> </span><span class="n">model</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">LLamaWeights</span><span class="p">.</span><span class="n">LoadFromFile</span><span class="p">(</span><span class="n">parameters</span><span class="p">);</span>
  1355. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">positivePrompt</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">AnsiConsole</span><span class="p">.</span><span class="n">Ask</span><span class="p">(</span><span class="s">&quot;Positive Prompt (or ENTER for default):&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;My favourite colour is&quot;</span><span class="p">).</span><span class="n">Trim</span><span class="p">();</span>
  1356. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">negativePrompt</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">AnsiConsole</span><span class="p">.</span><span class="n">Ask</span><span class="p">(</span><span class="s">&quot;Negative Prompt (or ENTER for default):&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;I hate the colour red. My favourite colour is&quot;</span><span class="p">).</span><span class="n">Trim</span><span class="p">();</span>
  1357. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">weight</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">AnsiConsole</span><span class="p">.</span><span class="n">Ask</span><span class="p">(</span><span class="s">&quot;Guidance Weight (or ENTER for default):&quot;</span><span class="p">,</span><span class="w"> </span><span class="m">2.0f</span><span class="p">);</span>
  1358. <span class="w"> </span><span class="c1">// Create an executor that can evaluate a batch of conversations together</span>
  1359. <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="nn">var</span><span class="w"> </span><span class="n">executor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">BatchedExecutor</span><span class="p">(</span><span class="n">model</span><span class="p">,</span><span class="w"> </span><span class="n">parameters</span><span class="p">);</span>
  1360. <span class="w"> </span><span class="c1">// Print some info</span>
  1361. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">Model</span><span class="p">.</span><span class="n">Metadata</span><span class="p">.</span><span class="n">GetValueOrDefault</span><span class="p">(</span><span class="s">&quot;general.name&quot;</span><span class="p">,</span><span class="w"> </span><span class="s">&quot;unknown model name&quot;</span><span class="p">);</span>
  1362. <span class="w"> </span><span class="n">Console</span><span class="p">.</span><span class="n">WriteLine</span><span class="p">(</span><span class="s">$&quot;Created executor with model: {name}&quot;</span><span class="p">);</span>
  1363. <span class="w"> </span><span class="c1">// Load the two prompts into two conversations</span>
  1364. <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="nn">var</span><span class="w"> </span><span class="n">guided</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">Create</span><span class="p">();</span>
  1365. <span class="w"> </span><span class="n">guided</span><span class="p">.</span><span class="n">Prompt</span><span class="p">(</span><span class="n">positivePrompt</span><span class="p">);</span>
  1366. <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="nn">var</span><span class="w"> </span><span class="n">guidance</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">Create</span><span class="p">();</span>
  1367. <span class="w"> </span><span class="n">guidance</span><span class="p">.</span><span class="n">Prompt</span><span class="p">(</span><span class="n">negativePrompt</span><span class="p">);</span>
  1368. <span class="w"> </span><span class="c1">// Run inference to evaluate prompts</span>
  1369. <span class="w"> </span><span class="k">await</span><span class="w"> </span><span class="n">AnsiConsole</span>
  1370. <span class="w"> </span><span class="p">.</span><span class="n">Status</span><span class="p">()</span>
  1371. <span class="w"> </span><span class="p">.</span><span class="n">Spinner</span><span class="p">(</span><span class="n">Spinner</span><span class="p">.</span><span class="n">Known</span><span class="p">.</span><span class="n">Line</span><span class="p">)</span>
  1372. <span class="w"> </span><span class="p">.</span><span class="n">StartAsync</span><span class="p">(</span><span class="s">&quot;Evaluating Prompts...&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">_</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">Infer</span><span class="p">());</span>
  1373. <span class="w"> </span><span class="c1">// Fork the &quot;guided&quot; conversation. We&#39;ll run this one without guidance for comparison</span>
  1374. <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="nn">var</span><span class="w"> </span><span class="n">unguided</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">guided</span><span class="p">.</span><span class="n">Fork</span><span class="p">();</span>
  1375. <span class="w"> </span><span class="c1">// Run inference loop</span>
  1376. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">unguidedSampler</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">GuidedSampler</span><span class="p">(</span><span class="k">null</span><span class="p">,</span><span class="w"> </span><span class="n">weight</span><span class="p">);</span>
  1377. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">unguidedDecoder</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">StreamingTokenDecoder</span><span class="p">(</span><span class="n">executor</span><span class="p">.</span><span class="n">Context</span><span class="p">);</span>
  1378. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">guidedSampler</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">GuidedSampler</span><span class="p">(</span><span class="n">guidance</span><span class="p">,</span><span class="w"> </span><span class="n">weight</span><span class="p">);</span>
  1379. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">guidedDecoder</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">StreamingTokenDecoder</span><span class="p">(</span><span class="n">executor</span><span class="p">.</span><span class="n">Context</span><span class="p">);</span>
  1380. <span class="w"> </span><span class="k">await</span><span class="w"> </span><span class="n">AnsiConsole</span>
  1381. <span class="w"> </span><span class="p">.</span><span class="n">Progress</span><span class="p">()</span>
  1382. <span class="w"> </span><span class="p">.</span><span class="n">StartAsync</span><span class="p">(</span><span class="k">async</span><span class="w"> </span><span class="n">progress</span><span class="w"> </span><span class="o">=&gt;</span>
  1383. <span class="w"> </span><span class="p">{</span>
  1384. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">reporter</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">progress</span><span class="p">.</span><span class="n">AddTask</span><span class="p">(</span><span class="s">&quot;Running Inference&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">maxValue</span><span class="p">:</span><span class="w"> </span><span class="n">n_len</span><span class="p">);</span>
  1385. <span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">var</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">&lt;</span><span class="w"> </span><span class="n">n_len</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span>
  1386. <span class="w"> </span><span class="p">{</span>
  1387. <span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="m">0</span><span class="p">)</span>
  1388. <span class="w"> </span><span class="k">await</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">Infer</span><span class="p">();</span>
  1389. <span class="w"> </span><span class="c1">// Sample from the &quot;unguided&quot; conversation. This is just a conversation using the same prompt, without any</span>
  1390. <span class="w"> </span><span class="c1">// guidance. This serves as a comparison to show the effect of guidance.</span>
  1391. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">u</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">unguidedSampler</span><span class="p">.</span><span class="n">Sample</span><span class="p">(</span><span class="n">executor</span><span class="p">.</span><span class="n">Context</span><span class="p">.</span><span class="n">NativeHandle</span><span class="p">,</span><span class="w"> </span><span class="n">unguided</span><span class="p">.</span><span class="n">Sample</span><span class="p">(),</span><span class="w"> </span><span class="n">Array</span><span class="p">.</span><span class="n">Empty</span><span class="o">&lt;</span><span class="n">LLamaToken</span><span class="o">&gt;</span><span class="p">());</span>
  1392. <span class="w"> </span><span class="n">unguidedDecoder</span><span class="p">.</span><span class="n">Add</span><span class="p">(</span><span class="n">u</span><span class="p">);</span>
  1393. <span class="w"> </span><span class="n">unguided</span><span class="p">.</span><span class="n">Prompt</span><span class="p">(</span><span class="n">u</span><span class="p">);</span>
  1394. <span class="w"> </span><span class="c1">// Sample from the &quot;guided&quot; conversation. This sampler will internally use the &quot;guidance&quot; conversation</span>
  1395. <span class="w"> </span><span class="c1">// to steer the conversation. See how this is done in GuidedSampler.ProcessLogits (bottom of this file).</span>
  1396. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">g</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">guidedSampler</span><span class="p">.</span><span class="n">Sample</span><span class="p">(</span><span class="n">executor</span><span class="p">.</span><span class="n">Context</span><span class="p">.</span><span class="n">NativeHandle</span><span class="p">,</span><span class="w"> </span><span class="n">guided</span><span class="p">.</span><span class="n">Sample</span><span class="p">(),</span><span class="w"> </span><span class="n">Array</span><span class="p">.</span><span class="n">Empty</span><span class="o">&lt;</span><span class="n">LLamaToken</span><span class="o">&gt;</span><span class="p">());</span>
  1397. <span class="w"> </span><span class="n">guidedDecoder</span><span class="p">.</span><span class="n">Add</span><span class="p">(</span><span class="n">g</span><span class="p">);</span>
  1398. <span class="w"> </span><span class="c1">// Use this token to advance both guided _and_ guidance. Keeping them in sync (except for the initial prompt).</span>
  1399. <span class="w"> </span><span class="n">guided</span><span class="p">.</span><span class="n">Prompt</span><span class="p">(</span><span class="n">g</span><span class="p">);</span>
  1400. <span class="w"> </span><span class="n">guidance</span><span class="p">.</span><span class="n">Prompt</span><span class="p">(</span><span class="n">g</span><span class="p">);</span>
  1401. <span class="w"> </span><span class="c1">// Early exit if we reach the natural end of the guided sentence</span>
  1402. <span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">g</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">model</span><span class="p">.</span><span class="n">EndOfSentenceToken</span><span class="p">)</span>
  1403. <span class="w"> </span><span class="k">break</span><span class="p">;</span>
  1404. <span class="w"> </span><span class="c1">// Update progress bar</span>
  1405. <span class="w"> </span><span class="n">reporter</span><span class="p">.</span><span class="n">Increment</span><span class="p">(</span><span class="m">1</span><span class="p">);</span>
  1406. <span class="w"> </span><span class="p">}</span>
  1407. <span class="w"> </span><span class="p">});</span>
  1408. <span class="w"> </span><span class="n">AnsiConsole</span><span class="p">.</span><span class="n">MarkupLine</span><span class="p">(</span><span class="s">$&quot;[green]Unguided:[/][white]{unguidedDecoder.Read().ReplaceLineEndings(&quot;</span><span class="w"> </span><span class="s">&quot;)}[/]&quot;</span><span class="p">);</span>
  1409. <span class="w"> </span><span class="n">AnsiConsole</span><span class="p">.</span><span class="n">MarkupLine</span><span class="p">(</span><span class="s">$&quot;[green]Guided:[/][white]{guidedDecoder.Read().ReplaceLineEndings(&quot;</span><span class="w"> </span><span class="s">&quot;)}[/]&quot;</span><span class="p">);</span>
  1410. <span class="w"> </span><span class="p">}</span>
  1411. <span class="w"> </span><span class="k">private</span><span class="w"> </span><span class="k">class</span><span class="w"> </span><span class="nf">GuidedSampler</span><span class="p">(</span><span class="n">Conversation</span><span class="o">?</span><span class="w"> </span><span class="n">guidance</span><span class="p">,</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">weight</span><span class="p">)</span>
  1412. <span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="n">BaseSamplingPipeline</span>
  1413. <span class="w"> </span><span class="p">{</span>
  1414. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="k">void</span><span class="w"> </span><span class="nf">Accept</span><span class="p">(</span><span class="n">SafeLLamaContextHandle</span><span class="w"> </span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="n">LLamaToken</span><span class="w"> </span><span class="n">token</span><span class="p">)</span>
  1415. <span class="w"> </span><span class="p">{</span>
  1416. <span class="w"> </span><span class="p">}</span>
  1417. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="n">ISamplingPipeline</span><span class="w"> </span><span class="nf">Clone</span><span class="p">()</span>
  1418. <span class="w"> </span><span class="p">{</span>
  1419. <span class="w"> </span><span class="k">throw</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="nf">NotSupportedException</span><span class="p">();</span>
  1420. <span class="w"> </span><span class="p">}</span>
  1421. <span class="w"> </span><span class="k">protected</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="k">void</span><span class="w"> </span><span class="nf">ProcessLogits</span><span class="p">(</span><span class="n">SafeLLamaContextHandle</span><span class="w"> </span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="n">Span</span><span class="o">&lt;</span><span class="kt">float</span><span class="o">&gt;</span><span class="w"> </span><span class="n">logits</span><span class="p">,</span><span class="w"> </span><span class="n">ReadOnlySpan</span><span class="o">&lt;</span><span class="n">LLamaToken</span><span class="o">&gt;</span><span class="w"> </span><span class="n">lastTokens</span><span class="p">)</span>
  1422. <span class="w"> </span><span class="p">{</span>
  1423. <span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">guidance</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="k">null</span><span class="p">)</span>
  1424. <span class="w"> </span><span class="k">return</span><span class="p">;</span>
  1425. <span class="w"> </span><span class="c1">// Get the logits generated by the guidance sequences</span>
  1426. <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">guidanceLogits</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">guidance</span><span class="p">.</span><span class="n">Sample</span><span class="p">();</span>
  1427. <span class="w"> </span><span class="c1">// Use those logits to guide this sequence</span>
  1428. <span class="w"> </span><span class="n">NativeApi</span><span class="p">.</span><span class="n">llama_sample_apply_guidance</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="n">logits</span><span class="p">,</span><span class="w"> </span><span class="n">guidanceLogits</span><span class="p">,</span><span class="w"> </span><span class="n">weight</span><span class="p">);</span>
  1429. <span class="w"> </span><span class="p">}</span>
  1430. <span class="w"> </span><span class="k">protected</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="n">LLamaToken</span><span class="w"> </span><span class="nf">ProcessTokenDataArray</span><span class="p">(</span><span class="n">SafeLLamaContextHandle</span><span class="w"> </span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="n">LLamaTokenDataArray</span><span class="w"> </span><span class="n">candidates</span><span class="p">,</span><span class="w"> </span><span class="n">ReadOnlySpan</span><span class="o">&lt;</span><span class="n">LLamaToken</span><span class="o">&gt;</span><span class="w"> </span><span class="n">lastTokens</span><span class="p">)</span>
  1431. <span class="w"> </span><span class="p">{</span>
  1432. <span class="w"> </span><span class="n">candidates</span><span class="p">.</span><span class="n">Temperature</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="m">0.8f</span><span class="p">);</span>
  1433. <span class="w"> </span><span class="n">candidates</span><span class="p">.</span><span class="n">TopK</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="m">25</span><span class="p">);</span>
  1434. <span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">candidates</span><span class="p">.</span><span class="n">SampleToken</span><span class="p">(</span><span class="n">ctx</span><span class="p">);</span>
  1435. <span class="w"> </span><span class="p">}</span>
  1436. <span class="w"> </span><span class="p">}</span>
  1437. <span class="p">}</span>
  1438. </code></pre></div></td></tr></table></div>
  1439. <h2 id="inference-parameters">Inference parameters<a class="headerlink" href="#inference-parameters" title="Permanent link"></a></h2>
  1440. <p>Unlike context parameters, which are described in <a href="../UnderstandLLamaContext/">understand-llama-context</a>, inference parameters are passed to an executor each time you call its API to run inference. That means you can change the parameters every time you ask the model to generate output.</p>
  1441. <p>Here are the parameters for LLamaSharp executors.</p>
  1442. <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 1</span>
  1443. <span class="normal"> 2</span>
  1444. <span class="normal"> 3</span>
  1445. <span class="normal"> 4</span>
  1446. <span class="normal"> 5</span>
  1447. <span class="normal"> 6</span>
  1448. <span class="normal"> 7</span>
  1449. <span class="normal"> 8</span>
  1450. <span class="normal"> 9</span>
  1451. <span class="normal">10</span>
  1452. <span class="normal">11</span>
  1453. <span class="normal">12</span>
  1454. <span class="normal">13</span>
  1455. <span class="normal">14</span>
  1456. <span class="normal">15</span>
  1457. <span class="normal">16</span>
  1458. <span class="normal">17</span>
  1459. <span class="normal">18</span>
  1460. <span class="normal">19</span>
  1461. <span class="normal">20</span>
  1462. <span class="normal">21</span>
  1463. <span class="normal">22</span>
  1464. <span class="normal">23</span>
  1465. <span class="normal">24</span>
  1466. <span class="normal">25</span>
  1467. <span class="normal">26</span>
  1468. <span class="normal">27</span>
  1469. <span class="normal">28</span>
  1470. <span class="normal">29</span>
  1471. <span class="normal">30</span>
  1472. <span class="normal">31</span>
  1473. <span class="normal">32</span>
  1474. <span class="normal">33</span>
  1475. <span class="normal">34</span>
  1476. <span class="normal">35</span>
  1477. <span class="normal">36</span>
  1478. <span class="normal">37</span>
  1479. <span class="normal">38</span>
  1480. <span class="normal">39</span>
  1481. <span class="normal">40</span>
  1482. <span class="normal">41</span>
  1483. <span class="normal">42</span>
  1484. <span class="normal">43</span>
  1485. <span class="normal">44</span>
  1486. <span class="normal">45</span>
  1487. <span class="normal">46</span>
  1488. <span class="normal">47</span>
  1489. <span class="normal">48</span>
  1490. <span class="normal">49</span>
  1491. <span class="normal">50</span>
  1492. <span class="normal">51</span>
  1493. <span class="normal">52</span>
  1494. <span class="normal">53</span>
  1495. <span class="normal">54</span>
  1496. <span class="normal">55</span>
  1497. <span class="normal">56</span>
  1498. <span class="normal">57</span>
  1499. <span class="normal">58</span>
  1500. <span class="normal">59</span>
  1501. <span class="normal">60</span>
  1502. <span class="normal">61</span>
  1503. <span class="normal">62</span>
  1504. <span class="normal">63</span>
  1505. <span class="normal">64</span>
  1506. <span class="normal">65</span>
  1507. <span class="normal">66</span>
  1508. <span class="normal">67</span>
  1509. <span class="normal">68</span>
  1510. <span class="normal">69</span>
  1511. <span class="normal">70</span>
  1512. <span class="normal">71</span>
  1513. <span class="normal">72</span>
  1514. <span class="normal">73</span>
  1515. <span class="normal">74</span>
  1516. <span class="normal">75</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="c1">/// &lt;summary&gt;</span>
  1517. <span class="c1">/// The parameters used for inference.</span>
  1518. <span class="c1">/// &lt;/summary&gt;</span>
  1519. <span class="k">public</span><span class="w"> </span><span class="n">record</span><span class="w"> </span><span class="n">InferenceParams</span>
  1520. <span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="n">IInferenceParams</span>
  1521. <span class="p">{</span>
  1522. <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
  1523. <span class="w"> </span><span class="c1">/// number of tokens to keep from initial prompt</span>
  1524. <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
  1525. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">TokensKeep</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0</span><span class="p">;</span>
  1526. <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
  1527. <span class="w"> </span><span class="c1">/// how many new tokens to predict (n_predict), set to -1 to infinitely generate response</span>
  1528. <span class="w"> </span><span class="c1">/// until it completes.</span>
  1529. <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
  1530. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">MaxTokens</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">-</span><span class="m">1</span><span class="p">;</span>
  1531. <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
  1532. <span class="w"> </span><span class="c1">/// logit bias for specific tokens</span>
  1533. <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
  1534. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">Dictionary</span><span class="o">&lt;</span><span class="n">LLamaToken</span><span class="p">,</span><span class="w"> </span><span class="kt">float</span><span class="o">&gt;?</span><span class="w"> </span><span class="n">LogitBias</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">null</span><span class="p">;</span>
  1535. <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
  1536. <span class="w"> </span><span class="c1">/// Sequences where the model will stop generating further tokens.</span>
  1537. <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
  1538. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">IReadOnlyList</span><span class="o">&lt;</span><span class="kt">string</span><span class="o">&gt;</span><span class="w"> </span><span class="n">AntiPrompts</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">Array</span><span class="p">.</span><span class="n">Empty</span><span class="o">&lt;</span><span class="kt">string</span><span class="o">&gt;</span><span class="p">();</span>
  1539. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1540. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">TopK</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">40</span><span class="p">;</span>
  1541. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1542. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">TopP</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0.95f</span><span class="p">;</span>
  1543. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1544. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">MinP</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0.05f</span><span class="p">;</span>
  1545. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1546. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">TfsZ</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">1.0f</span><span class="p">;</span>
  1547. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1548. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">TypicalP</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">1.0f</span><span class="p">;</span>
  1549. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1550. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">Temperature</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0.8f</span><span class="p">;</span>
  1551. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1552. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">RepeatPenalty</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">1.1f</span><span class="p">;</span>
  1553. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1554. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">RepeatLastTokensCount</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">64</span><span class="p">;</span>
  1555. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1556. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">FrequencyPenalty</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">.</span><span class="m">0f</span><span class="p">;</span>
  1557. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1558. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">PresencePenalty</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">.</span><span class="m">0f</span><span class="p">;</span>
  1559. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1560. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">MirostatType</span><span class="w"> </span><span class="n">Mirostat</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">MirostatType</span><span class="p">.</span><span class="n">Disable</span><span class="p">;</span>
  1561. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1562. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">MirostatTau</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">5.0f</span><span class="p">;</span>
  1563. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1564. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">MirostatEta</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0.1f</span><span class="p">;</span>
  1565. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1566. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">bool</span><span class="w"> </span><span class="n">PenalizeNL</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">true</span><span class="p">;</span>
  1567. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1568. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">SafeLLamaGrammarHandle</span><span class="o">?</span><span class="w"> </span><span class="n">Grammar</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
  1569. <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
  1570. <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">ISamplingPipeline</span><span class="o">?</span><span class="w"> </span><span class="n">SamplingPipeline</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
  1571. <span class="p">}</span>
  1572. </code></pre></div></td></tr></table></div>
  1573. <h2 id="save-and-load-executor-state">Save and load executor state<a class="headerlink" href="#save-and-load-executor-state" title="Permanent link"></a></h2>
  1574. <p>An executor also has its own state, which can be saved and loaded. This is very useful when you want to support restoring a previous session for the user in your application.</p>
  1575. <p>The following code shows how to save and load the executor state.</p>
  1576. <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 1</span>
  1577. <span class="normal"> 2</span>
  1578. <span class="normal"> 3</span>
  1579. <span class="normal"> 4</span>
  1580. <span class="normal"> 5</span>
  1581. <span class="normal"> 6</span>
  1582. <span class="normal"> 7</span>
  1583. <span class="normal"> 8</span>
  1584. <span class="normal"> 9</span>
  1585. <span class="normal">10</span>
  1586. <span class="normal">11</span>
  1587. <span class="normal">12</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="n">InteractiveExecutor</span><span class="w"> </span><span class="n">executor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">InteractiveExecutor</span><span class="p">(</span><span class="n">model</span><span class="p">);</span>
  1588. <span class="c1">// do some things...</span>
  1589. <span class="n">executor</span><span class="p">.</span><span class="n">SaveState</span><span class="p">(</span><span class="s">&quot;executor.st&quot;</span><span class="p">);</span>
  1590. <span class="kt">var</span><span class="w"> </span><span class="n">stateData</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">GetStateData</span><span class="p">();</span>
  1591. <span class="n">InteractiveExecutor</span><span class="w"> </span><span class="n">executor2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">InteractiveExecutor</span><span class="p">(</span><span class="n">model</span><span class="p">);</span>
  1592. <span class="n">executor2</span><span class="p">.</span><span class="n">LoadState</span><span class="p">(</span><span class="n">stateData</span><span class="p">);</span>
  1593. <span class="c1">// do some things...</span>
  1594. <span class="n">InteractiveExecutor</span><span class="w"> </span><span class="n">executor3</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">InteractiveExecutor</span><span class="p">(</span><span class="n">model</span><span class="p">);</span>
  1595. <span class="n">executor3</span><span class="p">.</span><span class="n">LoadState</span><span class="p">(</span><span class="s">&quot;executor.st&quot;</span><span class="p">);</span>
  1596. <span class="c1">// do some things...</span>
  1597. </code></pre></div></td></tr></table></div>
  1598. </article>
  1599. </div>
  1600. </div>
  1601. </main>
  1602. <footer class="md-footer">
  1603. <div class="md-footer-meta md-typeset">
  1604. <div class="md-footer-meta__inner md-grid">
  1605. <div class="md-copyright">
  1606. Made with
  1607. <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
  1608. Material for MkDocs
  1609. </a>
  1610. </div>
  1611. </div>
  1612. </div>
  1613. </footer>
  1614. </div>
  1615. <div class="md-dialog" data-md-component="dialog">
  1616. <div class="md-dialog__inner md-typeset"></div>
  1617. </div>
  1618. <script id="__config" type="application/json">{"base": "../..", "features": ["content.action.edit", "navigation.instant"], "search": "../../assets/javascripts/workers/search.74e28a9f.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": {"provider": "mike"}}</script>
  1619. <script src="../../assets/javascripts/bundle.220ee61c.min.js"></script>
  1620. </body>
  1621. </html>

C#/.NET上易用的LLM高性能推理框架,支持LLaMA和LLaVA系列模型。

Contributors (1)