|
-
- <!doctype html>
- <html lang="en" class="no-js">
- <head>
-
- <meta charset="utf-8">
- <meta name="viewport" content="width=device-width,initial-scale=1">
-
-
-
-
- <link rel="prev" href="../NativeLibraryConfig/">
-
-
- <link rel="next" href="../ChatSession/">
-
- <link rel="icon" href="../../media/icon128.png">
- <meta name="generator" content="mkdocs-1.4.3, mkdocs-material-9.1.20">
-
-
-
- <title>Use executors - LLamaSharp Documentation</title>
-
-
-
- <link rel="stylesheet" href="../../assets/stylesheets/main.eebd395e.min.css">
-
-
- <link rel="stylesheet" href="../../assets/stylesheets/palette.ecc896b0.min.css">
-
-
-
-
-
-
-
-
-
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
- <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Fira+Sans:300,300i,400,400i,700,700i%7CFira+Mono:400,400i,700,700i&amp;display=fallback">
- <style>:root{--md-text-font:"Fira Sans";--md-code-font:"Fira Mono"}</style>
-
-
-
- <link rel="stylesheet" href="../../css/extra.css?v=14">
-
- <!-- Theme bootstrap helpers defined as globals. __md_scope: the URL of "../.." resolved against the current location. __md_hash(str): folds the string's character codes into a number using (h<<5)-h+code, starting from 0. __md_get(key, storage=localStorage, scope=__md_scope): returns JSON.parse of the item stored under scope.pathname + "." + key. __md_set(key, value, storage=localStorage, scope=__md_scope): stores JSON.stringify(value) under that same key, silently ignoring any exception raised while doing so. --><script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
-
-
-
-
-
-
- </head>
-
-
-
-
-
-
-
-
-
- <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="white" data-md-color-accent="red">
-
-
-
- <!-- Restores the saved colour palette: reads "__palette" through __md_get and, when its color property has typeof "object", copies every key of that object onto the body element as a data-md-color-KEY attribute. NOTE(review): typeof null is also "object", so a stored color of null would reach Object.keys; confirm whether that can occur. --><script>var palette=__md_get("__palette");if(palette&&"object"==typeof palette.color)for(var key of Object.keys(palette.color))document.body.setAttribute("data-md-color-"+key,palette.color[key])</script>
-
- <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
- <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
- <label class="md-overlay" for="__drawer"></label>
- <div data-md-component="skip">
-
-
- <a href="#llamasharp-executors" class="md-skip">
- Skip to content
- </a>
-
- </div>
- <div data-md-component="announce">
-
- </div>
-
- <div data-md-color-scheme="default" data-md-component="outdated" hidden>
-
- </div>
-
-
-
-
-
-
- <header class="md-header md-header--shadow" data-md-component="header">
- <nav class="md-header__inner md-grid" aria-label="Header">
- <a href="../.." title="LLamaSharp Documentation" class="md-header__button md-logo" aria-label="LLamaSharp Documentation" data-md-component="logo">
-
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 9h5.5L13 3.5V9M6 2h8l6 6v12a2 2 0 0 1-2 2H6a2 2 0 0 1-2-2V4c0-1.11.89-2 2-2m9 16v-2H6v2h9m3-4v-2H6v2h12Z"/></svg>
-
- </a>
- <label class="md-header__button md-icon" for="__drawer">
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
- </label>
- <div class="md-header__title" data-md-component="header-title">
- <div class="md-header__ellipsis">
- <div class="md-header__topic">
- <span class="md-ellipsis">
- LLamaSharp Documentation
- </span>
- </div>
- <div class="md-header__topic" data-md-component="header-topic">
- <span class="md-ellipsis">
-
- Use executors
-
- </span>
- </div>
- </div>
- </div>
-
-
- <form class="md-header__option" data-md-component="palette">
-
-
-
-
- <input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="white" data-md-color-accent="red" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
-
- <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 7a5 5 0 0 1 5 5 5 5 0 0 1-5 5 5 5 0 0 1-5-5 5 5 0 0 1 5-5m0 2a3 3 0 0 0-3 3 3 3 0 0 0 3 3 3 3 0 0 0 3-3 3 3 0 0 0-3-3m0-7 2.39 3.42C13.65 5.15 12.84 5 12 5c-.84 0-1.65.15-2.39.42L12 2M3.34 7l4.16-.35A7.2 7.2 0 0 0 5.94 8.5c-.44.74-.69 1.5-.83 2.29L3.34 7m.02 10 1.76-3.77a7.131 7.131 0 0 0 2.38 4.14L3.36 17M20.65 7l-1.77 3.79a7.023 7.023 0 0 0-2.38-4.15l4.15.36m-.01 10-4.14.36c.59-.51 1.12-1.14 1.54-1.86.42-.73.69-1.5.83-2.29L20.64 17M12 22l-2.41-3.44c.74.27 1.55.44 2.41.44.82 0 1.63-.17 2.37-.44L12 22Z"/></svg>
- </label>
-
-
-
-
-
- <input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="blue" data-md-color-accent="blue" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_2">
-
- <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m17.75 4.09-2.53 1.94.91 3.06-2.63-1.81-2.63 1.81.91-3.06-2.53-1.94L12.44 4l1.06-3 1.06 3 3.19.09m3.5 6.91-1.64 1.25.59 1.98-1.7-1.17-1.7 1.17.59-1.98L15.75 11l2.06-.05L18.5 9l.69 1.95 2.06.05m-2.28 4.95c.83-.08 1.72 1.1 1.19 1.85-.32.45-.66.87-1.08 1.27C15.17 23 8.84 23 4.94 19.07c-3.91-3.9-3.91-10.24 0-14.14.4-.4.82-.76 1.27-1.08.75-.53 1.93.36 1.85 1.19-.27 2.86.69 5.83 2.89 8.02a9.96 9.96 0 0 0 8.02 2.89m-1.64 2.02a12.08 12.08 0 0 1-7.8-3.47c-2.17-2.19-3.33-5-3.49-7.82-2.81 3.14-2.7 7.96.31 10.98 3.02 3.01 7.84 3.12 10.98.31Z"/></svg>
- </label>
-
-
- </form>
-
-
-
-
- <label class="md-header__button md-icon" for="__search">
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
- </label>
- <div class="md-search" data-md-component="search" role="dialog">
- <label class="md-search__overlay" for="__search"></label>
- <div class="md-search__inner" role="search">
- <form class="md-search__form" name="search">
- <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
- <label class="md-search__icon md-icon" for="__search">
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
- </label>
- <nav class="md-search__options" aria-label="Search">
-
- <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
- </button>
- </nav>
-
- </form>
- <div class="md-search__output">
- <div class="md-search__scrollwrap" data-md-scrollfix>
- <div class="md-search-result" data-md-component="search-result">
- <div class="md-search-result__meta">
- Initializing search
- </div>
- <ol class="md-search-result__list" role="presentation"></ol>
- </div>
- </div>
- </div>
- </div>
- </div>
-
-
- </nav>
-
- </header>
-
- <div class="md-container" data-md-component="container">
-
-
-
-
-
-
- <main class="md-main" data-md-component="main">
- <div class="md-main__inner md-grid">
-
-
-
- <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
- <div class="md-sidebar__scrollwrap">
- <div class="md-sidebar__inner">
-
-
-
- <nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
- <label class="md-nav__title" for="__drawer">
- <a href="../.." title="LLamaSharp Documentation" class="md-nav__button md-logo" aria-label="LLamaSharp Documentation" data-md-component="logo">
-
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 9h5.5L13 3.5V9M6 2h8l6 6v12a2 2 0 0 1-2 2H6a2 2 0 0 1-2-2V4c0-1.11.89-2 2-2m9 16v-2H6v2h9m3-4v-2H6v2h12Z"/></svg>
-
- </a>
- LLamaSharp Documentation
- </label>
-
- <ul class="md-nav__list" data-md-scrollfix>
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../.." class="md-nav__link">
- Overview
- </a>
- </li>
-
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../QuickStart/" class="md-nav__link">
- Quick Start
- </a>
- </li>
-
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Architecture/" class="md-nav__link">
- Architecture
- </a>
- </li>
-
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../FAQ/" class="md-nav__link">
- FAQ
- </a>
- </li>
-
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../ContributingGuide/" class="md-nav__link">
- Contributing Guide
- </a>
- </li>
-
-
-
-
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item md-nav__item--active md-nav__item--nested">
-
-
-
-
- <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" checked>
-
-
-
- <label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
- Tutorials
- <span class="md-nav__icon md-icon"></span>
- </label>
-
- <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="true">
- <label class="md-nav__title" for="__nav_6">
- <span class="md-nav__icon md-icon"></span>
- Tutorials
- </label>
- <ul class="md-nav__list" data-md-scrollfix>
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../NativeLibraryConfig/" class="md-nav__link">
- Customize the native library loading
- </a>
- </li>
-
-
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item md-nav__item--active">
-
- <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
-
-
-
-
-
- <label class="md-nav__link md-nav__link--active" for="__toc">
- Use executors
- <span class="md-nav__icon md-icon"></span>
- </label>
-
- <a href="./" class="md-nav__link md-nav__link--active">
- Use executors
- </a>
-
-
-
- <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
-
-
-
-
-
-
- <label class="md-nav__title" for="__toc">
- <span class="md-nav__icon md-icon"></span>
- Table of contents
- </label>
- <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
-
- <li class="md-nav__item">
- <a href="#text-to-text-apis-of-the-executors" class="md-nav__link">
- Text-to-Text APIs of the executors
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#interactiveexecutor--instructexecutor" class="md-nav__link">
- InteractiveExecutor &amp; InstructExecutor
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#statelessexecutor" class="md-nav__link">
- StatelessExecutor
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#batchedexecutor" class="md-nav__link">
- BatchedExecutor
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#inference-parameters" class="md-nav__link">
- Inference parameters
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#save-and-load-executor-state" class="md-nav__link">
- Save and load executor state
- </a>
-
- </li>
-
- </ul>
-
- </nav>
-
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../ChatSession/" class="md-nav__link">
- Use ChatSession
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../UnderstandLLamaContext/" class="md-nav__link">
- Understand LLamaContext
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../GetEmbeddings/" class="md-nav__link">
- Get embeddings
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../Quantization/" class="md-nav__link">
- Quantize the model
- </a>
- </li>
-
-
-
-
- </ul>
- </nav>
- </li>
-
-
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item md-nav__item--nested">
-
-
-
-
- <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" >
-
-
-
- <label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
- Integrations
- <span class="md-nav__icon md-icon"></span>
- </label>
-
- <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
- <label class="md-nav__title" for="__nav_7">
- <span class="md-nav__icon md-icon"></span>
- Integrations
- </label>
- <ul class="md-nav__list" data-md-scrollfix>
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Integrations/semantic-kernel/" class="md-nav__link">
- semantic-kernel integration
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Integrations/kernel-memory/" class="md-nav__link">
- kernel-memory integration
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Integrations/BotSharp.md" class="md-nav__link">
- BotSharp integration
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Integrations/Langchain.md" class="md-nav__link">
- Langchain integration
- </a>
- </li>
-
-
-
-
- </ul>
- </nav>
- </li>
-
-
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item md-nav__item--nested">
-
-
-
-
- <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" >
-
-
-
- <label class="md-nav__link" for="__nav_8" id="__nav_8_label" tabindex="0">
- Examples
- <span class="md-nav__icon md-icon"></span>
- </label>
-
- <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="false">
- <label class="md-nav__title" for="__nav_8">
- <span class="md-nav__icon md-icon"></span>
- Examples
- </label>
- <ul class="md-nav__list" data-md-scrollfix>
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/BatchedExecutorFork/" class="md-nav__link">
- Batched executor - multi-output to one input
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/BatchedExecutorGuidance/" class="md-nav__link">
- Batched executor - basic guidance
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/BatchedExecutorRewind/" class="md-nav__link">
- Batched executor - rewinding to an earlier state
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/ChatChineseGB2312/" class="md-nav__link">
- Chinese LLM - with GB2312 encoding
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/ChatSessionStripRoleName/" class="md-nav__link">
- ChatSession - stripping role names
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/ChatSessionWithHistory/" class="md-nav__link">
- ChatSession - with history
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/ChatSessionWithRestart/" class="md-nav__link">
- ChatSession - restarting
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/ChatSessionWithRoleName/" class="md-nav__link">
- ChatSession - Basic
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/CodingAssistant/" class="md-nav__link">
- Coding assistant
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/GetEmbeddings/" class="md-nav__link">
- Get embeddings
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/GrammarJsonResponse/" class="md-nav__link">
- Grammar - json response
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/InstructModeExecute/" class="md-nav__link">
- Instruct executor - basic
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/InteractiveModeExecute/" class="md-nav__link">
- Interactive executor - basic
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/KernelMemory/" class="md-nav__link">
- Kernel memory integration - basic
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/KernelMemorySaveAndLoad/" class="md-nav__link">
- Kernel-memory - save &amp; load
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/LLavaInteractiveModeExecute/" class="md-nav__link">
- LLaVA - basic
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/LoadAndSaveSession/" class="md-nav__link">
- ChatSession - load &amp; save
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/LoadAndSaveState/" class="md-nav__link">
- Executor - save/load state
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/QuantizeModel/" class="md-nav__link">
- Quantization
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/SemanticKernelChat/" class="md-nav__link">
- Semantic-kernel - chat
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/SemanticKernelMemory/" class="md-nav__link">
- Semantic-kernel - with kernel-memory
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/SemanticKernelPrompt/" class="md-nav__link">
- Semantic-kernel - basic
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/StatelessModeExecute/" class="md-nav__link">
- Stateless executor
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../Examples/TalkToYourself/" class="md-nav__link">
- Talk to yourself
- </a>
- </li>
-
-
-
-
- </ul>
- </nav>
- </li>
-
-
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item md-nav__item--nested">
-
-
-
-
- <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_9" >
-
-
-
- <label class="md-nav__link" for="__nav_9" id="__nav_9_label" tabindex="0">
- API Reference
- <span class="md-nav__icon md-icon"></span>
- </label>
-
- <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_9_label" aria-expanded="false">
- <label class="md-nav__title" for="__nav_9">
- <span class="md-nav__icon md-icon"></span>
- API Reference
- </label>
- <ul class="md-nav__list" data-md-scrollfix>
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/" class="md-nav__link">
- index
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.adaptercollection/" class="md-nav__link">
- llama.abstractions.adaptercollection
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.icontextparams/" class="md-nav__link">
- llama.abstractions.icontextparams
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.ihistorytransform/" class="md-nav__link">
- llama.abstractions.ihistorytransform
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.iinferenceparams/" class="md-nav__link">
- llama.abstractions.iinferenceparams
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.illamaexecutor/" class="md-nav__link">
- llama.abstractions.illamaexecutor
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.illamaparams/" class="md-nav__link">
- llama.abstractions.illamaparams
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.imodelparams/" class="md-nav__link">
- llama.abstractions.imodelparams
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.itextstreamtransform/" class="md-nav__link">
- llama.abstractions.itextstreamtransform
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.itexttransform/" class="md-nav__link">
- llama.abstractions.itexttransform
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.loraadapter/" class="md-nav__link">
- llama.abstractions.loraadapter
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.metadataoverride/" class="md-nav__link">
- llama.abstractions.metadataoverride
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.metadataoverrideconverter/" class="md-nav__link">
- llama.abstractions.metadataoverrideconverter
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.tensorsplitscollection/" class="md-nav__link">
- llama.abstractions.tensorsplitscollection
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.abstractions.tensorsplitscollectionconverter/" class="md-nav__link">
- llama.abstractions.tensorsplitscollectionconverter
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.antipromptprocessor/" class="md-nav__link">
- llama.antipromptprocessor
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.batched.alreadypromptedconversationexception/" class="md-nav__link">
- llama.batched.alreadypromptedconversationexception
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.batched.batchedexecutor/" class="md-nav__link">
- llama.batched.batchedexecutor
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.batched.cannotforkwhilerequiresinferenceexception/" class="md-nav__link">
- llama.batched.cannotforkwhilerequiresinferenceexception
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.batched.cannotmodifywhilerequiresinferenceexception/" class="md-nav__link">
- llama.batched.cannotmodifywhilerequiresinferenceexception
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.batched.cannotsamplerequiresinferenceexception/" class="md-nav__link">
- llama.batched.cannotsamplerequiresinferenceexception
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.batched.cannotsamplerequirespromptexception/" class="md-nav__link">
- llama.batched.cannotsamplerequirespromptexception
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.batched.conversation/" class="md-nav__link">
- llama.batched.conversation
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.batched.conversationextensions/" class="md-nav__link">
- llama.batched.conversationextensions
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.batched.experimentalbatchedexecutorexception/" class="md-nav__link">
- llama.batched.experimentalbatchedexecutorexception
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.chatsession-1/" class="md-nav__link">
- llama.chatsession-1
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.chatsession/" class="md-nav__link">
- llama.chatsession
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.common.authorrole/" class="md-nav__link">
- llama.common.authorrole
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.common.chathistory/" class="md-nav__link">
- llama.common.chathistory
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.common.fixedsizequeue-1/" class="md-nav__link">
- llama.common.fixedsizequeue-1
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.common.inferenceparams/" class="md-nav__link">
- llama.common.inferenceparams
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.common.mirostattype/" class="md-nav__link">
- llama.common.mirostattype
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.common.modelparams/" class="md-nav__link">
- llama.common.modelparams
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.grammarexpectedname/" class="md-nav__link">
- llama.exceptions.grammarexpectedname
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.grammarexpectednext/" class="md-nav__link">
- llama.exceptions.grammarexpectednext
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.grammarexpectedprevious/" class="md-nav__link">
- llama.exceptions.grammarexpectedprevious
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.grammarformatexception/" class="md-nav__link">
- llama.exceptions.grammarformatexception
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.grammarunexpectedcharaltelement/" class="md-nav__link">
- llama.exceptions.grammarunexpectedcharaltelement
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.grammarunexpectedcharrngelement/" class="md-nav__link">
- llama.exceptions.grammarunexpectedcharrngelement
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.grammarunexpectedendelement/" class="md-nav__link">
- llama.exceptions.grammarunexpectedendelement
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.grammarunexpectedendofinput/" class="md-nav__link">
- llama.exceptions.grammarunexpectedendofinput
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.grammarunexpectedhexcharscount/" class="md-nav__link">
- llama.exceptions.grammarunexpectedhexcharscount
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.grammarunknownescapecharacter/" class="md-nav__link">
- llama.exceptions.grammarunknownescapecharacter
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.llamadecodeerror/" class="md-nav__link">
- llama.exceptions.llamadecodeerror
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.loadweightsfailedexception/" class="md-nav__link">
- llama.exceptions.loadweightsfailedexception
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.exceptions.runtimeerror/" class="md-nav__link">
- llama.exceptions.runtimeerror
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.extensions.icontextparamsextensions/" class="md-nav__link">
- llama.extensions.icontextparamsextensions
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.extensions.imodelparamsextensions/" class="md-nav__link">
- llama.extensions.imodelparamsextensions
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.grammars.grammar/" class="md-nav__link">
- llama.grammars.grammar
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.grammars.grammarrule/" class="md-nav__link">
- llama.grammars.grammarrule
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.ichatmodel/" class="md-nav__link">
- llama.ichatmodel
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.llamacache/" class="md-nav__link">
- llama.llamacache
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.llamaembedder/" class="md-nav__link">
- llama.llamaembedder
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.llamamodel/" class="md-nav__link">
- llama.llamamodel
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.llamamodelv1/" class="md-nav__link">
- llama.llamamodelv1
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.llamaparams/" class="md-nav__link">
- llama.llamaparams
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.llamaquantizer/" class="md-nav__link">
- llama.llamaquantizer
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.llamastate/" class="md-nav__link">
- llama.llamastate
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.llamatransforms/" class="md-nav__link">
- llama.llamatransforms
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.llavaweights/" class="md-nav__link">
- llama.llavaweights
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.decoderesult/" class="md-nav__link">
- llama.native.decoderesult
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.ggmltype/" class="md-nav__link">
- llama.native.ggmltype
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.gpusplitmode/" class="md-nav__link">
- llama.native.gpusplitmode
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamabatch/" class="md-nav__link">
- llama.native.llamabatch
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamabeamsstate/" class="md-nav__link">
- llama.native.llamabeamsstate
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamabeamview/" class="md-nav__link">
- llama.native.llamabeamview
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamachatmessage/" class="md-nav__link">
- llama.native.llamachatmessage
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamacontextparams/" class="md-nav__link">
- llama.native.llamacontextparams
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamaftype/" class="md-nav__link">
- llama.native.llamaftype
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamagrammarelement/" class="md-nav__link">
- llama.native.llamagrammarelement
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamagrammarelementtype/" class="md-nav__link">
- llama.native.llamagrammarelementtype
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamakvcacheview/" class="md-nav__link">
- llama.native.llamakvcacheview
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamakvcacheviewcell/" class="md-nav__link">
- llama.native.llamakvcacheviewcell
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamakvcacheviewsafehandle/" class="md-nav__link">
- llama.native.llamakvcacheviewsafehandle
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamaloglevel/" class="md-nav__link">
- llama.native.llamaloglevel
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamamodelkvoverridetype/" class="md-nav__link">
- llama.native.llamamodelkvoverridetype
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamamodelmetadataoverride/" class="md-nav__link">
- llama.native.llamamodelmetadataoverride
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamamodelparams/" class="md-nav__link">
- llama.native.llamamodelparams
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamamodelquantizeparams/" class="md-nav__link">
- llama.native.llamamodelquantizeparams
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamanativebatch/" class="md-nav__link">
- llama.native.llamanativebatch
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamapoolingtype/" class="md-nav__link">
- llama.native.llamapoolingtype
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamapos/" class="md-nav__link">
- llama.native.llamapos
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamaropetype/" class="md-nav__link">
- llama.native.llamaropetype
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamaseqid/" class="md-nav__link">
- llama.native.llamaseqid
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamatoken/" class="md-nav__link">
- llama.native.llamatoken
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamatokendata/" class="md-nav__link">
- llama.native.llamatokendata
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamatokendataarray/" class="md-nav__link">
- llama.native.llamatokendataarray
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamatokendataarraynative/" class="md-nav__link">
- llama.native.llamatokendataarraynative
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamatokentype/" class="md-nav__link">
- llama.native.llamatokentype
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llamavocabtype/" class="md-nav__link">
- llama.native.llamavocabtype
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.llavaimageembed/" class="md-nav__link">
- llama.native.llavaimageembed
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.nativeapi/" class="md-nav__link">
- llama.native.nativeapi
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.nativelibraryconfig/" class="md-nav__link">
- llama.native.nativelibraryconfig
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.ropescalingtype/" class="md-nav__link">
- llama.native.ropescalingtype
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.safellamacontexthandle/" class="md-nav__link">
- llama.native.safellamacontexthandle
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.safellamagrammarhandle/" class="md-nav__link">
- llama.native.safellamagrammarhandle
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.safellamahandlebase/" class="md-nav__link">
- llama.native.safellamahandlebase
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.safellamamodelhandle/" class="md-nav__link">
- llama.native.safellamamodelhandle
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.safellavaimageembedhandle/" class="md-nav__link">
- llama.native.safellavaimageembedhandle
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.native.safellavamodelhandle/" class="md-nav__link">
- llama.native.safellavamodelhandle
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.quantizer/" class="md-nav__link">
- llama.quantizer
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.sampling.basesamplingpipeline/" class="md-nav__link">
- llama.sampling.basesamplingpipeline
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.sampling.defaultsamplingpipeline/" class="md-nav__link">
- llama.sampling.defaultsamplingpipeline
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.sampling.greedysamplingpipeline/" class="md-nav__link">
- llama.sampling.greedysamplingpipeline
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.sampling.isamplingpipeline/" class="md-nav__link">
- llama.sampling.isamplingpipeline
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.sampling.isamplingpipelineextensions/" class="md-nav__link">
- llama.sampling.isamplingpipelineextensions
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.sampling.mirostate2samplingpipeline/" class="md-nav__link">
- llama.sampling.mirostate2samplingpipeline
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.sampling.mirostatesamplingpipeline/" class="md-nav__link">
- llama.sampling.mirostatesamplingpipeline
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.sessionstate/" class="md-nav__link">
- llama.sessionstate
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.streamingtokendecoder/" class="md-nav__link">
- llama.streamingtokendecoder
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.chatcompletion/" class="md-nav__link">
- llama.types.chatcompletion
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.chatcompletionchoice/" class="md-nav__link">
- llama.types.chatcompletionchoice
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.chatcompletionchunk/" class="md-nav__link">
- llama.types.chatcompletionchunk
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.chatcompletionchunkchoice/" class="md-nav__link">
- llama.types.chatcompletionchunkchoice
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.chatcompletionchunkdelta/" class="md-nav__link">
- llama.types.chatcompletionchunkdelta
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.chatcompletionmessage/" class="md-nav__link">
- llama.types.chatcompletionmessage
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.chatmessagerecord/" class="md-nav__link">
- llama.types.chatmessagerecord
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.chatrole/" class="md-nav__link">
- llama.types.chatrole
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.completion/" class="md-nav__link">
- llama.types.completion
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.completionchoice/" class="md-nav__link">
- llama.types.completionchoice
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.completionchunk/" class="md-nav__link">
- llama.types.completionchunk
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.completionlogprobs/" class="md-nav__link">
- llama.types.completionlogprobs
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.completionusage/" class="md-nav__link">
- llama.types.completionusage
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.embedding/" class="md-nav__link">
- llama.types.embedding
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.embeddingdata/" class="md-nav__link">
- llama.types.embeddingdata
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/llama.types.embeddingusage/" class="md-nav__link">
- llama.types.embeddingusage
- </a>
- </li>
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../../xmldocs/logger/" class="md-nav__link">
- logger
- </a>
- </li>
-
-
-
-
- </ul>
- </nav>
- </li>
-
-
-
- </ul>
- </nav>
- </div>
- </div>
- </div>
-
-
-
- <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
- <div class="md-sidebar__scrollwrap">
- <div class="md-sidebar__inner">
-
-
- <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
-
-
-
-
-
-
- <label class="md-nav__title" for="__toc">
- <span class="md-nav__icon md-icon"></span>
- Table of contents
- </label>
- <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
-
- <li class="md-nav__item">
- <a href="#text-to-text-apis-of-the-executors" class="md-nav__link">
- Text-to-Text APIs of the executors
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#interactiveexecutor--instructexecutor" class="md-nav__link">
- InteractiveExecutor & InstructExecutor
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#statelessexecutor" class="md-nav__link">
- StatelessExecutor
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#batchedexecutor" class="md-nav__link">
- BatchedExecutor
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#inference-parameters" class="md-nav__link">
- Inference parameters
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#save-and-load-executor-state" class="md-nav__link">
- Save and load executor state
- </a>
-
- </li>
-
- </ul>
-
- </nav>
- </div>
- </div>
- </div>
-
-
-
- <div class="md-content" data-md-component="content">
- <article class="md-content__inner md-typeset">
-
-
-
-
- <h1 id="llamasharp-executors">LLamaSharp executors<a class="headerlink" href="#llamasharp-executors" title="Permanent link"></a></h1>
- <p>A LLamaSharp executor defines the behavior of the model when it is called. Currently, there are four kinds of executors: <code>InteractiveExecutor</code>, <code>InstructExecutor</code>, <code>StatelessExecutor</code> and <code>BatchedExecutor</code>.</p>
- <p>In short, <code>InteractiveExecutor</code> is suitable for continuously getting answers to your questions from the LLM. <code>InstructExecutor</code> lets the LLM execute your instructions, such as "continue writing". <code>StatelessExecutor</code> is best for one-off jobs because previous inferences have no impact on the current inference. <code>BatchedExecutor</code> can accept multiple inputs and generate multiple outputs for different sessions at the same time, significantly improving the throughput of the program.</p>
- <h2 id="text-to-text-apis-of-the-executors">Text-to-Text APIs of the executors<a class="headerlink" href="#text-to-text-apis-of-the-executors" title="Permanent link"></a></h2>
- <p>All the executors implement the interface <code>ILLamaExecutor</code>, which provides two APIs to execute text-to-text tasks.</p>
- <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 1</span>
- <span class="normal"> 2</span>
- <span class="normal"> 3</span>
- <span class="normal"> 4</span>
- <span class="normal"> 5</span>
- <span class="normal"> 6</span>
- <span class="normal"> 7</span>
- <span class="normal"> 8</span>
- <span class="normal"> 9</span>
- <span class="normal">10</span>
- <span class="normal">11</span>
- <span class="normal">12</span>
- <span class="normal">13</span>
- <span class="normal">14</span>
- <span class="normal">15</span>
- <span class="normal">16</span>
- <span class="normal">17</span>
- <span class="normal">18</span>
- <span class="normal">19</span>
- <span class="normal">20</span>
- <span class="normal">21</span>
- <span class="normal">22</span>
- <span class="normal">23</span>
- <span class="normal">24</span>
- <span class="normal">25</span>
- <span class="normal">26</span>
- <span class="normal">27</span>
- <span class="normal">28</span>
- <span class="normal">29</span>
- <span class="normal">30</span>
- <span class="normal">31</span>
- <span class="normal">32</span>
- <span class="normal">33</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">public</span><span class="w"> </span><span class="k">interface</span><span class="w"> </span><span class="n">ILLamaExecutor</span>
- <span class="p">{</span>
- <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
- <span class="w"> </span><span class="c1">/// The loaded context for this executor.</span>
- <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">LLamaContext</span><span class="w"> </span><span class="n">Context</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
-
- <span class="w"> </span><span class="c1">// LLava Section</span>
- <span class="w"> </span><span class="c1">//</span>
- <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
- <span class="w"> </span><span class="c1">/// Identify if it's a multi-modal model and there is an image to process.</span>
- <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">bool</span><span class="w"> </span><span class="n">IsMultiModal</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
- <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
- <span class="w"> </span><span class="c1">/// Multi-Modal Projections / Clip Model weights</span>
- <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">LLavaWeights</span><span class="o">?</span><span class="w"> </span><span class="n">ClipModel</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span>
-
- <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
- <span class="w"> </span><span class="c1">/// List of images: Image filename and path (jpeg images).</span>
- <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">List</span><span class="o">&lt;</span><span class="kt">string</span><span class="o">&gt;</span><span class="w"> </span><span class="n">ImagePaths</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
-
-
- <span class="w"> </span><span class="c1">/// &lt;summary&gt;</span>
- <span class="w"> </span><span class="c1">/// Asynchronously infers a response from the model.</span>
- <span class="w"> </span><span class="c1">/// &lt;/summary&gt;</span>
- <span class="w"> </span><span class="c1">/// &lt;param name="text"&gt;Your prompt&lt;/param&gt;</span>
- <span class="w"> </span><span class="c1">/// &lt;param name="inferenceParams"&gt;Any additional parameters&lt;/param&gt;</span>
- <span class="w"> </span><span class="c1">/// &lt;param name="token"&gt;A cancellation token.&lt;/param&gt;</span>
- <span class="w"> </span><span class="c1">/// &lt;returns&gt;&lt;/returns&gt;</span>
- <span class="w"> </span><span class="n">IAsyncEnumerable</span><span class="o">&lt;</span><span class="kt">string</span><span class="o">&gt;</span><span class="w"> </span><span class="n">InferAsync</span><span class="p">(</span><span class="kt">string</span><span class="w"> </span><span class="n">text</span><span class="p">,</span><span class="w"> </span><span class="n">IInferenceParams</span><span class="o">?</span><span class="w"> </span><span class="n">inferenceParams</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">null</span><span class="p">,</span><span class="w"> </span><span class="n">CancellationToken</span><span class="w"> </span><span class="n">token</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">default</span><span class="p">);</span>
- <span class="p">}</span>
- </code></pre></div></td></tr></table></div>
- <p>The outputs of both APIs are <strong>yield enumerable</strong>. Therefore, when receiving the output, you can directly use <code>foreach</code> to act on each word in order as it arrives, instead of waiting for the whole process to complete.</p>
- <h2 id="interactiveexecutor--instructexecutor">InteractiveExecutor & InstructExecutor<a class="headerlink" href="#interactiveexecutor--instructexecutor" title="Permanent link"></a></h2>
- <p>Both of them take "completing the prompt" as the goal when generating the response. For example, if you input <code>Long long ago, there was a fox who wanted to make friends with humans. One day</code>, then the LLM will continue to write the story.</p>
- <p>Under interactive mode, you play the role of the user and the LLM plays the role of the assistant. It will then help you with your question or request.</p>
- <p>Under instruct mode, you give the LLM some instructions and it follows them.</p>
- <p>Though their behaviors sound similar, the results can differ considerably depending on your prompt. For example, "chat-with-bob" performs well under interactive mode and <code>alpaca</code> does well with instruct mode.</p>
- <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
- <span class="normal">2</span>
- <span class="normal">3</span>
- <span class="normal">4</span>
- <span class="normal">5</span>
- <span class="normal">6</span>
- <span class="normal">7</span>
- <span class="normal">8</span>
- <span class="normal">9</span></pre></div></td><td class="code"><div><pre><span></span><code>// chat-with-bob
-
- Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
-
- User: Hello, Bob.
- Bob: Hello. How may I help you today?
- User: Please tell me the largest city in Europe.
- Bob: Sure. The largest city in Europe is Moscow, the capital of Russia.
- User:
- </code></pre></div></td></tr></table></div>
- <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
- <span class="normal">2</span>
- <span class="normal">3</span></pre></div></td><td class="code"><div><pre><span></span><code>// alpaca
-
- Below is an instruction that describes a task. Write a response that appropriately completes the request.
- </code></pre></div></td></tr></table></div>
- <p>Therefore, please modify the prompt accordingly when switching from one mode to the other.</p>
- <h2 id="statelessexecutor">StatelessExecutor<a class="headerlink" href="#statelessexecutor" title="Permanent link"></a></h2>
- <p>Despite the differences between interactive mode and instruct mode, both of them are stateful modes. That is, your previous questions/instructions will affect the current response from the LLM. In contrast, the stateless executor does not have such a "memory". No matter how many times you talk to it, it will only concentrate on what you say this time. It is very useful when you want a clean context that is not affected by previous inputs.</p>
- <p>Since the stateless executor has no memory of previous conversations, you need to include your question within the whole prompt to get a better answer.</p>
- <p>For example, if you feed <code>Q: Who is Trump? A:</code> to the stateless executor, it may give the following answer with the antiprompt <code>Q:</code>.</p>
- <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span>
- <span class="normal">2</span>
- <span class="normal">3</span>
- <span class="normal">4</span>
- <span class="normal">5</span></pre></div></td><td class="code"><div><pre><span></span><code>Donald J. Trump, born June 14, 1946, is an American businessman, television personality, politician and the 45th President of the United States (2017-2021). # Anexo:Torneo de Hamburgo 2022 (individual masculino)
-
- ## Presentación previa
-
- * Defensor del título: Daniil Medvédev
- </code></pre></div></td></tr></table></div>
- <p>It seems that things went well at first. However, after answering the question itself, the LLM began to talk about other things until the answer reached the token count limit. The reason for this strange behavior is that the anti-prompt cannot be matched. With this input, the LLM cannot decide whether to append the string "Q: " at the end of the response.</p>
- <p>As an improvement, let's take the following text as the input:</p>
- <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>Q: What is the capital of the USA? A: Washington. Q: What is the sum of 1 and 2? A: 3. Q: Who is Trump? A:
- </code></pre></div></td></tr></table></div>
- <p>Then, I got the following answer with the anti-prompt <code>Q:</code>.</p>
- <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">1</span></pre></div></td><td class="code"><div><pre><span></span><code>45th president of the United States.
- </code></pre></div></td></tr></table></div>
- <p>This time, by repeating the same pattern of <code>Q: xxx? A: xxx.</code>, the LLM outputs the anti-prompt we want, which helps decide where to stop the generation.</p>
- <h2 id="batchedexecutor">BatchedExecutor<a class="headerlink" href="#batchedexecutor" title="Permanent link"></a></h2>
- <p>Unlike the other executors, <code>BatchedExecutor</code> can accept multiple inputs from different sessions and generate outputs for them at the same time. Here is an example of how to use it.</p>
- <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 1</span>
- <span class="normal"> 2</span>
- <span class="normal"> 3</span>
- <span class="normal"> 4</span>
- <span class="normal"> 5</span>
- <span class="normal"> 6</span>
- <span class="normal"> 7</span>
- <span class="normal"> 8</span>
- <span class="normal"> 9</span>
- <span class="normal"> 10</span>
- <span class="normal"> 11</span>
- <span class="normal"> 12</span>
- <span class="normal"> 13</span>
- <span class="normal"> 14</span>
- <span class="normal"> 15</span>
- <span class="normal"> 16</span>
- <span class="normal"> 17</span>
- <span class="normal"> 18</span>
- <span class="normal"> 19</span>
- <span class="normal"> 20</span>
- <span class="normal"> 21</span>
- <span class="normal"> 22</span>
- <span class="normal"> 23</span>
- <span class="normal"> 24</span>
- <span class="normal"> 25</span>
- <span class="normal"> 26</span>
- <span class="normal"> 27</span>
- <span class="normal"> 28</span>
- <span class="normal"> 29</span>
- <span class="normal"> 30</span>
- <span class="normal"> 31</span>
- <span class="normal"> 32</span>
- <span class="normal"> 33</span>
- <span class="normal"> 34</span>
- <span class="normal"> 35</span>
- <span class="normal"> 36</span>
- <span class="normal"> 37</span>
- <span class="normal"> 38</span>
- <span class="normal"> 39</span>
- <span class="normal"> 40</span>
- <span class="normal"> 41</span>
- <span class="normal"> 42</span>
- <span class="normal"> 43</span>
- <span class="normal"> 44</span>
- <span class="normal"> 45</span>
- <span class="normal"> 46</span>
- <span class="normal"> 47</span>
- <span class="normal"> 48</span>
- <span class="normal"> 49</span>
- <span class="normal"> 50</span>
- <span class="normal"> 51</span>
- <span class="normal"> 52</span>
- <span class="normal"> 53</span>
- <span class="normal"> 54</span>
- <span class="normal"> 55</span>
- <span class="normal"> 56</span>
- <span class="normal"> 57</span>
- <span class="normal"> 58</span>
- <span class="normal"> 59</span>
- <span class="normal"> 60</span>
- <span class="normal"> 61</span>
- <span class="normal"> 62</span>
- <span class="normal"> 63</span>
- <span class="normal"> 64</span>
- <span class="normal"> 65</span>
- <span class="normal"> 66</span>
- <span class="normal"> 67</span>
- <span class="normal"> 68</span>
- <span class="normal"> 69</span>
- <span class="normal"> 70</span>
- <span class="normal"> 71</span>
- <span class="normal"> 72</span>
- <span class="normal"> 73</span>
- <span class="normal"> 74</span>
- <span class="normal"> 75</span>
- <span class="normal"> 76</span>
- <span class="normal"> 77</span>
- <span class="normal"> 78</span>
- <span class="normal"> 79</span>
- <span class="normal"> 80</span>
- <span class="normal"> 81</span>
- <span class="normal"> 82</span>
- <span class="normal"> 83</span>
- <span class="normal"> 84</span>
- <span class="normal"> 85</span>
- <span class="normal"> 86</span>
- <span class="normal"> 87</span>
- <span class="normal"> 88</span>
- <span class="normal"> 89</span>
- <span class="normal"> 90</span>
- <span class="normal"> 91</span>
- <span class="normal"> 92</span>
- <span class="normal"> 93</span>
- <span class="normal"> 94</span>
- <span class="normal"> 95</span>
- <span class="normal"> 96</span>
- <span class="normal"> 97</span>
- <span class="normal"> 98</span>
- <span class="normal"> 99</span>
- <span class="normal">100</span>
- <span class="normal">101</span>
- <span class="normal">102</span>
- <span class="normal">103</span>
- <span class="normal">104</span>
- <span class="normal">105</span>
- <span class="normal">106</span>
- <span class="normal">107</span>
- <span class="normal">108</span>
- <span class="normal">109</span>
- <span class="normal">110</span>
- <span class="normal">111</span>
- <span class="normal">112</span>
- <span class="normal">113</span>
- <span class="normal">114</span>
- <span class="normal">115</span>
- <span class="normal">116</span>
- <span class="normal">117</span>
- <span class="normal">118</span>
- <span class="normal">119</span>
- <span class="normal">120</span>
- <span class="normal">121</span>
- <span class="normal">122</span>
- <span class="normal">123</span>
- <span class="normal">124</span>
- <span class="normal">125</span>
- <span class="normal">126</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">using</span><span class="w"> </span><span class="nn">LLama.Batched</span><span class="p">;</span>
- <span class="k">using</span><span class="w"> </span><span class="nn">LLama.Common</span><span class="p">;</span>
- <span class="k">using</span><span class="w"> </span><span class="nn">LLama.Native</span><span class="p">;</span>
- <span class="k">using</span><span class="w"> </span><span class="nn">LLama.Sampling</span><span class="p">;</span>
- <span class="k">using</span><span class="w"> </span><span class="nn">Spectre.Console</span><span class="p">;</span>
-
- <span class="k">namespace</span><span class="w"> </span><span class="nn">LLama.Examples.Examples</span><span class="p">;</span>
-
- <span class="c1">/// &lt;summary&gt;</span>
- <span class="c1">/// This demonstrates using a batch to generate two sequences and then using one</span>
- <span class="c1">/// sequence as the negative guidance ("classifier free guidance") for the other.</span>
- <span class="c1">/// &lt;/summary&gt;</span>
- <span class="k">public</span><span class="w"> </span><span class="k">class</span><span class="w"> </span><span class="nc">BatchedExecutorGuidance</span>
- <span class="p">{</span>
- <span class="w"> </span><span class="k">private</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">n_len</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">32</span><span class="p">;</span>
-
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="k">static</span><span class="w"> </span><span class="k">async</span><span class="w"> </span><span class="n">Task</span><span class="w"> </span><span class="nf">Run</span><span class="p">()</span>
- <span class="w"> </span><span class="p">{</span>
- <span class="w"> </span><span class="kt">string</span><span class="w"> </span><span class="n">modelPath</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">UserSettings</span><span class="p">.</span><span class="n">GetModelPath</span><span class="p">();</span>
-
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">parameters</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">ModelParams</span><span class="p">(</span><span class="n">modelPath</span><span class="p">);</span>
- <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="nn">var</span><span class="w"> </span><span class="n">model</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">LLamaWeights</span><span class="p">.</span><span class="n">LoadFromFile</span><span class="p">(</span><span class="n">parameters</span><span class="p">);</span>
-
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">positivePrompt</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">AnsiConsole</span><span class="p">.</span><span class="n">Ask</span><span class="p">(</span><span class="s">"Positive Prompt (or ENTER for default):"</span><span class="p">,</span><span class="w"> </span><span class="s">"My favourite colour is"</span><span class="p">).</span><span class="n">Trim</span><span class="p">();</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">negativePrompt</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">AnsiConsole</span><span class="p">.</span><span class="n">Ask</span><span class="p">(</span><span class="s">"Negative Prompt (or ENTER for default):"</span><span class="p">,</span><span class="w"> </span><span class="s">"I hate the colour red. My favourite colour is"</span><span class="p">).</span><span class="n">Trim</span><span class="p">();</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">weight</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">AnsiConsole</span><span class="p">.</span><span class="n">Ask</span><span class="p">(</span><span class="s">"Guidance Weight (or ENTER for default):"</span><span class="p">,</span><span class="w"> </span><span class="m">2.0f</span><span class="p">);</span>
-
- <span class="w"> </span><span class="c1">// Create an executor that can evaluate a batch of conversations together</span>
- <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="nn">var</span><span class="w"> </span><span class="n">executor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">BatchedExecutor</span><span class="p">(</span><span class="n">model</span><span class="p">,</span><span class="w"> </span><span class="n">parameters</span><span class="p">);</span>
-
- <span class="w"> </span><span class="c1">// Print some info</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">Model</span><span class="p">.</span><span class="n">Metadata</span><span class="p">.</span><span class="n">GetValueOrDefault</span><span class="p">(</span><span class="s">"general.name"</span><span class="p">,</span><span class="w"> </span><span class="s">"unknown model name"</span><span class="p">);</span>
- <span class="w"> </span><span class="n">Console</span><span class="p">.</span><span class="n">WriteLine</span><span class="p">(</span><span class="s">$"Created executor with model: {name}"</span><span class="p">);</span>
-
- <span class="w"> </span><span class="c1">// Load the two prompts into two conversations</span>
- <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="nn">var</span><span class="w"> </span><span class="n">guided</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">Create</span><span class="p">();</span>
- <span class="w"> </span><span class="n">guided</span><span class="p">.</span><span class="n">Prompt</span><span class="p">(</span><span class="n">positivePrompt</span><span class="p">);</span>
- <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="nn">var</span><span class="w"> </span><span class="n">guidance</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">Create</span><span class="p">();</span>
- <span class="w"> </span><span class="n">guidance</span><span class="p">.</span><span class="n">Prompt</span><span class="p">(</span><span class="n">negativePrompt</span><span class="p">);</span>
-
- <span class="w"> </span><span class="c1">// Run inference to evaluate prompts</span>
- <span class="w"> </span><span class="k">await</span><span class="w"> </span><span class="n">AnsiConsole</span>
- <span class="w"> </span><span class="p">.</span><span class="n">Status</span><span class="p">()</span>
- <span class="w"> </span><span class="p">.</span><span class="n">Spinner</span><span class="p">(</span><span class="n">Spinner</span><span class="p">.</span><span class="n">Known</span><span class="p">.</span><span class="n">Line</span><span class="p">)</span>
- <span class="w"> </span><span class="p">.</span><span class="n">StartAsync</span><span class="p">(</span><span class="s">"Evaluating Prompts..."</span><span class="p">,</span><span class="w"> </span><span class="n">_</span><span class="w"> </span><span class="o">=></span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">Infer</span><span class="p">());</span>
-
- <span class="w"> </span><span class="c1">// Fork the "guided" conversation. We'll run this one without guidance for comparison</span>
- <span class="w"> </span><span class="k">using</span><span class="w"> </span><span class="nn">var</span><span class="w"> </span><span class="n">unguided</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">guided</span><span class="p">.</span><span class="n">Fork</span><span class="p">();</span>
-
- <span class="w"> </span><span class="c1">// Run inference loop</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">unguidedSampler</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">GuidedSampler</span><span class="p">(</span><span class="k">null</span><span class="p">,</span><span class="w"> </span><span class="n">weight</span><span class="p">);</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">unguidedDecoder</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">StreamingTokenDecoder</span><span class="p">(</span><span class="n">executor</span><span class="p">.</span><span class="n">Context</span><span class="p">);</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">guidedSampler</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">GuidedSampler</span><span class="p">(</span><span class="n">guidance</span><span class="p">,</span><span class="w"> </span><span class="n">weight</span><span class="p">);</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">guidedDecoder</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">StreamingTokenDecoder</span><span class="p">(</span><span class="n">executor</span><span class="p">.</span><span class="n">Context</span><span class="p">);</span>
- <span class="w"> </span><span class="k">await</span><span class="w"> </span><span class="n">AnsiConsole</span>
- <span class="w"> </span><span class="p">.</span><span class="n">Progress</span><span class="p">()</span>
- <span class="w"> </span><span class="p">.</span><span class="n">StartAsync</span><span class="p">(</span><span class="k">async</span><span class="w"> </span><span class="n">progress</span><span class="w"> </span><span class="o">=></span>
- <span class="w"> </span><span class="p">{</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">reporter</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">progress</span><span class="p">.</span><span class="n">AddTask</span><span class="p">(</span><span class="s">"Running Inference"</span><span class="p">,</span><span class="w"> </span><span class="n">maxValue</span><span class="p">:</span><span class="w"> </span><span class="n">n_len</span><span class="p">);</span>
-
- <span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="kt">var</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="n">n_len</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span>
- <span class="w"> </span><span class="p">{</span>
- <span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="m">0</span><span class="p">)</span>
- <span class="w"> </span><span class="k">await</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">Infer</span><span class="p">();</span>
-
- <span class="w"> </span><span class="c1">// Sample from the "unguided" conversation. This is just a conversation using the same prompt, without any</span>
- <span class="w"> </span><span class="c1">// guidance. This serves as a comparison to show the effect of guidance.</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">u</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">unguidedSampler</span><span class="p">.</span><span class="n">Sample</span><span class="p">(</span><span class="n">executor</span><span class="p">.</span><span class="n">Context</span><span class="p">.</span><span class="n">NativeHandle</span><span class="p">,</span><span class="w"> </span><span class="n">unguided</span><span class="p">.</span><span class="n">Sample</span><span class="p">(),</span><span class="w"> </span><span class="n">Array</span><span class="p">.</span><span class="n">Empty</span><span class="o"><</span><span class="n">LLamaToken</span><span class="o">></span><span class="p">());</span>
- <span class="w"> </span><span class="n">unguidedDecoder</span><span class="p">.</span><span class="n">Add</span><span class="p">(</span><span class="n">u</span><span class="p">);</span>
- <span class="w"> </span><span class="n">unguided</span><span class="p">.</span><span class="n">Prompt</span><span class="p">(</span><span class="n">u</span><span class="p">);</span>
-
- <span class="w"> </span><span class="c1">// Sample from the "guided" conversation. This sampler will internally use the "guidance" conversation</span>
- <span class="w"> </span><span class="c1">// to steer the conversation. See how this is done in GuidedSampler.ProcessLogits (bottom of this file).</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">g</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">guidedSampler</span><span class="p">.</span><span class="n">Sample</span><span class="p">(</span><span class="n">executor</span><span class="p">.</span><span class="n">Context</span><span class="p">.</span><span class="n">NativeHandle</span><span class="p">,</span><span class="w"> </span><span class="n">guided</span><span class="p">.</span><span class="n">Sample</span><span class="p">(),</span><span class="w"> </span><span class="n">Array</span><span class="p">.</span><span class="n">Empty</span><span class="o"><</span><span class="n">LLamaToken</span><span class="o">></span><span class="p">());</span>
- <span class="w"> </span><span class="n">guidedDecoder</span><span class="p">.</span><span class="n">Add</span><span class="p">(</span><span class="n">g</span><span class="p">);</span>
-
- <span class="w"> </span><span class="c1">// Use this token to advance both guided _and_ guidance. Keeping them in sync (except for the initial prompt).</span>
- <span class="w"> </span><span class="n">guided</span><span class="p">.</span><span class="n">Prompt</span><span class="p">(</span><span class="n">g</span><span class="p">);</span>
- <span class="w"> </span><span class="n">guidance</span><span class="p">.</span><span class="n">Prompt</span><span class="p">(</span><span class="n">g</span><span class="p">);</span>
-
- <span class="w"> </span><span class="c1">// Early exit if we reach the natural end of the guided sentence</span>
- <span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">g</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">model</span><span class="p">.</span><span class="n">EndOfSentenceToken</span><span class="p">)</span>
- <span class="w"> </span><span class="k">break</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">// Update progress bar</span>
- <span class="w"> </span><span class="n">reporter</span><span class="p">.</span><span class="n">Increment</span><span class="p">(</span><span class="m">1</span><span class="p">);</span>
- <span class="w"> </span><span class="p">}</span>
- <span class="w"> </span><span class="p">});</span>
-
- <span class="w"> </span><span class="n">AnsiConsole</span><span class="p">.</span><span class="n">MarkupLine</span><span class="p">(</span><span class="s">$"[green]Unguided:[/][white]{unguidedDecoder.Read().ReplaceLineEndings("</span><span class="w"> </span><span class="s">")}[/]"</span><span class="p">);</span>
- <span class="w"> </span><span class="n">AnsiConsole</span><span class="p">.</span><span class="n">MarkupLine</span><span class="p">(</span><span class="s">$"[green]Guided:[/][white]{guidedDecoder.Read().ReplaceLineEndings("</span><span class="w"> </span><span class="s">")}[/]"</span><span class="p">);</span>
- <span class="w"> </span><span class="p">}</span>
-
- <span class="w"> </span><span class="k">private</span><span class="w"> </span><span class="k">class</span><span class="w"> </span><span class="nf">GuidedSampler</span><span class="p">(</span><span class="n">Conversation</span><span class="o">?</span><span class="w"> </span><span class="n">guidance</span><span class="p">,</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">weight</span><span class="p">)</span>
- <span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="n">BaseSamplingPipeline</span>
- <span class="w"> </span><span class="p">{</span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="k">void</span><span class="w"> </span><span class="nf">Accept</span><span class="p">(</span><span class="n">SafeLLamaContextHandle</span><span class="w"> </span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="n">LLamaToken</span><span class="w"> </span><span class="n">token</span><span class="p">)</span>
- <span class="w"> </span><span class="p">{</span>
- <span class="w"> </span><span class="p">}</span>
-
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="n">ISamplingPipeline</span><span class="w"> </span><span class="nf">Clone</span><span class="p">()</span>
- <span class="w"> </span><span class="p">{</span>
- <span class="w"> </span><span class="k">throw</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="nf">NotSupportedException</span><span class="p">();</span>
- <span class="w"> </span><span class="p">}</span>
-
- <span class="w"> </span><span class="k">protected</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="k">void</span><span class="w"> </span><span class="nf">ProcessLogits</span><span class="p">(</span><span class="n">SafeLLamaContextHandle</span><span class="w"> </span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="n">Span</span><span class="o"><</span><span class="kt">float</span><span class="o">></span><span class="w"> </span><span class="n">logits</span><span class="p">,</span><span class="w"> </span><span class="n">ReadOnlySpan</span><span class="o"><</span><span class="n">LLamaToken</span><span class="o">></span><span class="w"> </span><span class="n">lastTokens</span><span class="p">)</span>
- <span class="w"> </span><span class="p">{</span>
- <span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">guidance</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="k">null</span><span class="p">)</span>
- <span class="w"> </span><span class="k">return</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">// Get the logits generated by the guidance sequences</span>
- <span class="w"> </span><span class="kt">var</span><span class="w"> </span><span class="n">guidanceLogits</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">guidance</span><span class="p">.</span><span class="n">Sample</span><span class="p">();</span>
-
- <span class="w"> </span><span class="c1">// Use those logits to guide this sequence</span>
- <span class="w"> </span><span class="n">NativeApi</span><span class="p">.</span><span class="n">llama_sample_apply_guidance</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="n">logits</span><span class="p">,</span><span class="w"> </span><span class="n">guidanceLogits</span><span class="p">,</span><span class="w"> </span><span class="n">weight</span><span class="p">);</span>
- <span class="w"> </span><span class="p">}</span>
-
- <span class="w"> </span><span class="k">protected</span><span class="w"> </span><span class="k">override</span><span class="w"> </span><span class="n">LLamaToken</span><span class="w"> </span><span class="nf">ProcessTokenDataArray</span><span class="p">(</span><span class="n">SafeLLamaContextHandle</span><span class="w"> </span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="n">LLamaTokenDataArray</span><span class="w"> </span><span class="n">candidates</span><span class="p">,</span><span class="w"> </span><span class="n">ReadOnlySpan</span><span class="o"><</span><span class="n">LLamaToken</span><span class="o">></span><span class="w"> </span><span class="n">lastTokens</span><span class="p">)</span>
- <span class="w"> </span><span class="p">{</span>
- <span class="w"> </span><span class="n">candidates</span><span class="p">.</span><span class="n">Temperature</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="m">0.8f</span><span class="p">);</span>
- <span class="w"> </span><span class="n">candidates</span><span class="p">.</span><span class="n">TopK</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span><span class="w"> </span><span class="m">25</span><span class="p">);</span>
-
- <span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">candidates</span><span class="p">.</span><span class="n">SampleToken</span><span class="p">(</span><span class="n">ctx</span><span class="p">);</span>
- <span class="w"> </span><span class="p">}</span>
- <span class="w"> </span><span class="p">}</span>
- <span class="p">}</span>
- </code></pre></div></td></tr></table></div>
- <h2 id="inference-parameters">Inference parameters<a class="headerlink" href="#inference-parameters" title="Permanent link"></a></h2>
- <p>Unlike context parameters, which are described in <a href="../UnderstandLLamaContext/">understand-llama-context</a>, inference parameters are passed to an executor each time you call its API to run inference. That means you can change the parameters every time you ask the model to generate output.</p>
- <p>Here are the parameters for LLamaSharp executors.</p>
- <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 1</span>
- <span class="normal"> 2</span>
- <span class="normal"> 3</span>
- <span class="normal"> 4</span>
- <span class="normal"> 5</span>
- <span class="normal"> 6</span>
- <span class="normal"> 7</span>
- <span class="normal"> 8</span>
- <span class="normal"> 9</span>
- <span class="normal">10</span>
- <span class="normal">11</span>
- <span class="normal">12</span>
- <span class="normal">13</span>
- <span class="normal">14</span>
- <span class="normal">15</span>
- <span class="normal">16</span>
- <span class="normal">17</span>
- <span class="normal">18</span>
- <span class="normal">19</span>
- <span class="normal">20</span>
- <span class="normal">21</span>
- <span class="normal">22</span>
- <span class="normal">23</span>
- <span class="normal">24</span>
- <span class="normal">25</span>
- <span class="normal">26</span>
- <span class="normal">27</span>
- <span class="normal">28</span>
- <span class="normal">29</span>
- <span class="normal">30</span>
- <span class="normal">31</span>
- <span class="normal">32</span>
- <span class="normal">33</span>
- <span class="normal">34</span>
- <span class="normal">35</span>
- <span class="normal">36</span>
- <span class="normal">37</span>
- <span class="normal">38</span>
- <span class="normal">39</span>
- <span class="normal">40</span>
- <span class="normal">41</span>
- <span class="normal">42</span>
- <span class="normal">43</span>
- <span class="normal">44</span>
- <span class="normal">45</span>
- <span class="normal">46</span>
- <span class="normal">47</span>
- <span class="normal">48</span>
- <span class="normal">49</span>
- <span class="normal">50</span>
- <span class="normal">51</span>
- <span class="normal">52</span>
- <span class="normal">53</span>
- <span class="normal">54</span>
- <span class="normal">55</span>
- <span class="normal">56</span>
- <span class="normal">57</span>
- <span class="normal">58</span>
- <span class="normal">59</span>
- <span class="normal">60</span>
- <span class="normal">61</span>
- <span class="normal">62</span>
- <span class="normal">63</span>
- <span class="normal">64</span>
- <span class="normal">65</span>
- <span class="normal">66</span>
- <span class="normal">67</span>
- <span class="normal">68</span>
- <span class="normal">69</span>
- <span class="normal">70</span>
- <span class="normal">71</span>
- <span class="normal">72</span>
- <span class="normal">73</span>
- <span class="normal">74</span>
- <span class="normal">75</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="c1">/// <summary></span>
- <span class="c1">/// The parameters used for inference.</span>
- <span class="c1">/// </summary></span>
- <span class="k">public</span><span class="w"> </span><span class="n">record</span><span class="w"> </span><span class="n">InferenceParams</span>
- <span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="n">IInferenceParams</span>
- <span class="p">{</span>
- <span class="w"> </span><span class="c1">/// <summary></span>
- <span class="w"> </span><span class="c1">/// number of tokens to keep from initial prompt</span>
- <span class="w"> </span><span class="c1">/// </summary></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">TokensKeep</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <summary></span>
- <span class="w"> </span><span class="c1">/// how many new tokens to predict (n_predict), set to -1 to generate the response indefinitely</span>
- <span class="w"> </span><span class="c1">/// until it completes.</span>
- <span class="w"> </span><span class="c1">/// </summary></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">MaxTokens</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">-</span><span class="m">1</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <summary></span>
- <span class="w"> </span><span class="c1">/// logit bias for specific tokens</span>
- <span class="w"> </span><span class="c1">/// </summary></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">Dictionary</span><span class="o"><</span><span class="n">LLamaToken</span><span class="p">,</span><span class="w"> </span><span class="kt">float</span><span class="o">>?</span><span class="w"> </span><span class="n">LogitBias</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">null</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <summary></span>
- <span class="w"> </span><span class="c1">/// Sequences where the model will stop generating further tokens.</span>
- <span class="w"> </span><span class="c1">/// </summary></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">IReadOnlyList</span><span class="o"><</span><span class="kt">string</span><span class="o">></span><span class="w"> </span><span class="n">AntiPrompts</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">Array</span><span class="p">.</span><span class="n">Empty</span><span class="o"><</span><span class="kt">string</span><span class="o">></span><span class="p">();</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">TopK</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">40</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">TopP</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0.95f</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">MinP</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0.05f</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">TfsZ</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">1.0f</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">TypicalP</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">1.0f</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">Temperature</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0.8f</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">RepeatPenalty</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">1.1f</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">RepeatLastTokensCount</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">64</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">FrequencyPenalty</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">.</span><span class="m">0f</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">PresencePenalty</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">.</span><span class="m">0f</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">MirostatType</span><span class="w"> </span><span class="n">Mirostat</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">MirostatType</span><span class="p">.</span><span class="n">Disable</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// <inheritdoc /></span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">MirostatTau</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">5.0f</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">float</span><span class="w"> </span><span class="n">MirostatEta</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">0.1f</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="kt">bool</span><span class="w"> </span><span class="n">PenalizeNL</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">true</span><span class="p">;</span>
-
- <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">SafeLLamaGrammarHandle</span><span class="o">?</span><span class="w"> </span><span class="n">Grammar</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
-
- <span class="w"> </span><span class="c1">/// &lt;inheritdoc /&gt;</span>
- <span class="w"> </span><span class="k">public</span><span class="w"> </span><span class="n">ISamplingPipeline</span><span class="o">?</span><span class="w"> </span><span class="n">SamplingPipeline</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">get</span><span class="p">;</span><span class="w"> </span><span class="k">set</span><span class="p">;</span><span class="w"> </span><span class="p">}</span>
- <span class="p">}</span>
- </code></pre></div></td></tr></table></div>
- <h2 id="save-and-load-executor-state">Save and load executor state<a class="headerlink" href="#save-and-load-executor-state" title="Permanent link"></a></h2>
- <p>An executor also has its own state, which can be saved and loaded. This is especially useful when your application needs to restore a user's previous session.</p>
- <p>The following code shows how to save and load the executor state.</p>
- <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 1</span>
- <span class="normal"> 2</span>
- <span class="normal"> 3</span>
- <span class="normal"> 4</span>
- <span class="normal"> 5</span>
- <span class="normal"> 6</span>
- <span class="normal"> 7</span>
- <span class="normal"> 8</span>
- <span class="normal"> 9</span>
- <span class="normal">10</span>
- <span class="normal">11</span>
- <span class="normal">12</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="n">InteractiveExecutor</span><span class="w"> </span><span class="n">executor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">InteractiveExecutor</span><span class="p">(</span><span class="n">model</span><span class="p">);</span>
- <span class="c1">// do some things...</span>
- <span class="n">executor</span><span class="p">.</span><span class="n">SaveState</span><span class="p">(</span><span class="s">"executor.st"</span><span class="p">);</span>
- <span class="kt">var</span><span class="w"> </span><span class="n">stateData</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">executor</span><span class="p">.</span><span class="n">GetStateData</span><span class="p">();</span>
-
- <span class="n">InteractiveExecutor</span><span class="w"> </span><span class="n">executor2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">InteractiveExecutor</span><span class="p">(</span><span class="n">model</span><span class="p">);</span>
- <span class="n">executor2</span><span class="p">.</span><span class="n">LoadState</span><span class="p">(</span><span class="n">stateData</span><span class="p">);</span>
- <span class="c1">// do some things...</span>
-
- <span class="n">InteractiveExecutor</span><span class="w"> </span><span class="n">executor3</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">new</span><span class="w"> </span><span class="n">InteractiveExecutor</span><span class="p">(</span><span class="n">model</span><span class="p">);</span>
- <span class="n">executor3</span><span class="p">.</span><span class="n">LoadState</span><span class="p">(</span><span class="s">"executor.st"</span><span class="p">);</span>
- <span class="c1">// do some things...</span>
- </code></pre></div></td></tr></table></div>
-
-
-
-
-
-
- </article>
- </div>
-
-
- </div>
-
- </main>
-
- <footer class="md-footer">
-
- <div class="md-footer-meta md-typeset">
- <div class="md-footer-meta__inner md-grid">
- <div class="md-copyright">
-
-
- Made with
- <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
- Material for MkDocs
- </a>
-
- </div>
-
- </div>
- </div>
- </footer>
-
- </div>
- <div class="md-dialog" data-md-component="dialog">
- <div class="md-dialog__inner md-typeset"></div>
- </div>
-
- <script id="__config" type="application/json">{"base": "../..", "features": ["content.action.edit", "navigation.instant"], "search": "../../assets/javascripts/workers/search.74e28a9f.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": {"provider": "mike"}}</script>
-
-
- <script src="../../assets/javascripts/bundle.220ee61c.min.js"></script>
-
-
- </body>
- </html>
|