Namespace: LLama
The base class for stateful LLama executors.
public abstract class StatefulExecutorBase : LLama.Abstractions.ILLamaExecutor
Inheritance Object → StatefulExecutorBase
Implements ILLamaExecutor
The model used by the executor.
public LLamaModel Model { get; }
This API is currently not verified.
public StatefulExecutorBase WithSessionFile(string filename)
filename String
This API is currently not verified.
public void SaveSessionFile(string filename)
filename String
After the context runs out, take some tokens from the original prompt and recompute the logits in batches.
protected void HandleRunOutOfContext(int tokensToKeep)
tokensToKeep Int32
Try to reuse the matching prefix from the session file.
protected void TryReuseMathingPrefix()
Decide whether to continue the loop.
protected abstract bool GetLoopCondition(InferStateArgs args)
args InferStateArgs
Preprocess the inputs before the inference.
protected abstract void PreprocessInputs(string text, InferStateArgs args)
text String
args InferStateArgs
Do some post processing after the inference.
protected abstract bool PostProcess(InferenceParams inferenceParams, InferStateArgs args, out IEnumerable<string> extraOutputs)
inferenceParams InferenceParams
args InferStateArgs
extraOutputs IEnumerable&lt;String&gt; (out)
The core inference logic.
protected abstract void InferInternal(InferenceParams inferenceParams, InferStateArgs args)
inferenceParams InferenceParams
args InferStateArgs
Save the current state to a file.
public abstract void SaveState(string filename)
filename String
Get the current state data.
public abstract ExecutorBaseState GetStateData()
Load the state from data.
public abstract void LoadState(ExecutorBaseState data)
data ExecutorBaseState
Load the state from a file.
public abstract void LoadState(string filename)
filename String
Execute the inference.
public IEnumerable<string> Infer(string text, InferenceParams inferenceParams, CancellationToken cancellationToken)
text String
inferenceParams InferenceParams
cancellationToken CancellationToken
Execute the inference asynchronously.
public IAsyncEnumerable<string> InferAsync(string text, InferenceParams inferenceParams, CancellationToken cancellationToken)
text String
inferenceParams InferenceParams
cancellationToken CancellationToken