Namespace: LLama
The base class for stateful LLama executors.
public abstract class StatefulExecutorBase : LLama.Abstractions.ILLamaExecutor
Inheritance Object → StatefulExecutorBase
Implements ILLamaExecutor
The context used by the executor.
public LLamaContext Context { get; }
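A small sketch of reading the property (assuming a concrete executor such as InteractiveExecutor has already been constructed, and that LLamaContext exposes a ContextSize property):

```csharp
// `executor` is any concrete StatefulExecutorBase, e.g. an InteractiveExecutor.
LLamaContext ctx = executor.Context;
Console.WriteLine($"Executor context size: {ctx.ContextSize} tokens");
```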
Specify a session file for the executor to use. This API is currently not verified.
public StatefulExecutorBase WithSessionFile(string filename)
filename String
Save the current session to a file. This API is currently not verified.
public void SaveSessionFile(string filename)
filename String
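Both session-file helpers are flagged as unverified, so the following is only an illustrative sketch of how they appear intended to be used: WithSessionFile binds a session file (and returns the executor, so calls can be chained), and SaveSessionFile persists the current session.

```csharp
// Illustrative only; both APIs are marked as not verified.
executor.WithSessionFile("chat.session");  // returns the executor for chaining
// ... run inference ...
executor.SaveSessionFile("chat.session");
```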
When the context window is exhausted, keep a number of tokens from the original prompt and recompute the logits for the remainder in batches.
protected void HandleRunOutOfContext(int tokensToKeep)
tokensToKeep Int32
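A derived executor would typically call this helper when the tokens it is about to evaluate would overflow the context window. A rough sketch, where the token-count variables are illustrative rather than actual fields of the base class, and LLamaContext is assumed to expose a ContextSize property:

```csharp
// Inside a derived executor's inference logic (variable names are illustrative):
int evaluatedTokens = 900;   // tokens already in the context
int pendingTokens = 200;     // tokens waiting to be evaluated
if (evaluatedTokens + pendingTokens > (int)Context.ContextSize)
{
    // Keep the first 64 prompt tokens and recompute the rest in batches.
    HandleRunOutOfContext(tokensToKeep: 64);
}
```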
Try to reuse the matching prefix from the session file.
protected void TryReuseMathingPrefix()
Decide whether to continue the main inference loop.
protected abstract bool GetLoopCondition(InferStateArgs args)
args InferStateArgs
Preprocess the inputs before inference.
protected abstract void PreprocessInputs(string text, InferStateArgs args)
text String
args InferStateArgs
Perform post-processing after inference.
protected abstract bool PostProcess(IInferenceParams inferenceParams, InferStateArgs args, out IEnumerable<string>? extraOutputs)
inferenceParams IInferenceParams
args InferStateArgs
extraOutputs IEnumerable<string>? (out parameter)
The core inference logic.
protected abstract void InferInternal(IInferenceParams inferenceParams, InferStateArgs args)
inferenceParams IInferenceParams
args InferStateArgs
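Taken together, these four abstract members define the template that concrete executors such as InteractiveExecutor and InstructExecutor fill in. Below is a minimal, hedged skeleton of a custom executor; the method bodies are placeholders, not the library's actual logic, and the assumption that the base constructor accepts the LLamaContext is inferred from the Context property. The class is declared abstract so the state-persistence members (SaveState/LoadState/GetStateData) can be left out of the sketch.

```csharp
using System.Collections.Generic;
using LLama;
using LLama.Abstractions;

// Illustrative skeleton only.
public abstract class MyExecutor : StatefulExecutorBase
{
    // Assumption: the base constructor accepts the LLamaContext exposed by Context.
    protected MyExecutor(LLamaContext context) : base(context) { }

    protected override bool GetLoopCondition(InferStateArgs args)
    {
        // Keep generating until an end condition is reached (placeholder).
        return true;
    }

    protected override void PreprocessInputs(string text, InferStateArgs args)
    {
        // e.g. tokenize `text` and queue the tokens for evaluation (placeholder).
    }

    protected override bool PostProcess(IInferenceParams inferenceParams,
        InferStateArgs args, out IEnumerable<string>? extraOutputs)
    {
        extraOutputs = null;
        return false; // placeholder; the base inference loop interprets the return value
    }

    protected override void InferInternal(IInferenceParams inferenceParams, InferStateArgs args)
    {
        // Evaluate queued tokens with the Context and sample the next token (placeholder).
    }
}
```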
Save the current state to a file.
public abstract void SaveState(string filename)
filename String
Get the current state data.
public abstract ExecutorBaseState GetStateData()
Load the state from data.
public abstract void LoadState(ExecutorBaseState data)
data ExecutorBaseState
Load the state from a file.
public abstract void LoadState(string filename)
filename String
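A short sketch of persisting and restoring executor state, either via a file or via the in-memory state object (filenames are arbitrary):

```csharp
// File-based round trip:
executor.SaveState("executor.state");
// ... later, with the same model and context set up again ...
executor.LoadState("executor.state");

// In-memory round trip:
var snapshot = executor.GetStateData();   // ExecutorBaseState
executor.LoadState(snapshot);
```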
Execute the inference.
public IEnumerable<string> Infer(string text, IInferenceParams inferenceParams, CancellationToken cancellationToken)
text String
inferenceParams IInferenceParams
cancellationToken CancellationToken
Execute the inference asynchronously.
public IAsyncEnumerable<string> InferAsync(string text, IInferenceParams inferenceParams, CancellationToken cancellationToken)
text String
inferenceParams IInferenceParams
cancellationToken CancellationToken
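End-to-end usage with a concrete executor. The construction pattern below (ModelParams → LLamaWeights → LLamaContext → InteractiveExecutor) reflects common LLamaSharp usage but may differ between library versions, so treat it as a sketch; the model path and prompt are placeholders.

```csharp
using System;
using System.Collections.Generic;
using System.Threading;
using LLama;
using LLama.Common;

// Sketch only: construction details may vary between LLamaSharp versions.
var parameters = new ModelParams("path/to/model.gguf") { ContextSize = 1024 };
using var weights = LLamaWeights.LoadFromFile(parameters);
using var context = weights.CreateContext(parameters);
var executor = new InteractiveExecutor(context);   // a concrete StatefulExecutorBase

var inferenceParams = new InferenceParams
{
    MaxTokens = 128,
    AntiPrompts = new List<string> { "User:" }
};

// Streaming, asynchronous inference:
await foreach (var token in executor.InferAsync("User: Hello!\nAssistant:", inferenceParams, CancellationToken.None))
{
    Console.Write(token);
}
```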