using System.Text;
using LLama.Native;
namespace LLama.Abstractions;

/// <summary>
/// The parameters for initializing a LLama context from a model.
/// </summary>
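/// <example>
/// A minimal configuration sketch, assuming LLamaSharp's concrete <c>ModelParams</c>
/// implementation of this interface (the type name and model path are assumptions):
/// <code>
/// var parameters = new ModelParams("model.gguf")
/// {
///     ContextSize = 4096, // n_ctx
///     BatchSize = 512,    // n_batch
///     Seed = 1337,
///     Threads = 8,        // null would autodetect
/// };
/// </code>
/// </example>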
public interface IContextParams
{

/// <summary>
/// Model context size (n_ctx)
/// </summary>
uint? ContextSize { get; set; }

/// <summary>
/// Batch size for prompt processing (must be >= 32 to use BLAS) (n_batch)
/// </summary>
uint BatchSize { get; set; }

/// <summary>
/// Seed for the random number generator (seed)
/// </summary>
uint Seed { get; set; }

/// <summary>
/// Use f16 instead of f32 for the KV cache (memory_f16)
/// </summary>
bool UseFp16Memory { get; set; }

/// <summary>
/// Compute perplexity over the prompt (perplexity)
/// </summary>
bool Perplexity { get; set; }

/// <summary>
/// Whether to use embedding mode. (embedding) Note that if this is set to true,
/// the LLamaModel won't produce a text response anymore.
/// </summary>
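/// <example>
/// A minimal sketch of embedding extraction, assuming LLamaSharp's <c>ModelParams</c>,
/// <c>LLamaWeights</c> and <c>LLamaEmbedder</c> types (exact signatures vary between versions):
/// <code>
/// // EmbeddingMode must be true before the context is created.
/// var parameters = new ModelParams("model.gguf") { EmbeddingMode = true };
/// using var weights = LLamaWeights.LoadFromFile(parameters);
/// using var embedder = new LLamaEmbedder(weights, parameters);
/// float[] embedding = embedder.GetEmbeddings("Hello, world.");
/// </code>
/// </example>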
bool EmbeddingMode { get; set; }

/// <summary>
/// RoPE base frequency (null to fetch from the model)
/// </summary>
float? RopeFrequencyBase { get; set; }

/// <summary>
/// RoPE frequency scaling factor (null to fetch from the model)
/// </summary>
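/// <remarks>
/// As a rule of thumb for linear RoPE scaling, the factor is roughly
/// trained context / desired context: e.g. 0.5 stretches a model trained
/// on 4096 tokens to roughly 8192. (Illustrative only; output quality
/// tends to degrade as the scale moves further from 1.0.)
/// </remarks>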
float? RopeFrequencyScale { get; set; }

/// <summary>
/// Use experimental mul_mat_q kernels
/// </summary>
bool MulMatQ { get; set; }

/// <summary>
/// The encoding to use for models
/// </summary>
Encoding Encoding { get; set; }

/// <summary>
/// Number of threads (null = autodetect) (n_threads)
/// </summary>
uint? Threads { get; set; }

/// <summary>
/// Number of threads to use for batch processing (null = autodetect) (n_threads_batch)
/// </summary>
uint? BatchThreads { get; set; }

/// <summary>
/// YaRN extrapolation mix factor
/// </summary>
float? YarnExtrapolationFactor { get; set; }

/// <summary>
/// YaRN magnitude scaling factor
/// </summary>
float? YarnAttentionFactor { get; set; }

/// <summary>
/// YaRN low correction dim
/// </summary>
float? YarnBetaFast { get; set; }

/// <summary>
/// YaRN high correction dim
/// </summary>
float? YarnBetaSlow { get; set; }

/// <summary>
/// YaRN original context length
/// </summary>
uint? YarnOriginalContext { get; set; }

/// <summary>
/// YaRN scaling method to use.
/// </summary>
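/// <example>
/// A YaRN configuration sketch for running a model beyond its trained context,
/// assuming the concrete <c>ModelParams</c> implementation (enum member names
/// follow llama.cpp and may differ between versions):
/// <code>
/// var parameters = new ModelParams("model.gguf")
/// {
///     ContextSize = 16384,
///     YarnScalingType = RopeScalingType.LLAMA_ROPE_SCALING_YARN,
///     YarnOriginalContext = 4096, // context length the model was trained with
/// };
/// </code>
/// </example>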
RopeScalingType? YarnScalingType { get; set; }
}