using System.Text;

namespace LLama.Abstractions
{
    /// <summary>
    /// The parameters for initializing a LLama model.
    /// </summary>
    public interface IModelParams
    {
        /// <summary>
        /// Model context size (n_ctx)
        /// </summary>
        int ContextSize { get; set; }

        /// <summary>
        /// The GPU that is used for scratch and small tensors
        /// </summary>
        int MainGpu { get; set; }

        /// <summary>
        /// If true, reduce VRAM usage at the cost of performance
        /// </summary>
        bool LowVram { get; set; }

        /// <summary>
        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
        /// </summary>
        int GpuLayerCount { get; set; }

        /// <summary>
        /// Seed for the random number generator (seed)
        /// </summary>
        int Seed { get; set; }

        /// <summary>
        /// Use f16 instead of f32 for memory kv (memory_f16)
        /// </summary>
        bool UseFp16Memory { get; set; }

        /// <summary>
        /// Use mmap for faster loads (use_mmap)
        /// </summary>
        bool UseMemorymap { get; set; }

        /// <summary>
        /// Use mlock to keep model in memory (use_mlock)
        /// </summary>
        bool UseMemoryLock { get; set; }

        /// <summary>
        /// Compute perplexity over the prompt (perplexity)
        /// </summary>
        bool Perplexity { get; set; }

        /// <summary>
        /// Model path (model)
        /// </summary>
        string ModelPath { get; set; }

        /// <summary>
        /// Model alias
        /// </summary>
        string ModelAlias { get; set; }

        /// <summary>
        /// Lora adapter path (lora_adapter)
        /// </summary>
        string LoraAdapter { get; set; }

        /// <summary>
        /// Base model path for the lora adapter (lora_base)
        /// </summary>
        string LoraBase { get; set; }

        /// <summary>
        /// Number of threads (-1 = autodetect) (n_threads)
        /// </summary>
        int Threads { get; set; }

        /// <summary>
        /// Batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
        /// </summary>
        int BatchSize { get; set; }

        /// <summary>
        /// Whether to convert eos to newline during the inference.
        /// </summary>
        bool ConvertEosToNewLine { get; set; }

        /// <summary>
        /// Whether to use embedding mode. (embedding) Note that if this is set to true,
        /// the LLamaModel won't produce text response anymore.
        /// </summary>
        bool EmbeddingMode { get; set; }

        /// <summary>
        /// How split tensors should be distributed across GPUs
        /// </summary>
        float[]? TensorSplits { get; set; }

        /// <summary>
        /// Grouped-Query Attention
        /// </summary>
        int GroupedQueryAttention { get; set; }

        /// <summary>
        /// RMS Norm Epsilon
        /// </summary>
        float RmsNormEpsilon { get; set; }

        /// <summary>
        /// RoPE base frequency
        /// </summary>
        float RopeFrequencyBase { get; set; }

        /// <summary>
        /// RoPE frequency scaling factor
        /// </summary>
        float RopeFrequencyScale { get; set; }

        /// <summary>
        /// Use experimental mul_mat_q kernels
        /// </summary>
        bool MulMatQ { get; set; }

        /// <summary>
        /// The encoding to use for models
        /// </summary>
        Encoding Encoding { get; set; }
    }
}