Namespace: LLama.Common
The parameters for initializing a LLama model.
public class ModelParams : LLama.Abstractions.IModelParams, System.IEquatable<LLama.Common.ModelParams>
Inheritance Object → ModelParams
Implements IModelParams, IEquatable<ModelParams>
Model context size (n_ctx)
public int ContextSize { get; set; }
The GPU that is used for scratch and small tensors
public int MainGpu { get; set; }
If true, reduce VRAM usage at the cost of performance
public bool LowVram { get; set; }
Number of layers to run in VRAM / GPU memory (n_gpu_layers)
public int GpuLayerCount { get; set; }
Seed for the random number generator (seed)
public int Seed { get; set; }
Use f16 instead of f32 for memory kv (memory_f16)
public bool UseFp16Memory { get; set; }
Use mmap for faster loads (use_mmap)
public bool UseMemorymap { get; set; }
Use mlock to keep model in memory (use_mlock)
public bool UseMemoryLock { get; set; }
Compute perplexity over the prompt (perplexity)
public bool Perplexity { get; set; }
Model path (model)
public string ModelPath { get; set; }
Model alias
public string ModelAlias { get; set; }
LoRA adapter path (lora_adapter)
public string LoraAdapter { get; set; }
Base model path for the LoRA adapter (lora_base)
public string LoraBase { get; set; }
Number of threads (-1 = autodetect) (n_threads)
public int Threads { get; set; }
batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
public int BatchSize { get; set; }
Whether to convert EOS to a newline during inference.
public bool ConvertEosToNewLine { get; set; }
Whether to use embedding mode. (embedding) Note that if this is set to true,
the LLamaModel won't produce text responses anymore.
public bool EmbeddingMode { get; set; }
How split tensors should be distributed across GPUs
public Single[] TensorSplits { get; set; }
RoPE base frequency
public float RopeFrequencyBase { get; set; }
RoPE frequency scaling factor
public float RopeFrequencyScale { get; set; }
Use experimental mul_mat_q kernels
public bool MulMatQ { get; set; }
The encoding to use to convert text for the model
public Encoding Encoding { get; set; }
public ModelParams(string modelPath)
modelPath String
The model path.
Use an object initializer to set all optional parameters
public ModelParams(string modelPath, int contextSize, int gpuLayerCount, int seed, bool useFp16Memory, bool useMemorymap, bool useMemoryLock, bool perplexity, string loraAdapter, string loraBase, int threads, int batchSize, bool convertEosToNewLine, bool embeddingMode, float ropeFrequencyBase, float ropeFrequencyScale, bool mulMatQ, string encoding)
modelPath String
The model path.
contextSize Int32
Model context size (n_ctx)
gpuLayerCount Int32
Number of layers to run in VRAM / GPU memory (n_gpu_layers)
seed Int32
Seed for the random number generator (seed)
useFp16Memory Boolean
Whether to use f16 instead of f32 for memory kv (memory_f16)
useMemorymap Boolean
Whether to use mmap for faster loads (use_mmap)
useMemoryLock Boolean
Whether to use mlock to keep model in memory (use_mlock)
perplexity Boolean
Whether to compute perplexity over the prompt (perplexity)
loraAdapter String
Lora adapter path (lora_adapter)
loraBase String
Base model path for the lora adapter (lora_base)
threads Int32
Number of threads (-1 = autodetect) (n_threads)
batchSize Int32
Batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
convertEosToNewLine Boolean
Whether to convert EOS to a newline during inference.
embeddingMode Boolean
Whether to use embedding mode. (embedding) Note that if this is set to true, the LLamaModel won't produce text responses anymore.
ropeFrequencyBase Single
RoPE base frequency.
ropeFrequencyScale Single
RoPE frequency scaling factor
mulMatQ Boolean
Use experimental mul_mat_q kernels
encoding String
The encoding to use to convert text for the model
public string ToString()
protected bool PrintMembers(StringBuilder builder)
builder StringBuilder
public int GetHashCode()
public bool Equals(object obj)
obj Object
public bool Equals(ModelParams other)
other ModelParams
public ModelParams <Clone>$()