using System;
using System.Runtime.InteropServices;

namespace LLama.Native
{
    /// <summary>
    /// Called repeatedly by the native loader while a model is loading.
    /// </summary>
    /// <param name="progress">Load progress between 0 and 1.</param>
    /// <param name="ctx">Opaque user-data pointer (the value supplied in
    /// <see cref="LLamaContextParams.progress_callback_user_data"/>).</param>
    // llama.cpp exposes this as a plain C (cdecl) function pointer. Without this
    // attribute the runtime default (Winapi -> StdCall on 32-bit Windows) would
    // use the wrong calling convention and corrupt the stack on that platform.
    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
    public delegate void LlamaProgressCallback(float progress, IntPtr ctx);

    /// <summary>
    /// Managed mirror of the native <c>llama_context_params</c> struct.
    /// </summary>
    /// <remarks>
    /// Marshaled with <see cref="LayoutKind.Sequential"/>: the field declaration
    /// order and field types below must exactly match the native definition —
    /// do NOT reorder, remove, or retype fields. Native one-byte booleans are
    /// stored in private <see cref="sbyte"/> backing fields; the <c>bool</c>
    /// properties are convenience wrappers only and do not affect the layout
    /// (properties never participate in sequential marshaling).
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct LLamaContextParams
    {
        /// <summary>
        /// RNG seed, -1 for random
        /// </summary>
        public int seed;

        /// <summary>
        /// text context
        /// </summary>
        public int n_ctx;

        /// <summary>
        /// prompt processing batch size
        /// </summary>
        public int n_batch;

        /// <summary>
        /// grouped-query attention (TEMP - will be moved to model hparams)
        /// </summary>
        public int n_gqa;

        /// <summary>
        /// rms norm epsilon (TEMP - will be moved to model hparams)
        /// </summary>
        public float rms_norm_eps;

        /// <summary>
        /// number of layers to store in VRAM
        /// </summary>
        public int n_gpu_layers;

        /// <summary>
        /// the GPU that is used for scratch and small tensors
        /// </summary>
        public int main_gpu;

        /// <summary>
        /// how to split layers across multiple GPUs
        /// </summary>
        // Native side: pointer to a float array (one weight per GPU) — left as a
        // raw pointer-sized value here so the caller controls the allocation.
        public nint tensor_split;

        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
        /// RoPE base frequency
        /// </summary>
        public float rope_freq_base;

        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
        /// RoPE frequency scaling factor
        /// </summary>
        public float rope_freq_scale;

        /// <summary>
        /// called with a progress value between 0 and 1, pass NULL to disable
        /// </summary>
        // Kept as a raw IntPtr (obtain via Marshal.GetFunctionPointerForDelegate
        // on a LlamaProgressCallback); the delegate instance must be kept alive
        // by the caller for as long as the native side may invoke it.
        public IntPtr progress_callback;

        /// <summary>
        /// context pointer passed to the progress callback
        /// </summary>
        public IntPtr progress_callback_user_data;

        /// <summary>
        /// if true, reduce VRAM usage at the cost of performance
        /// </summary>
        public bool low_vram
        {
            get => Convert.ToBoolean(_low_vram);
            set => _low_vram = Convert.ToSByte(value);
        }
        private sbyte _low_vram;

        /// <summary>
        /// if true, use experimental mul_mat_q kernels
        /// </summary>
        public bool mul_mat_q
        {
            get => Convert.ToBoolean(_mul_mat_q);
            set => _mul_mat_q = Convert.ToSByte(value);
        }
        private sbyte _mul_mat_q;

        /// <summary>
        /// use fp16 for KV cache
        /// </summary>
        public bool f16_kv
        {
            get => Convert.ToBoolean(_f16_kv);
            set => _f16_kv = Convert.ToSByte(value);
        }
        private sbyte _f16_kv;

        /// <summary>
        /// the llama_eval() call computes all logits, not just the last one
        /// </summary>
        public bool logits_all
        {
            get => Convert.ToBoolean(_logits_all);
            set => _logits_all = Convert.ToSByte(value);
        }
        private sbyte _logits_all;

        /// <summary>
        /// only load the vocabulary, no weights
        /// </summary>
        public bool vocab_only
        {
            get => Convert.ToBoolean(_vocab_only);
            set => _vocab_only = Convert.ToSByte(value);
        }
        private sbyte _vocab_only;

        /// <summary>
        /// use mmap if possible
        /// </summary>
        public bool use_mmap
        {
            get => Convert.ToBoolean(_use_mmap);
            set => _use_mmap = Convert.ToSByte(value);
        }
        private sbyte _use_mmap;

        /// <summary>
        /// force system to keep model in RAM
        /// </summary>
        public bool use_mlock
        {
            get => Convert.ToBoolean(_use_mlock);
            set => _use_mlock = Convert.ToSByte(value);
        }
        private sbyte _use_mlock;

        /// <summary>
        /// embedding mode only
        /// </summary>
        public bool embedding
        {
            get => Convert.ToBoolean(_embedding);
            set => _embedding = Convert.ToSByte(value);
        }
        private sbyte _embedding;
    }
}