using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

namespace LLama.Native
{
    /// <summary>
    /// Delegate matching the native model-loading progress callback:
    /// invoked with a progress value between 0 and 1 and the opaque
    /// user-data pointer supplied in <see cref="LLamaContextParams.progress_callback_user_data"/>.
    /// </summary>
    /// <param name="progress">Load progress, between 0 and 1.</param>
    /// <param name="ctx">Opaque user context pointer passed through from the native side.</param>
    public delegate void LlamaProgressCallback(float progress, IntPtr ctx);

    /// <summary>
    /// Managed mirror of the native llama_context_params struct.
    /// NOTE(review): marshaled with <see cref="LayoutKind.Sequential"/> — field order,
    /// field types and the <c>[MarshalAs(UnmanagedType.I1)]</c> attributes must stay in
    /// exact agreement with the native header; do not reorder or retype fields.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct LLamaContextParams
    {
        /// <summary>
        /// RNG seed, -1 for random
        /// </summary>
        public int seed;

        /// <summary>
        /// text context
        /// </summary>
        public int n_ctx;

        /// <summary>
        /// prompt processing batch size
        /// </summary>
        public int n_batch;

        /// <summary>
        /// grouped-query attention (TEMP - will be moved to model hparams)
        /// </summary>
        public int n_gqa;

        /// <summary>
        /// rms norm epsilon (TEMP - will be moved to model hparams)
        /// </summary>
        public float rms_norm_eps;

        /// <summary>
        /// number of layers to store in VRAM
        /// </summary>
        public int n_gpu_layers;

        /// <summary>
        /// the GPU that is used for scratch and small tensors
        /// </summary>
        public int main_gpu;

        /// <summary>
        /// how to split layers across multiple GPUs
        /// (native pointer to a float array; one weight per device — per upstream header)
        /// </summary>
        public nint tensor_split;

        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
        /// RoPE base frequency
        /// </summary>
        public float rope_freq_base;

        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
        /// RoPE frequency scaling factor
        /// </summary>
        public float rope_freq_scale;

        /// <summary>
        /// called with a progress value between 0 and 1, pass NULL to disable.
        /// Kept as a raw <see cref="IntPtr"/> (not a delegate) so the struct stays blittable;
        /// callers marshal a <see cref="LlamaProgressCallback"/> into it themselves.
        /// </summary>
        public IntPtr progress_callback;

        /// <summary>
        /// context pointer passed to the progress callback
        /// </summary>
        public IntPtr progress_callback_user_data;

        // The native struct uses 1-byte C bools, hence UnmanagedType.I1 on every
        // bool below — the default C# bool marshaling (4-byte BOOL) would corrupt
        // the layout of all subsequent fields.

        /// <summary>
        /// if true, reduce VRAM usage at the cost of performance
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool low_vram;

        /// <summary>
        /// if true, use experimental mul_mat_q kernels
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool mul_mat_q;

        /// <summary>
        /// use fp16 for KV cache
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool f16_kv;

        /// <summary>
        /// the llama_eval() call computes all logits, not just the last one
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool logits_all;

        /// <summary>
        /// only load the vocabulary, no weights
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool vocab_only;

        /// <summary>
        /// use mmap if possible
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool use_mmap;

        /// <summary>
        /// force system to keep model in RAM
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool use_mlock;

        /// <summary>
        /// embedding mode only
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool embedding;
    }
}