using System;
using System.Runtime.InteropServices;

namespace LLama.Native
{
    /// <summary>
    /// Called by llama.cpp with a progress value between 0 and 1
    /// </summary>
    /// <param name="progress"></param>
    /// <param name="ctx"></param>
    public delegate void LlamaProgressCallback(float progress, IntPtr ctx);

    /// <summary>
    /// A C# representation of the llama.cpp `llama_context_params` struct
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct LLamaContextParams
    {
        /// <summary>
        /// RNG seed, -1 for random
        /// </summary>
        public uint seed;

        /// <summary>
        /// text context, 0 = from model
        /// </summary>
        public uint n_ctx;

        /// <summary>
        /// prompt processing batch size
        /// </summary>
        public uint n_batch;

        /// <summary>
        /// number of threads to use for generation
        /// </summary>
        public uint n_threads;

        /// <summary>
        /// number of threads to use for batch processing
        /// </summary>
        public uint n_threads_batch;

        /// <summary>
        /// RoPE scaling type, from `enum llama_rope_scaling_type`
        /// </summary>
        public RopeScalingType rope_scaling_type;

        /// <summary>
        /// RoPE base frequency, 0 = from model
        /// </summary>
        public float rope_freq_base;

        /// <summary>
        /// RoPE frequency scaling factor, 0 = from model
        /// </summary>
        public float rope_freq_scale;

        /// <summary>
        /// YaRN extrapolation mix factor, negative = from model
        /// </summary>
        public float yarn_ext_factor;

        /// <summary>
        /// YaRN magnitude scaling factor
        /// </summary>
        public float yarn_attn_factor;

        /// <summary>
        /// YaRN low correction dim
        /// </summary>
        public float yarn_beta_fast;

        /// <summary>
        /// YaRN high correction dim
        /// </summary>
        public float yarn_beta_slow;

        /// <summary>
        /// YaRN original context size
        /// </summary>
        public uint yarn_orig_ctx;

        /// <summary>
        /// data type for K cache
        /// </summary>
        public GGMLType type_k;

        /// <summary>
        /// data type for V cache
        /// </summary>
        public GGMLType type_v;

        /// <summary>
        /// Deprecated!
        /// </summary>
        private sbyte _mul_mat_q;

        /// <summary>
        /// Deprecated!
        /// </summary>
        private sbyte _logits_all;

        /// <summary>
        /// embedding mode only
        /// </summary>
        public bool embedding
        {
            readonly get => Convert.ToBoolean(_embedding);
            set => _embedding = Convert.ToSByte(value);
        }
        private sbyte _embedding;

        /// <summary>
        /// whether to offload the KQV ops (including the KV cache) to GPU
        /// </summary>
        public bool offload_kqv
        {
            readonly get => Convert.ToBoolean(_offload_kqv);
            set => _offload_kqv = Convert.ToSByte(value);
        }
        private sbyte _offload_kqv;
    }
}
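
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the upstream binding): shows
// how this struct is typically populated before being handed to native code.
// Because only the fields participate in the sequential layout (the bool
// properties merely wrap the sbyte backing fields), the struct stays
// blittable and can be passed by value across the P/Invoke boundary. All
// values below are example choices, not defaults taken from llama.cpp, and
// the Examples namespace/class names are hypothetical.
// ---------------------------------------------------------------------------
namespace LLama.Native.Examples
{
    internal static class ContextParamsExample
    {
        internal static LLamaContextParams Build()
        {
            return new LLamaContextParams
            {
                seed = uint.MaxValue,   // all bits set, i.e. -1: request a random seed
                n_ctx = 2048,           // 0 would mean "use the model's own context size"
                n_batch = 512,          // prompt processing batch size
                n_threads = 8,          // generation threads
                n_threads_batch = 8,    // batch processing threads
                rope_freq_base = 0,     // 0 = take RoPE base frequency from the model
                rope_freq_scale = 0,    // 0 = take RoPE scaling factor from the model
                type_k = default,       // K cache data type (GGMLType)
                type_v = default,       // V cache data type (GGMLType)
                embedding = false,      // writes 0 into the sbyte backing field
                offload_kqv = true,     // writes 1 into the sbyte backing field
            };
        }
    }
}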