using System;
using System.Runtime.InteropServices;

namespace LLama.Native
{
    /// <summary>
    /// Called by llama.cpp with a progress value between 0 and 1
    /// </summary>
    /// <param name="progress">Progress value between 0 and 1</param>
    /// <param name="ctx">User-supplied context pointer, passed back through unchanged</param>
    public delegate void LlamaProgressCallback(float progress, IntPtr ctx);

    /// <summary>
    /// A C# representation of the llama.cpp `llama_context_params` struct
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct LLamaContextParams
    {
        /// <summary>
        /// RNG seed, -1 for random
        /// </summary>
        public uint seed;

        /// <summary>
        /// text context (size of the context window, in tokens)
        /// </summary>
        public uint n_ctx;

        /// <summary>
        /// prompt processing batch size
        /// </summary>
        public uint n_batch;

        /// <summary>
        /// number of threads to use for generation
        /// </summary>
        public uint n_threads;

        /// <summary>
        /// number of threads to use for batch processing
        /// </summary>
        public uint n_threads_batch;

        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
        /// RoPE base frequency
        /// </summary>
        public float rope_freq_base;

        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
        /// RoPE frequency scaling factor
        /// </summary>
        public float rope_freq_scale;

        /// <summary>
        /// if true, use experimental mul_mat_q kernels
        /// </summary>
        public bool mul_mat_q
        {
            readonly get => Convert.ToBoolean(_mul_mat_q);
            set => _mul_mat_q = Convert.ToSByte(value);
        }
        // Backing fields are sbyte rather than bool: bool is not blittable,
        // and the native struct stores each of these flags as a single byte.
        private sbyte _mul_mat_q;

        /// <summary>
        /// use fp16 for the KV cache
        /// </summary>
        public bool f16_kv
        {
            readonly get => Convert.ToBoolean(_f16_kv);
            set => _f16_kv = Convert.ToSByte(value);
        }
        private sbyte _f16_kv;

        /// <summary>
        /// the llama_eval() call computes all logits, not just the last one
        /// </summary>
        public bool logits_all
        {
            readonly get => Convert.ToBoolean(_logits_all);
            set => _logits_all = Convert.ToSByte(value);
        }
        private sbyte _logits_all;

        /// <summary>
        /// embedding mode only
        /// </summary>
        public bool embedding
        {
            readonly get => Convert.ToBoolean(_embedding);
            set => _embedding = Convert.ToSByte(value);
        }
        private sbyte _embedding;
    }
}
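// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the bindings): shows one way a
// caller might fill in LLamaContextParams before handing it to a native
// llama.cpp entry point. The field values are example settings, not defaults
// taken from llama.cpp, and the class/method names below are hypothetical.
// ---------------------------------------------------------------------------
namespace LLama.Native.Examples
{
    using System;
    using System.Runtime.InteropServices;

    internal static class LLamaContextParamsSketch
    {
        internal static void Run()
        {
            var ctxParams = new LLamaContextParams
            {
                seed            = unchecked((uint)-1), // -1 wraps to uint.MaxValue => random seed
                n_ctx           = 2048,                // 2048-token context window
                n_batch         = 512,                 // prompt processing batch size
                n_threads       = 4,                   // generation threads
                n_threads_batch = 4,                   // batch processing threads
                rope_freq_base  = 10000f,              // a commonly used RoPE base frequency
                rope_freq_scale = 1f,                  // no RoPE frequency scaling
                mul_mat_q       = true,                // opt in to the experimental kernels
                f16_kv          = true,                // fp16 KV cache
                logits_all      = false,               // only the last token's logits
                embedding       = false,               // not embedding-only mode
            };

            // Because the struct is LayoutKind.Sequential with sbyte-backed
            // bool properties, its marshalled size is determined entirely by
            // the fields, matching the byte-sized flags in the native struct.
            Console.WriteLine($"Marshalled size: {Marshal.SizeOf<LLamaContextParams>()} bytes");
        }
    }
}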