using System;
using System.Runtime.InteropServices;

namespace LLama.Native
{
    /// <summary>
    /// A C# representation of the llama.cpp `llama_model_params` struct.
    /// Field order and types must exactly mirror the native struct
    /// (hence <see cref="LayoutKind.Sequential"/>); do not reorder fields.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public unsafe struct LLamaModelParams
    {
        /// <summary>
        /// Number of layers to store in VRAM.
        /// </summary>
        public int n_gpu_layers;

        /// <summary>
        /// The GPU that is used for scratch and small tensors.
        /// </summary>
        public int main_gpu;

        /// <summary>
        /// How to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES).
        /// </summary>
        public float* tensor_split;

        /// <summary>
        /// Called with a progress value between 0 and 1, pass NULL to disable.
        /// </summary>
        /// <remarks>
        /// Was implicitly private, which made it impossible for callers to
        /// register a progress callback; widened to public.
        /// </remarks>
        public LlamaProgressCallback progress_callback;

        /// <summary>
        /// Context pointer passed to the progress callback.
        /// </summary>
        public void* progress_callback_user_data;

        /// <summary>
        /// Only load the vocabulary, no weights.
        /// </summary>
        /// <remarks>
        /// Backed by an sbyte field because the native struct uses a
        /// one-byte bool, which cannot be marshalled as a C# bool here.
        /// </remarks>
        public bool vocab_only
        {
            readonly get => Convert.ToBoolean(_vocab_only);
            set => _vocab_only = Convert.ToSByte(value);
        }
        private sbyte _vocab_only;

        /// <summary>
        /// Use mmap if possible.
        /// </summary>
        public bool use_mmap
        {
            readonly get => Convert.ToBoolean(_use_mmap);
            set => _use_mmap = Convert.ToSByte(value);
        }
        private sbyte _use_mmap;

        /// <summary>
        /// Force system to keep model in RAM.
        /// </summary>
        public bool use_mlock
        {
            readonly get => Convert.ToBoolean(_use_mlock);
            set => _use_mlock = Convert.ToSByte(value);
        }
        private sbyte _use_mlock;
    }
}