using System;
using System.Runtime.InteropServices;

namespace LLama.Native
{
    /// <summary>
    /// A C# representation of the llama.cpp `llama_model_params` struct
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public unsafe struct LLamaModelParams
    {
        /// <summary>
        /// number of layers to store in VRAM
        /// </summary>
        public int n_gpu_layers;

        /// <summary>
        /// how to split the model across multiple GPUs
        /// </summary>
        public GPUSplitMode split_mode;

        /// <summary>
        /// the GPU that is used for scratch and small tensors
        /// </summary>
        public int main_gpu;

        /// <summary>
        /// how to split layers across multiple GPUs (size: <see cref="NativeApi.llama_max_devices"/>)
        /// </summary>
        public float* tensor_split;

        /// <summary>
        /// called with a progress value between 0 and 1, pass NULL to disable. If the provided progress_callback
        /// returns true, model loading continues. If it returns false, model loading is immediately aborted.
        /// </summary>
#if NETSTANDARD2_0
        // this code is intended to be used when running LlamaSharp on .NET Framework 4.8 (.NET Standard 2.0),
        // as .NET Framework 4.8 does not play nice with the LlamaProgressCallback type
        public IntPtr progress_callback;
#else
        public LlamaProgressCallback progress_callback;
#endif

        /// <summary>
        /// context pointer passed to the progress callback
        /// </summary>
        public void* progress_callback_user_data;

        /// <summary>
        /// override key-value pairs of the model meta data
        /// </summary>
        public LLamaModelMetadataOverride* kv_overrides;

        /// <summary>
        /// only load the vocabulary, no weights
        /// </summary>
        public bool vocab_only
        {
            readonly get => Convert.ToBoolean(_vocab_only);
            set => _vocab_only = Convert.ToSByte(value);
        }
        private sbyte _vocab_only;

        /// <summary>
        /// use mmap if possible
        /// </summary>
        public bool use_mmap
        {
            readonly get => Convert.ToBoolean(_use_mmap);
            set => _use_mmap = Convert.ToSByte(value);
        }
        private sbyte _use_mmap;

        /// <summary>
        /// force system to keep model in RAM
        /// </summary>
        public bool use_mlock
        {
            readonly get => Convert.ToBoolean(_use_mlock);
            set => _use_mlock = Convert.ToSByte(value);
        }
        private sbyte _use_mlock;
    }
}
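
// A minimal usage sketch of populating this struct for a CPU-only model load.
// The call site below assumes a P/Invoke binding named
// `NativeApi.llama_load_model_from_file(string, LLamaModelParams)` mirroring the
// llama.cpp C API; the exact LLamaSharp entry point and return type may differ
// between versions, so it is left commented out.
namespace LLama.Native
{
    internal static class LLamaModelParamsUsageSketch
    {
        /// <summary>
        /// Build a parameter block for a CPU-only load of a GGUF model.
        /// </summary>
        public static LLamaModelParams CpuOnlyParams()
        {
            // Pointer fields (tensor_split, kv_overrides, progress_callback_user_data)
            // are left at their zero-initialized (null) defaults.
            return new LLamaModelParams
            {
                n_gpu_layers = 0,   // keep every layer on the CPU
                main_gpu = 0,       // unused with no GPU layers, but must be a valid index
                vocab_only = false, // load weights, not just the tokenizer vocabulary
                use_mmap = true,    // memory-map the file instead of copying it into RAM
                use_mlock = false,  // do not pin the mapping into physical memory
            };
        }

        // Hypothetical call site (binding name is an assumption):
        // var model = NativeApi.llama_load_model_from_file("model.gguf", CpuOnlyParams());
    }
}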