using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

namespace LLama.Native
{
    /// <summary>
    /// Delegate matching the native model-loading progress callback:
    /// invoked with a progress value between 0 and 1 and the opaque
    /// user-data pointer supplied in <see cref="LLamaContextParams.progress_callback_user_data"/>.
    /// </summary>
    /// <param name="progress">Load progress, between 0 and 1.</param>
    /// <param name="ctx">Opaque user context pointer passed through from the native side.</param>
    public delegate void LlamaProgressCallback(float progress, IntPtr ctx);

    /// <summary>
    /// Managed mirror of the native llama_context_params struct.
    /// NOTE(review): marshaled with <see cref="LayoutKind.Sequential"/> — field order,
    /// field types and the <c>[MarshalAs(UnmanagedType.I1)]</c> attributes must stay in
    /// exact agreement with the native header; do not reorder or retype fields.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct LLamaContextParams
    {
        /// <summary>
        /// RNG seed, -1 for random
        /// </summary>
        public int seed;

        /// <summary>
        /// text context
        /// </summary>
        public int n_ctx;

        /// <summary>
        /// prompt processing batch size
        /// </summary>
        public int n_batch;

        /// <summary>
        /// grouped-query attention (TEMP - will be moved to model hparams)
        /// </summary>
        public int n_gqa;

        /// <summary>
        /// rms norm epsilon (TEMP - will be moved to model hparams)
        /// </summary>
        public float rms_norm_eps;

        /// <summary>
        /// number of layers to store in VRAM
        /// </summary>
        public int n_gpu_layers;

        /// <summary>
        /// the GPU that is used for scratch and small tensors
        /// </summary>
        public int main_gpu;

        /// <summary>
        /// how to split layers across multiple GPUs
        /// (native pointer to a float array; one weight per device — per upstream header)
        /// </summary>
        public nint tensor_split;

        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
        /// RoPE base frequency
        /// </summary>
        public float rope_freq_base;

        /// <summary>
        /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
        /// RoPE frequency scaling factor
        /// </summary>
        public float rope_freq_scale;

        /// <summary>
        /// called with a progress value between 0 and 1, pass NULL to disable.
        /// Kept as a raw <see cref="IntPtr"/> (not a delegate) so the struct stays blittable;
        /// callers marshal a <see cref="LlamaProgressCallback"/> into it themselves.
        /// </summary>
        public IntPtr progress_callback;

        /// <summary>
        /// context pointer passed to the progress callback
        /// </summary>
        public IntPtr progress_callback_user_data;

        // The native struct uses 1-byte C bools, hence UnmanagedType.I1 on every
        // bool below — the default C# bool marshaling (4-byte BOOL) would corrupt
        // the layout of all subsequent fields.

        /// <summary>
        /// if true, reduce VRAM usage at the cost of performance
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool low_vram;

        /// <summary>
        /// if true, use experimental mul_mat_q kernels
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool mul_mat_q;

        /// <summary>
        /// use fp16 for KV cache
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool f16_kv;

        /// <summary>
        /// the llama_eval() call computes all logits, not just the last one
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool logits_all;

        /// <summary>
        /// only load the vocabulary, no weights
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool vocab_only;

        /// <summary>
        /// use mmap if possible
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool use_mmap;

        /// <summary>
        /// force system to keep model in RAM
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool use_mlock;

        /// <summary>
        /// embedding mode only
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool embedding;
    }
}