using System;
using System.Runtime.InteropServices;

namespace LLama.Native
{
    /// <summary>
    /// Called by llama.cpp with a progress value between 0 and 1
    /// </summary>
    /// <param name="progress"></param>
    /// <param name="ctx"></param>
    public delegate void LlamaProgressCallback(float progress, IntPtr ctx);

    /// <summary>
    /// A C# representation of the llama.cpp `llama_context_params` struct
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct LLamaContextParams
    {
        /// <summary>
        /// RNG seed, -1 for random
        /// </summary>
        public uint seed;

        /// <summary>
        /// text context, 0 = from model
        /// </summary>
        public uint n_ctx;

        /// <summary>
        /// prompt processing batch size
        /// </summary>
        public uint n_batch;

        /// <summary>
        /// number of threads to use for generation
        /// </summary>
        public uint n_threads;

        /// <summary>
        /// number of threads to use for batch processing
        /// </summary>
        public uint n_threads_batch;

        /// <summary>
        /// RoPE scaling type, from `enum llama_rope_scaling_type`
        /// </summary>
        public RopeScalingType rope_scaling_type;

        /// <summary>
        /// RoPE base frequency, 0 = from model
        /// </summary>
        public float rope_freq_base;

        /// <summary>
        /// RoPE frequency scaling factor, 0 = from model
        /// </summary>
        public float rope_freq_scale;

        /// <summary>
        /// YaRN extrapolation mix factor, negative = from model
        /// </summary>
        public float yarn_ext_factor;

        /// <summary>
        /// YaRN magnitude scaling factor
        /// </summary>
        public float yarn_attn_factor;

        /// <summary>
        /// YaRN low correction dim
        /// </summary>
        public float yarn_beta_fast;

        /// <summary>
        /// YaRN high correction dim
        /// </summary>
        public float yarn_beta_slow;

        /// <summary>
        /// YaRN original context size
        /// </summary>
        public uint yarn_orig_ctx;

        /// <summary>
        /// data type for K cache
        /// </summary>
        public GGMLType type_k;

        /// <summary>
        /// data type for V cache
        /// </summary>
        public GGMLType type_v;

        /// <summary>
        /// Deprecated!
        /// </summary>
        private sbyte _mul_mat_q;

        /// <summary>
        /// Deprecated!
        /// </summary>
        private sbyte _logits_all;

        /// <summary>
        /// embedding mode only
        /// </summary>
        public bool embedding
        {
            readonly get => Convert.ToBoolean(_embedding);
            set => _embedding = Convert.ToSByte(value);
        }
        private sbyte _embedding;

        /// <summary>
        /// whether to offload the KQV ops (including the KV cache) to GPU
        /// </summary>
        public bool offload_kqv
        {
            readonly get => Convert.ToBoolean(_offload_kqv);
            set => _offload_kqv = Convert.ToSByte(value);
        }
        private sbyte _offload_kqv;
    }
}
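
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the upstream binding): shows
// how this struct is typically populated before being handed to native code.
// Because only the fields participate in the sequential layout (the bool
// properties merely wrap the sbyte backing fields), the struct stays
// blittable and can be passed by value across the P/Invoke boundary. All
// values below are example choices, not defaults taken from llama.cpp, and
// the Examples namespace/class names are hypothetical.
// ---------------------------------------------------------------------------
namespace LLama.Native.Examples
{
    internal static class ContextParamsExample
    {
        internal static LLamaContextParams Build()
        {
            return new LLamaContextParams
            {
                seed = uint.MaxValue,   // all bits set, i.e. -1: request a random seed
                n_ctx = 2048,           // 0 would mean "use the model's own context size"
                n_batch = 512,          // prompt processing batch size
                n_threads = 8,          // generation threads
                n_threads_batch = 8,    // batch processing threads
                rope_freq_base = 0,     // 0 = take RoPE base frequency from the model
                rope_freq_scale = 0,    // 0 = take RoPE scaling factor from the model
                type_k = default,       // K cache data type (GGMLType)
                type_v = default,       // V cache data type (GGMLType)
                embedding = false,      // writes 0 into the sbyte backing field
                offload_kqv = true,     // writes 1 into the sbyte backing field
            };
        }
    }
}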