using System;
using System.Runtime.InteropServices;
namespace LLama.Native
{
/// <summary>
/// Called by llama.cpp with a progress value between 0 and 1
/// </summary>
/// <param name="progress">Current progress, from 0 to 1</param>
/// <param name="ctx">User data pointer</param>
/// <returns>If the provided progress_callback returns true, model loading continues.
/// If it returns false, model loading is immediately aborted.</returns>
/// <remarks>llama_progress_callback</remarks>
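/// <example>
/// A minimal sketch of a progress handler (the method name <c>ReportProgress</c> is illustrative, not part of the API):
/// <code>
/// static bool ReportProgress(float progress, IntPtr ctx)
/// {
///     Console.WriteLine($"Loading model: {progress * 100:F0}%");
///     return true; // return false to abort loading
/// }
/// </code>
/// </example>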
public delegate bool LlamaProgressCallback(float progress, IntPtr ctx);
/// <summary>
/// A C# representation of the llama.cpp `llama_context_params` struct
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct LLamaContextParams
{
/// <summary>
/// RNG seed, -1 for random
/// </summary>
public uint seed;
/// <summary>
/// text context, 0 = from model
/// </summary>
public uint n_ctx;
/// <summary>
/// logical maximum batch size that can be submitted to llama_decode
/// </summary>
public uint n_batch;
/// <summary>
/// physical maximum batch size
/// </summary>
public uint n_ubatch;
/// <summary>
/// max number of sequences (i.e. distinct states for recurrent models)
/// </summary>
public uint n_seq_max;
/// <summary>
/// number of threads to use for generation
/// </summary>
public uint n_threads;
/// <summary>
/// number of threads to use for batch processing
/// </summary>
public uint n_threads_batch;
/// <summary>
/// RoPE scaling type, from `enum llama_rope_scaling_type`
/// </summary>
public RopeScalingType rope_scaling_type;
/// <summary>
/// whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
/// </summary>
public LLamaPoolingType llama_pooling_type;
/// <summary>
/// RoPE base frequency, 0 = from model
/// </summary>
public float rope_freq_base;
/// <summary>
/// RoPE frequency scaling factor, 0 = from model
/// </summary>
public float rope_freq_scale;
/// <summary>
/// YaRN extrapolation mix factor, negative = from model
/// </summary>
public float yarn_ext_factor;
/// <summary>
/// YaRN magnitude scaling factor
/// </summary>
public float yarn_attn_factor;
/// <summary>
/// YaRN low correction dim
/// </summary>
public float yarn_beta_fast;
/// <summary>
/// YaRN high correction dim
/// </summary>
public float yarn_beta_slow;
/// <summary>
/// YaRN original context size
/// </summary>
public uint yarn_orig_ctx;
/// <summary>
/// defragment the KV cache if holes/size &gt; defrag_threshold, set to &lt; 0 to disable (default)
/// </summary>
public float defrag_threshold;
//todo: implement cb_eval callback support
/// <summary>
/// ggml_backend_sched_eval_callback
/// </summary>
public IntPtr cb_eval;
//todo: implement cb_eval callback support
/// <summary>
/// User data passed into cb_eval
/// </summary>
public IntPtr cb_eval_user_data;
/// <summary>
/// data type for K cache
/// </summary>
public GGMLType type_k;
/// <summary>
/// data type for V cache
/// </summary>
public GGMLType type_v;
/// <summary>
/// Deprecated! Compute all logits, not just the last one; control this per-token via llama_batch.logits instead.
/// </summary>
private sbyte _logits_all;
/// <summary>
/// if true, extract embeddings (together with logits)
/// </summary>
public bool embeddings
{
readonly get => Convert.ToBoolean(_embeddings);
set => _embeddings = Convert.ToSByte(value);
}
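// Note: an sbyte backing field is used for native bools because C's bool is a single byte,
// whereas a C# bool field in a sequential struct marshals as a 4-byte BOOL by default,
// which would break the native layout.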
private sbyte _embeddings;
/// <summary>
/// whether to offload the KQV ops (including the KV cache) to GPU
/// </summary>
public bool offload_kqv
{
readonly get => Convert.ToBoolean(_offload_kqv);
set => _offload_kqv = Convert.ToSByte(value);
}
private sbyte _offload_kqv;
//todo: implement abort callback support
/// <summary>
/// ggml_abort_callback
/// </summary>
public IntPtr abort_callback;
//todo: implement abort callback support
/// <summary>
/// User data passed into abort_callback
/// </summary>
public IntPtr abort_callback_user_data;
/// <summary>
/// Get the default LLamaContextParams
/// </summary>
/// <returns>The default parameters, as defined by the native llama_context_default_params function</returns>
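/// <example>
/// A typical usage sketch: start from the defaults and override individual fields
/// (the values shown are illustrative, not recommendations):
/// <code>
/// var ctxParams = LLamaContextParams.Default();
/// ctxParams.n_ctx = 2048;
/// ctxParams.n_threads = (uint)Environment.ProcessorCount;
/// </code>
/// </example>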
public static LLamaContextParams Default()
{
return llama_context_default_params();
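// The native entry point is declared as a local extern function so it is scoped to this method.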
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
static extern LLamaContextParams llama_context_default_params();
}
}
}