using System;
using System.Runtime.InteropServices;
namespace LLama.Native
{
/// <summary>
/// Called by llama.cpp with a progress value between 0 and 1
/// </summary>
/// <param name="progress">Current progress, from 0 to 1</param>
/// <param name="ctx">User data pointer</param>
/// <returns>If the provided progress_callback returns true, model loading continues.
/// If it returns false, model loading is immediately aborted.</returns>
/// <remarks>llama_progress_callback</remarks>
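/// <example>
/// A minimal sketch of a progress handler (the method name <c>ReportProgress</c> is illustrative, not part of the API):
/// <code>
/// static bool ReportProgress(float progress, IntPtr ctx)
/// {
///     Console.WriteLine($"Loading model: {progress * 100:F0}%");
///     return true; // return false to abort loading
/// }
/// </code>
/// </example>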
public delegate bool LlamaProgressCallback(float progress, IntPtr ctx);
/// <summary>
/// A C# representation of the llama.cpp `llama_context_params` struct
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct LLamaContextParams
{
/// <summary>
/// RNG seed, -1 for random
/// </summary>
public uint seed;
/// <summary>
/// text context, 0 = from model
/// </summary>
public uint n_ctx;
/// <summary>
/// logical maximum batch size that can be submitted to llama_decode
/// </summary>
public uint n_batch;
/// <summary>
/// physical maximum batch size
/// </summary>
public uint n_ubatch;
/// <summary>
/// max number of sequences (i.e. distinct states for recurrent models)
/// </summary>
public uint n_seq_max;
/// <summary>
/// number of threads to use for generation
/// </summary>
public uint n_threads;
/// <summary>
/// number of threads to use for batch processing
/// </summary>
public uint n_threads_batch;
/// <summary>
/// RoPE scaling type, from `enum llama_rope_scaling_type`
/// </summary>
public RopeScalingType rope_scaling_type;
/// <summary>
/// whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
/// </summary>
public LLamaPoolingType llama_pooling_type;
/// <summary>
/// RoPE base frequency, 0 = from model
/// </summary>
public float rope_freq_base;
/// <summary>
/// RoPE frequency scaling factor, 0 = from model
/// </summary>
public float rope_freq_scale;
/// <summary>
/// YaRN extrapolation mix factor, negative = from model
/// </summary>
public float yarn_ext_factor;
/// <summary>
/// YaRN magnitude scaling factor
/// </summary>
public float yarn_attn_factor;
/// <summary>
/// YaRN low correction dim
/// </summary>
public float yarn_beta_fast;
/// <summary>
/// YaRN high correction dim
/// </summary>
public float yarn_beta_slow;
/// <summary>
/// YaRN original context size
/// </summary>
public uint yarn_orig_ctx;
/// <summary>
/// defragment the KV cache if holes/size &gt; defrag_threshold, set to &lt; 0 to disable (default)
/// </summary>
public float defrag_threshold;
//todo: implement cb_eval callback support
/// <summary>
/// ggml_backend_sched_eval_callback
/// </summary>
public IntPtr cb_eval;
//todo: implement cb_eval callback support
/// <summary>
/// User data passed into cb_eval
/// </summary>
public IntPtr cb_eval_user_data;
/// <summary>
/// data type for K cache
/// </summary>
public GGMLType type_k;
/// <summary>
/// data type for V cache
/// </summary>
public GGMLType type_v;
/// <summary>
/// Deprecated! Compute all logits, not just the last one; control this per-token via llama_batch.logits instead.
/// </summary>
private sbyte _logits_all;
/// <summary>
/// if true, extract embeddings (together with logits)
/// </summary>
public bool embeddings
{
readonly get => Convert.ToBoolean(_embeddings);
set => _embeddings = Convert.ToSByte(value);
}
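// Note: an sbyte backing field is used for native bools because C's bool is a single byte,
// whereas a C# bool field in a sequential struct marshals as a 4-byte BOOL by default,
// which would break the native layout.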
private sbyte _embeddings;
/// <summary>
/// whether to offload the KQV ops (including the KV cache) to GPU
/// </summary>
public bool offload_kqv
{
readonly get => Convert.ToBoolean(_offload_kqv);
set => _offload_kqv = Convert.ToSByte(value);
}
private sbyte _offload_kqv;
//todo: implement abort callback support
/// <summary>
/// ggml_abort_callback
/// </summary>
public IntPtr abort_callback;
//todo: implement abort callback support
/// <summary>
/// User data passed into abort_callback
/// </summary>
public IntPtr abort_callback_user_data;
/// <summary>
/// Get the default LLamaContextParams
/// </summary>
/// <returns>The default parameters, as defined by the native llama_context_default_params function</returns>
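/// <example>
/// A typical usage sketch: start from the defaults and override individual fields
/// (the values shown are illustrative, not recommendations):
/// <code>
/// var ctxParams = LLamaContextParams.Default();
/// ctxParams.n_ctx = 2048;
/// ctxParams.n_threads = (uint)Environment.ProcessorCount;
/// </code>
/// </example>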
public static LLamaContextParams Default()
{
return llama_context_default_params();
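// The native entry point is declared as a local extern function so it is scoped to this method.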
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
static extern LLamaContextParams llama_context_default_params();
}
}
}