using System; using System.Runtime.InteropServices; namespace LLama.Native; ///

/// Information associated with an individual cell in the KV cache view (llama_kv_cache_view_cell) ///

[StructLayout(LayoutKind.Sequential)] public struct LLamaKvCacheViewCell { ///

/// The position for this cell. Takes KV cache shifts into account. /// May be negative if the cell is not populated. ///

public LLamaPos pos; } ///

/// An updateable view of the KV cache (llama_kv_cache_view) ///

[StructLayout(LayoutKind.Sequential)] public unsafe struct LLamaKvCacheView { // Number of KV cache cells. This will be the same as the context size. int n_cells; // Maximum number of sequences that can exist in a cell. It's not an error // if there are more sequences in a cell than this value, however they will // not be visible in the view cells_sequences. int n_max_seq; // Number of tokens in the cache. For example, if there are two populated // cells, the first with 1 sequence id in it and the second with 2 sequence // ids then you'll have 3 tokens. int token_count; // Number of populated cache cells. int used_cells; // Maximum contiguous empty slots in the cache. int max_contiguous; // Index to the start of the max_contiguous slot range. Can be negative // when cache is full. int max_contiguous_idx; // Information for an individual cell. LLamaKvCacheViewCell* cells; // The sequences for each cell. There will be n_max_seq items per cell. LLamaSeqId* cells_sequences; } ///

/// A safe handle for a LLamaKvCacheView ///

public class LLamaKvCacheViewSafeHandle : SafeLLamaHandleBase { private readonly SafeLLamaContextHandle _ctx; private LLamaKvCacheView _view; ///

/// Initialize a LLamaKvCacheViewSafeHandle which will call `llama_kv_cache_view_free` when disposed ///

/// /// public LLamaKvCacheViewSafeHandle(SafeLLamaContextHandle ctx, LLamaKvCacheView view) : base((IntPtr)1, true) { _ctx = ctx; _view = view; } ///

/// Allocate a new KV cache view which can be used to inspect the KV cache ///

/// /// The maximum number of sequences visible in this view per cell /// public static LLamaKvCacheViewSafeHandle Allocate(SafeLLamaContextHandle ctx, int maxSequences) { var result = NativeApi.llama_kv_cache_view_init(ctx, maxSequences); return new LLamaKvCacheViewSafeHandle(ctx, result); } /// protected override bool ReleaseHandle() { NativeApi.llama_kv_cache_view_free(ref _view); SetHandle(IntPtr.Zero); return true; } ///

/// Update this view ///

public void Update() { NativeApi.llama_kv_cache_view_update(_ctx, ref _view); } ///

/// Get the raw KV cache view ///

/// public ref LLamaKvCacheView GetView() { return ref _view; } } public static partial class NativeApi { ///

/// Create an empty KV cache view. (use only for debugging purposes) ///

/// /// /// [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern LLamaKvCacheView llama_kv_cache_view_init(SafeLLamaContextHandle ctx, int n_max_seq); ///

/// Free a KV cache view. (use only for debugging purposes) ///

[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern void llama_kv_cache_view_free(ref LLamaKvCacheView view); ///

/// Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes) ///

/// /// [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern void llama_kv_cache_view_update(SafeLLamaContextHandle ctx, ref LLamaKvCacheView view); ///

/// Returns the number of tokens in the KV cache (slow, use only for debug) /// If a KV cell has multiple sequences assigned to it, it will be counted multiple times ///

/// /// [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern int llama_get_kv_cache_token_count(SafeLLamaContextHandle ctx); ///

/// Returns the number of used KV cells (i.e. have at least one sequence assigned to them) ///

/// /// [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] public static extern int llama_get_kv_cache_used_cells(SafeLLamaContextHandle ctx); }