You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

LLamaKvCacheView.cs 5.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. using System;
  2. using System.Runtime.InteropServices;
  3. namespace LLama.Native;
  /// <summary>
  /// Per-cell information exposed by a KV cache view. Managed counterpart of the
  /// native <c>llama_kv_cache_view_cell</c> struct.
  /// </summary>
  [StructLayout(LayoutKind.Sequential)]
  public struct LLamaKvCacheViewCell
  {
      /// <summary>
      /// Position held by this cell, with KV cache shifts taken into account.
      /// The value may be negative when the cell is not populated.
      /// </summary>
      public LLamaPos pos;
  }
  /// <summary>
  /// An updateable view of the KV cache (llama_kv_cache_view)
  /// </summary>
  /// <remarks>
  /// The struct uses sequential layout and is passed by reference to the native
  /// <c>llama_kv_cache_view_*</c> functions, so the order and types of the fields
  /// must not be changed. The read-only properties below only expose the field
  /// values; they do not affect the layout.
  /// </remarks>
  [StructLayout(LayoutKind.Sequential)]
  public unsafe struct LLamaKvCacheView
  {
      // Number of KV cache cells. This will be the same as the context size.
      private int n_cells;

      // Maximum number of sequences that can exist in a cell. It's not an error
      // if there are more sequences in a cell than this value, however they will
      // not be visible in the view cells_sequences.
      private int n_max_seq;

      // Number of tokens in the cache. For example, if there are two populated
      // cells, the first with 1 sequence id in it and the second with 2 sequence
      // ids then you'll have 3 tokens.
      private int token_count;

      // Number of populated cache cells.
      private int used_cells;

      // Maximum contiguous empty slots in the cache.
      private int max_contiguous;

      // Index to the start of the max_contiguous slot range. Can be negative
      // when cache is full.
      private int max_contiguous_idx;

      // Information for an individual cell.
      private LLamaKvCacheViewCell* cells;

      // The sequences for each cell. There will be n_max_seq items per cell.
      private LLamaSeqId* cells_sequences;

      /// <summary>
      /// Number of KV cache cells. This will be the same as the context size.
      /// </summary>
      public readonly int CellCount => n_cells;

      /// <summary>
      /// Maximum number of sequences visible per cell in <see cref="CellSequences"/>.
      /// A cell may hold more sequences than this; the extra ones are not visible in the view.
      /// </summary>
      public readonly int MaxSequencesPerCell => n_max_seq;

      /// <summary>
      /// Number of tokens in the cache. A cell that belongs to several sequences
      /// contributes once per sequence.
      /// </summary>
      public readonly int TokenCount => token_count;

      /// <summary>
      /// Number of populated cache cells.
      /// </summary>
      public readonly int UsedCellCount => used_cells;

      /// <summary>
      /// Maximum number of contiguous empty slots in the cache.
      /// </summary>
      public readonly int MaxContiguous => max_contiguous;

      /// <summary>
      /// Index of the start of the <see cref="MaxContiguous"/> slot range.
      /// Can be negative when the cache is full.
      /// </summary>
      public readonly int MaxContiguousIdx => max_contiguous_idx;

      /// <summary>
      /// Raw pointer to the per-cell information. The memory is owned by the native
      /// view; do not use this pointer after the view has been freed.
      /// </summary>
      public readonly LLamaKvCacheViewCell* Cells => cells;

      /// <summary>
      /// Raw pointer to the sequence ids of every cell, with
      /// <see cref="MaxSequencesPerCell"/> items per cell. The memory is owned by the
      /// native view; do not use this pointer after the view has been freed.
      /// </summary>
      public readonly LLamaSeqId* CellSequences => cells_sequences;
  }
  /// <summary>
  /// A safe handle for a LLamaKvCacheView
  /// </summary>
  public class LLamaKvCacheViewSafeHandle
      : SafeLLamaHandleBase
  {
      private readonly SafeLLamaContextHandle _ctx;
      private LLamaKvCacheView _view;

      /// <summary>
      /// Initialize a LLamaKvCacheViewSafeHandle which will call `llama_kv_cache_view_free` when disposed
      /// </summary>
      /// <param name="ctx">The context this view was created from; used for later updates and queries</param>
      /// <param name="view">The native view to take ownership of</param>
      public LLamaKvCacheViewSafeHandle(SafeLLamaContextHandle ctx, LLamaKvCacheView view)
          // The native state lives in `_view`, not in the handle value, so a placeholder
          // pointer is passed to the base class.
          // NOTE(review): presumably non-zero so the base does not treat the handle as invalid - confirm against SafeLLamaHandleBase.
          : base(IntPtr.MaxValue, true)
      {
          _ctx = ctx;
          _view = view;
      }

      /// <summary>
      /// Allocate a new KV cache view (llama_kv_cache_view_init)
      /// </summary>
      /// <param name="ctx">The context to create the view for</param>
      /// <param name="maxSequences">The maximum number of sequences visible in this view per cell</param>
      /// <returns>A handle which frees the native view when disposed</returns>
      public static LLamaKvCacheViewSafeHandle Allocate(SafeLLamaContextHandle ctx, int maxSequences)
      {
          var result = NativeApi.llama_kv_cache_view_init(ctx, maxSequences);
          return new LLamaKvCacheViewSafeHandle(ctx, result);
      }

      /// <inheritdoc />
      protected override bool ReleaseHandle()
      {
          NativeApi.llama_kv_cache_view_free(ref _view);
          SetHandle(IntPtr.Zero);

          return true;
      }

      /// <summary>
      /// Update this view
      /// </summary>
      /// <exception cref="ObjectDisposedException">Thrown if this handle has already been released</exception>
      public void Update()
      {
          // Once released, `_view` has been passed to `llama_kv_cache_view_free`;
          // handing it back to native code would operate on a freed view.
          if (IsClosed)
          {
              throw new ObjectDisposedException(nameof(LLamaKvCacheViewSafeHandle));
          }

          NativeApi.llama_kv_cache_view_update(_ctx, ref _view);
      }

      /// <summary>
      /// Count the number of used cells in the KV cache
      /// </summary>
      /// <returns>The value reported by `llama_get_kv_cache_used_cells` for the context</returns>
      public int CountCells()
      {
          return NativeApi.llama_get_kv_cache_used_cells(_ctx);
      }

      /// <summary>
      /// Count the number of tokens in the KV cache. If a token is assigned to multiple sequences it will be counted multiple times
      /// </summary>
      /// <returns>The value reported by `llama_get_kv_cache_token_count` for the context</returns>
      public int CountTokens()
      {
          return NativeApi.llama_get_kv_cache_token_count(_ctx);
      }

      /// <summary>
      /// Get the raw KV cache view
      /// </summary>
      /// <returns>A reference to the view stored inside this handle (not a copy)</returns>
      public ref LLamaKvCacheView GetView()
      {
          return ref _view;
      }
  }
  partial class NativeApi
  {
      /// <summary>
      /// Create an empty KV cache view. (use only for debugging purposes)
      /// </summary>
      /// <param name="ctx">Context to create the view for</param>
      /// <param name="n_max_seq">Maximum number of sequences visible per cell in the view</param>
      /// <returns>The newly created view</returns>
      [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
      public static extern LLamaKvCacheView llama_kv_cache_view_init(SafeLLamaContextHandle ctx, int n_max_seq);

      /// <summary>
      /// Free a KV cache view. (use only for debugging purposes)
      /// </summary>
      /// <param name="view">The view to free</param>
      [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
      public static extern void llama_kv_cache_view_free(ref LLamaKvCacheView view);

      /// <summary>
      /// Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
      /// </summary>
      /// <param name="ctx">Context whose KV cache is inspected</param>
      /// <param name="view">The view to update in place</param>
      [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
      public static extern void llama_kv_cache_view_update(SafeLLamaContextHandle ctx, ref LLamaKvCacheView view);

      /// <summary>
      /// Returns the number of tokens in the KV cache (slow, use only for debug)
      /// If a KV cell has multiple sequences assigned to it, it will be counted multiple times
      /// </summary>
      /// <param name="ctx">Context whose KV cache is inspected</param>
      /// <returns>The token count</returns>
      [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
      public static extern int llama_get_kv_cache_token_count(SafeLLamaContextHandle ctx);

      /// <summary>
      /// Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
      /// </summary>
      /// <param name="ctx">Context whose KV cache is inspected</param>
      /// <returns>The number of used cells</returns>
      [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
      public static extern int llama_get_kv_cache_used_cells(SafeLLamaContextHandle ctx);
  }