
SafeLLamaContextHandle.cs

using System;
using System.Buffers;
using System.Runtime.InteropServices;
using System.Text;
using LLama.Exceptions;

namespace LLama.Native
{
    /// <summary>
    /// A safe wrapper around a llama_context
    /// </summary>
    public sealed class SafeLLamaContextHandle
        : SafeLLamaHandleBase
    {
        #region properties and fields
        /// <summary>
        /// Total number of tokens in the vocabulary of this model
        /// </summary>
        public int VocabCount => ThrowIfDisposed().VocabCount;

        /// <summary>
        /// Total number of tokens in the context
        /// </summary>
        public int ContextSize => ThrowIfDisposed().ContextSize;

        /// <summary>
        /// Dimension of the embedding vectors
        /// </summary>
        public int EmbeddingSize => ThrowIfDisposed().EmbeddingSize;

        /// <summary>
        /// Get the number of tokens in the KV cache for this context
        /// </summary>
        public int KVCacheTokenCount
        {
            get
            {
                if (IsClosed)
                    throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - it has been disposed");
                return NativeApi.llama_get_kv_cache_token_count(this);
            }
        }

        /// <summary>
        /// Get the model which this context is using
        /// </summary>
        public SafeLlamaModelHandle ModelHandle => ThrowIfDisposed();

        private SafeLlamaModelHandle? _model;
        #endregion

        #region construction/destruction
        /// <summary>
        /// Create a new SafeLLamaContextHandle
        /// </summary>
        /// <param name="handle">pointer to an allocated llama_context</param>
        /// <param name="model">the model which this context was created from</param>
        public SafeLLamaContextHandle(IntPtr handle, SafeLlamaModelHandle model)
            : base(handle)
        {
            // Increment the model reference count while this context exists
            _model = model;
            var success = false;
            _model.DangerousAddRef(ref success);
            if (!success)
                throw new RuntimeError("Failed to increment model refcount");
        }

        /// <inheritdoc />
        protected override bool ReleaseHandle()
        {
            NativeApi.llama_free(DangerousGetHandle());
            SetHandle(IntPtr.Zero);

            // Decrement refcount on the model
            _model?.DangerousRelease();
            _model = null!;

            return true;
        }

        private SafeLlamaModelHandle ThrowIfDisposed()
        {
            if (IsClosed)
                throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - it has been disposed");
            if (_model == null || _model.IsClosed)
                throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - `SafeLlamaModelHandle` has been disposed");

            return _model!;
        }
        /// <summary>
        /// Create a new llama_context for the given model
        /// </summary>
        /// <param name="model">the model to create a context for</param>
        /// <param name="lparams">parameters to create the context with</param>
        /// <returns></returns>
        /// <exception cref="RuntimeError">Thrown if the native call fails to allocate a context</exception>
        public static SafeLLamaContextHandle Create(SafeLlamaModelHandle model, LLamaContextParams lparams)
        {
            var ctx_ptr = NativeApi.llama_new_context_with_model(model, lparams);
            if (ctx_ptr == IntPtr.Zero)
                throw new RuntimeError("Failed to create context from model");

            return new(ctx_ptr, model);
        }
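
        // Usage sketch (illustrative, not part of the upstream file): load a model and create a
        // context from it. `llama_context_default_params` and `SafeLlamaModelHandle.LoadFromFile`
        // are assumed to exist with these shapes; treat the names as assumptions.
        //
        //   var lparams = NativeApi.llama_context_default_params();
        //   using var model = SafeLlamaModelHandle.LoadFromFile("path/to/model.bin", lparams);
        //   using var ctx = SafeLLamaContextHandle.Create(model, lparams);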

        /// <summary>
        /// Create a new llama context with a clone of the current llama context state
        /// </summary>
        /// <param name="lparams">parameters to create the new context with</param>
        /// <returns></returns>
        public SafeLLamaContextHandle Clone(LLamaContextParams lparams)
        {
            // Allocate space to read the state of the current context
            var stateSize = GetStateSize();
            var stateMemory = Marshal.AllocHGlobal((nint)stateSize);
            try
            {
                // Copy state from this context into memory
                GetState(stateMemory, stateSize);

                // Create a new context
                var newCtx = Create(ModelHandle, lparams);

                // Copy state into the new context
                newCtx.SetState(stateMemory);

                return newCtx;
            }
            finally
            {
                Marshal.FreeHGlobal(stateMemory);
            }
        }
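
        // Clone sketch (illustrative): the copy gets its own independent state, so the two
        // contexts can diverge after the split. `ctx` and `lparams` are hypothetical locals.
        //
        //   using var fork = ctx.Clone(lparams);
        //   // `ctx` and `fork` now hold identical state but evaluate independently.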
        #endregion

        /// <summary>
        /// Convert the given text into tokens
        /// </summary>
        /// <param name="text">The text to tokenize</param>
        /// <param name="add_bos">Whether the "BOS" token should be added</param>
        /// <param name="encoding">Encoding to use for the text</param>
        /// <returns></returns>
        /// <exception cref="RuntimeError"></exception>
        public int[] Tokenize(string text, bool add_bos, Encoding encoding)
        {
            ThrowIfDisposed();

            if (string.IsNullOrEmpty(text) && !add_bos)
                return Array.Empty<int>();

            // Calculate the number of bytes in the string. This is a pessimistic upper bound on the
            // token count: every token is at least one byte, so there cannot possibly be more tokens than this.
            var count = encoding.GetByteCount(text) + (add_bos ? 1 : 0);

            // "Rent" an array to write results into (avoiding an allocation of a large array)
            var temporaryArray = ArrayPool<int>.Shared.Rent(count);
            try
            {
                // Do the actual conversion
                var n = NativeApi.llama_tokenize(this, text, encoding, temporaryArray, count, add_bos);
                if (n < 0)
                {
                    throw new RuntimeError("Error happened during tokenization. It was possibly caused by the wrong encoding - " +
                                           "please try specifying the encoding explicitly.");
                }

                // Copy the results from the rented array into an array which is exactly the right size
                var result = new int[n];
                Array.ConstrainedCopy(temporaryArray, 0, result, 0, n);
                return result;
            }
            finally
            {
                ArrayPool<int>.Shared.Return(temporaryArray);
            }
        }
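
        // Tokenize sketch (illustrative): convert a prompt to token ids before calling Eval.
        // `ctx` is a hypothetical local.
        //
        //   var tokens = ctx.Tokenize("Hello, world!", add_bos: true, Encoding.UTF8);
        //   // tokens[0] is the BOS token, followed by the ids for the encoded text.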

        /// <summary>
        /// Token logits obtained from the last call to llama_eval().
        /// The logits for the last token are stored in the last row.
        /// Can be mutated in order to change the probabilities of the next token.<br />
        /// Rows: n_tokens<br />
        /// Cols: n_vocab
        /// </summary>
        /// <returns></returns>
        public Span<float> GetLogits()
        {
            var model = ThrowIfDisposed();

            unsafe
            {
                var logits = NativeApi.llama_get_logits(this);
                return new Span<float>(logits, model.VocabCount);
            }
        }
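
        // Logits sketch (illustrative): since the returned span is mutable, logits can be biased
        // before sampling, e.g. banning a token by forcing its logit very low. `ctx` and
        // `bannedTokenId` are hypothetical locals.
        //
        //   var logits = ctx.GetLogits();
        //   logits[bannedTokenId] = float.NegativeInfinity;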

        /// <summary>
        /// Convert a token into a string
        /// </summary>
        /// <param name="token">Token to decode into a string</param>
        /// <param name="encoding">Encoding to use to decode the token bytes</param>
        /// <returns></returns>
        public string TokenToString(int token, Encoding encoding)
        {
            return ThrowIfDisposed().TokenToString(token, encoding);
        }

        /// <summary>
        /// Append a single llama token to a string builder
        /// </summary>
        /// <param name="token">Token to decode</param>
        /// <param name="encoding">Encoding to use to decode the token bytes</param>
        /// <param name="dest">string builder to append the result to</param>
        public void TokenToString(int token, Encoding encoding, StringBuilder dest)
        {
            ThrowIfDisposed().TokenToString(token, encoding, dest);
        }

        /// <summary>
        /// Convert a single llama token into bytes
        /// </summary>
        /// <param name="token">Token to decode</param>
        /// <param name="dest">A span to attempt to write into. If this is too small nothing will be written</param>
        /// <returns>The size of this token. **nothing will be written** if this is larger than `dest`</returns>
        public int TokenToSpan(int token, Span<byte> dest)
        {
            return ThrowIfDisposed().TokenToSpan(token, dest);
        }

        /// <summary>
        /// Run the llama inference to obtain the logits and probabilities for the next token.
        /// </summary>
        /// <param name="tokens">The provided batch of new tokens to process</param>
        /// <param name="n_past">the number of tokens to use from previous eval calls</param>
        /// <returns>Returns true on success</returns>
        public bool Eval(ReadOnlySpan<int> tokens, int n_past)
        {
            unsafe
            {
                fixed (int* pinned = tokens)
                {
                    return NativeApi.llama_eval(this, pinned, tokens.Length, n_past) == 0;
                }
            }
        }
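
        // Eval loop sketch (illustrative): feed the prompt, then extend one token at a time,
        // advancing `n_past` by the number of tokens already evaluated. `ctx`, `tokens` and
        // `nextToken` are hypothetical locals.
        //
        //   var n_past = 0;
        //   if (!ctx.Eval(tokens, n_past))
        //       throw new RuntimeError("eval failed");
        //   n_past += tokens.Length;
        //   // Sample the next token from GetLogits(), then:
        //   //   ctx.Eval(new[] { nextToken }, n_past); n_past += 1;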

        #region state
        /// <summary>
        /// Get the size of the state, when saved as bytes
        /// </summary>
        public ulong GetStateSize()
        {
            return NativeApi.llama_get_state_size(this);
        }

        /// <summary>
        /// Get the raw state of this context, encoded as bytes. Data is written into the `dest` pointer.
        /// </summary>
        /// <param name="dest">Destination to write to</param>
        /// <param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
        /// <returns>The number of bytes written to dest</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if dest is too small</exception>
        public unsafe ulong GetState(byte* dest, ulong size)
        {
            return GetState(new IntPtr(dest), size);
        }

        /// <summary>
        /// Get the raw state of this context, encoded as bytes. Data is written into the `dest` pointer.
        /// </summary>
        /// <param name="dest">Destination to write to</param>
        /// <param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
        /// <returns>The number of bytes written to dest</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if dest is too small</exception>
        public ulong GetState(IntPtr dest, ulong size)
        {
            var required = GetStateSize();
            if (size < required)
                throw new ArgumentOutOfRangeException(nameof(size), $"Allocated space is too small, {size} < {required}");

            unsafe
            {
                return NativeApi.llama_copy_state_data(this, (byte*)dest.ToPointer());
            }
        }

        /// <summary>
        /// Set the raw state of this context
        /// </summary>
        /// <param name="src">The pointer to read the state from</param>
        /// <returns>Number of bytes read from the src pointer</returns>
        public unsafe ulong SetState(byte* src)
        {
            return SetState(new IntPtr(src));
        }

        /// <summary>
        /// Set the raw state of this context
        /// </summary>
        /// <param name="src">The pointer to read the state from</param>
        /// <returns>Number of bytes read from the src pointer</returns>
        public ulong SetState(IntPtr src)
        {
            unsafe
            {
                return NativeApi.llama_set_state_data(this, (byte*)src.ToPointer());
            }
        }
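
        // State round-trip sketch (illustrative): save the context state to unmanaged memory and
        // restore it later, e.g. to rewind a conversation. This mirrors what Clone() does above;
        // `ctx` is a hypothetical local.
        //
        //   var size = ctx.GetStateSize();
        //   var buffer = Marshal.AllocHGlobal((nint)size);
        //   try
        //   {
        //       ctx.GetState(buffer, size);
        //       // ... continue evaluating, then roll back:
        //       ctx.SetState(buffer);
        //   }
        //   finally
        //   {
        //       Marshal.FreeHGlobal(buffer);
        //   }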
        #endregion
    }
}