You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

SafeLLamaContextHandle.cs 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. using System;
  2. using System.Buffers;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. using LLama.Exceptions;
  6. namespace LLama.Native
  7. {
  8. /// <summary>
  9. /// A safe wrapper around a llama_context
  10. /// </summary>
  11. public sealed class SafeLLamaContextHandle
  12. : SafeLLamaHandleBase
  13. {
  14. #region properties and fields
  15. /// <summary>
  16. /// Total number of tokens in vocabulary of this model
  17. /// </summary>
  18. public int VocabCount => ThrowIfDisposed().VocabCount;
  19. /// <summary>
  20. /// Total number of tokens in the context
  21. /// </summary>
  22. public int ContextSize => NativeApi.llama_n_ctx(this);
  23. /// <summary>
  24. /// Dimension of embedding vectors
  25. /// </summary>
  26. public int EmbeddingSize => ThrowIfDisposed().EmbeddingSize;
  27. /// <summary>
  28. /// Get the model which this context is using
  29. /// </summary>
  30. public SafeLlamaModelHandle ModelHandle => ThrowIfDisposed();
  31. private SafeLlamaModelHandle? _model;
  32. #endregion
  33. #region construction/destruction
  34. /// <summary>
  35. /// Create a new SafeLLamaContextHandle
  36. /// </summary>
  37. /// <param name="handle">pointer to an allocated llama_context</param>
  38. /// <param name="model">the model which this context was created from</param>
  39. public SafeLLamaContextHandle(IntPtr handle, SafeLlamaModelHandle model)
  40. : base(handle)
  41. {
  42. // Increment the model reference count while this context exists
  43. _model = model;
  44. var success = false;
  45. _model.DangerousAddRef(ref success);
  46. if (!success)
  47. throw new RuntimeError("Failed to increment model refcount");
  48. }
  49. /// <inheritdoc />
  50. protected override bool ReleaseHandle()
  51. {
  52. NativeApi.llama_free(DangerousGetHandle());
  53. SetHandle(IntPtr.Zero);
  54. // Decrement refcount on model
  55. _model?.DangerousRelease();
  56. _model = null!;
  57. return true;
  58. }
  59. private SafeLlamaModelHandle ThrowIfDisposed()
  60. {
  61. if (IsClosed)
  62. throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - it has been disposed");
  63. if (_model == null || _model.IsClosed)
  64. throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - `SafeLlamaModelHandle` has been disposed");
  65. return _model!;
  66. }
  67. /// <summary>
  68. /// Create a new llama_state for the given model
  69. /// </summary>
  70. /// <param name="model"></param>
  71. /// <param name="lparams"></param>
  72. /// <returns></returns>
  73. /// <exception cref="RuntimeError"></exception>
  74. public static SafeLLamaContextHandle Create(SafeLlamaModelHandle model, LLamaContextParams lparams)
  75. {
  76. var ctx_ptr = NativeApi.llama_new_context_with_model(model, lparams);
  77. if (ctx_ptr == IntPtr.Zero)
  78. throw new RuntimeError("Failed to create context from model");
  79. return new(ctx_ptr, model);
  80. }
  81. #endregion
  82. /// <summary>
  83. /// Token logits obtained from the last call to llama_eval()
  84. /// The logits for the last token are stored in the last row
  85. /// Can be mutated in order to change the probabilities of the next token.<br />
  86. /// Rows: n_tokens<br />
  87. /// Cols: n_vocab
  88. /// </summary>
  89. /// <returns></returns>
  90. public Span<float> GetLogits()
  91. {
  92. var model = ThrowIfDisposed();
  93. unsafe
  94. {
  95. var logits = NativeApi.llama_get_logits(this);
  96. return new Span<float>(logits, model.VocabCount);
  97. }
  98. }
  99. #region tokens
  100. /// <summary>
  101. /// Convert the given text into tokens
  102. /// </summary>
  103. /// <param name="text">The text to tokenize</param>
  104. /// <param name="add_bos">Whether the "BOS" token should be added</param>
  105. /// <param name="encoding">Encoding to use for the text</param>
  106. /// <param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
  107. /// <returns></returns>
  108. /// <exception cref="RuntimeError"></exception>
  109. public int[] Tokenize(string text, bool add_bos, bool special, Encoding encoding)
  110. {
  111. ThrowIfDisposed();
  112. if (string.IsNullOrEmpty(text) && !add_bos)
  113. return Array.Empty<int>();
  114. // Calculate number of bytes in string, this is a pessimistic estimate of token count. It can't
  115. // possibly be more than this.
  116. var count = encoding.GetByteCount(text) + (add_bos ? 1 : 0);
  117. // "Rent" an array to write results into (avoiding an allocation of a large array)
  118. var temporaryArray = ArrayPool<int>.Shared.Rent(count);
  119. try
  120. {
  121. // Do the actual conversion
  122. var n = NativeApi.llama_tokenize(this, text, encoding, temporaryArray, count, add_bos, special);
  123. if (n < 0)
  124. {
  125. throw new RuntimeError("Error happened during tokenization. It's possibly caused by wrong encoding. Please try to " +
  126. "specify the encoding.");
  127. }
  128. // Copy the results from the rented into an array which is exactly the right size
  129. var result = new int[n];
  130. Array.ConstrainedCopy(temporaryArray, 0, result, 0, n);
  131. return result;
  132. }
  133. finally
  134. {
  135. ArrayPool<int>.Shared.Return(temporaryArray);
  136. }
  137. }
  138. /// <summary>
  139. /// Convert a single llama token into bytes
  140. /// </summary>
  141. /// <param name="token">Token to decode</param>
  142. /// <param name="dest">A span to attempt to write into. If this is too small nothing will be written</param>
  143. /// <returns>The size of this token. **nothing will be written** if this is larger than `dest`</returns>
  144. public int TokenToSpan(int token, Span<byte> dest)
  145. {
  146. return ThrowIfDisposed().TokenToSpan(token, dest);
  147. }
  148. /// <summary>
  149. /// Convert a set of tokens into a string
  150. /// </summary>
  151. /// <param name="tokens"></param>
  152. /// <param name="encoding"></param>
  153. /// <returns></returns>
  154. public string DeTokenize(IReadOnlyList<int> tokens, Encoding encoding)
  155. {
  156. var chars = ArrayPool<char>.Shared.Rent(tokens.Count * 2);
  157. try
  158. {
  159. var span = ThrowIfDisposed().TokensToSpan(tokens, chars.AsSpan(), encoding);
  160. if (span.Length == 0)
  161. return "";
  162. unsafe
  163. {
  164. fixed (char* ptr = &span[0])
  165. return new string(ptr, 0, span.Length);
  166. }
  167. }
  168. finally
  169. {
  170. ArrayPool<char>.Shared.Return(chars);
  171. }
  172. }
  173. #endregion
  174. /// <summary>
  175. /// Run the llama inference to obtain the logits and probabilities for the next token.
  176. /// </summary>
  177. /// <param name="tokens">The provided batch of new tokens to process</param>
  178. /// <param name="n_past">the number of tokens to use from previous eval calls</param>
  179. /// <returns>Returns true on success</returns>
  180. public bool Eval(ReadOnlySpan<int> tokens, int n_past)
  181. {
  182. unsafe
  183. {
  184. fixed (int* pinned = tokens)
  185. {
  186. // the entire `eval` system needs replacing with the new batch system!
  187. var ret = NativeApi.llama_eval(this, pinned, tokens.Length, n_past);
  188. return ret == 0;
  189. }
  190. }
  191. }
  192. /// <summary>
  193. /// </summary>
  194. /// <param name="batch"></param>
  195. /// <returns>Positive return values does not mean a fatal error, but rather a warning:<br />
  196. /// - 0: success<br />
  197. /// - 1: could not find a KV slot for the batch (try reducing the size of the batch or increase the context)<br />
  198. /// - &lt; 0: error<br />
  199. /// </returns>
  200. public int Decode(LLamaBatchSafeHandle batch)
  201. {
  202. return NativeApi.llama_decode(this, batch.NativeBatch);
  203. }
  204. #region state
  205. /// <summary>
  206. /// Get the size of the state, when saved as bytes
  207. /// </summary>
  208. public ulong GetStateSize()
  209. {
  210. return NativeApi.llama_get_state_size(this);
  211. }
  212. /// <summary>
  213. /// Get the raw state of this context, encoded as bytes. Data is written into the `dest` pointer.
  214. /// </summary>
  215. /// <param name="dest">Destination to write to</param>
  216. /// <param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
  217. /// <returns>The number of bytes written to dest</returns>
  218. /// <exception cref="ArgumentOutOfRangeException">Thrown if dest is too small</exception>
  219. public unsafe ulong GetState(byte* dest, ulong size)
  220. {
  221. return GetState(new IntPtr(dest), size);
  222. }
  223. /// <summary>
  224. /// Get the raw state of this context, encoded as bytes. Data is written into the `dest` pointer.
  225. /// </summary>
  226. /// <param name="dest">Destination to write to</param>
  227. /// <param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
  228. /// <returns>The number of bytes written to dest</returns>
  229. /// <exception cref="ArgumentOutOfRangeException">Thrown if dest is too small</exception>
  230. public ulong GetState(IntPtr dest, ulong size)
  231. {
  232. var required = GetStateSize();
  233. if (size < required)
  234. throw new ArgumentOutOfRangeException(nameof(size), $"Allocated space is too small, {size} < {required}");
  235. unsafe
  236. {
  237. return NativeApi.llama_copy_state_data(this, (byte*)dest.ToPointer());
  238. }
  239. }
  240. /// <summary>
  241. /// Set the raw state of this context
  242. /// </summary>
  243. /// <param name="src">The pointer to read the state from</param>
  244. /// <returns>Number of bytes read from the src pointer</returns>
  245. public unsafe ulong SetState(byte* src)
  246. {
  247. return SetState(new IntPtr(src));
  248. }
  249. /// <summary>
  250. /// Set the raw state of this context
  251. /// </summary>
  252. /// <param name="src">The pointer to read the state from</param>
  253. /// <returns>Number of bytes read from the src pointer</returns>
  254. public ulong SetState(IntPtr src)
  255. {
  256. unsafe
  257. {
  258. return NativeApi.llama_set_state_data(this, (byte*)src.ToPointer());
  259. }
  260. }
  261. #endregion
  262. }
  263. }