You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

SafeLLamaContextHandle.cs 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. using System;
  2. using System.Buffers;
  3. using System.Runtime.CompilerServices;
  4. using System.Runtime.InteropServices;
  5. using System.Text;
  6. using LLama.Exceptions;
  7. namespace LLama.Native
  8. {
  9. /// <summary>
  10. /// A safe wrapper around a llama_context
  11. /// </summary>
  12. public sealed class SafeLLamaContextHandle
  13. : SafeLLamaHandleBase
  14. {
  15. #region properties and fields
  16. /// <summary>
  17. /// Total number of tokens in vocabulary of this model
  18. /// </summary>
  19. public int VocabCount => ThrowIfDisposed().VocabCount;
  20. /// <summary>
  21. /// Total number of tokens in the context
  22. /// </summary>
  23. public int ContextSize => NativeApi.llama_n_ctx(this);
  24. /// <summary>
  25. /// Dimension of embedding vectors
  26. /// </summary>
  27. public int EmbeddingSize => ThrowIfDisposed().EmbeddingSize;
  28. /// <summary>
  29. /// Get the model which this context is using
  30. /// </summary>
  31. public SafeLlamaModelHandle ModelHandle => ThrowIfDisposed();
  32. private SafeLlamaModelHandle? _model;
  33. #endregion
  34. #region construction/destruction
  35. /// <summary>
  36. /// Create a new SafeLLamaContextHandle
  37. /// </summary>
  38. /// <param name="handle">pointer to an allocated llama_context</param>
  39. /// <param name="model">the model which this context was created from</param>
  40. public SafeLLamaContextHandle(IntPtr handle, SafeLlamaModelHandle model)
  41. : base(handle)
  42. {
  43. // Increment the model reference count while this context exists
  44. _model = model;
  45. var success = false;
  46. _model.DangerousAddRef(ref success);
  47. if (!success)
  48. throw new RuntimeError("Failed to increment model refcount");
  49. }
  50. /// <inheritdoc />
  51. protected override bool ReleaseHandle()
  52. {
  53. NativeApi.llama_free(DangerousGetHandle());
  54. SetHandle(IntPtr.Zero);
  55. // Decrement refcount on model
  56. _model?.DangerousRelease();
  57. _model = null!;
  58. return true;
  59. }
  60. private SafeLlamaModelHandle ThrowIfDisposed()
  61. {
  62. if (IsClosed)
  63. throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - it has been disposed");
  64. if (_model == null || _model.IsClosed)
  65. throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - `SafeLlamaModelHandle` has been disposed");
  66. return _model!;
  67. }
  68. /// <summary>
  69. /// Create a new llama_state for the given model
  70. /// </summary>
  71. /// <param name="model"></param>
  72. /// <param name="lparams"></param>
  73. /// <returns></returns>
  74. /// <exception cref="RuntimeError"></exception>
  75. public static SafeLLamaContextHandle Create(SafeLlamaModelHandle model, LLamaContextParams lparams)
  76. {
  77. var ctx_ptr = NativeApi.llama_new_context_with_model(model, lparams);
  78. if (ctx_ptr == IntPtr.Zero)
  79. throw new RuntimeError("Failed to create context from model");
  80. return new(ctx_ptr, model);
  81. }
  82. #endregion
  83. /// <summary>
  84. /// Token logits obtained from the last call to llama_eval()
  85. /// The logits for the last token are stored in the last row
  86. /// Can be mutated in order to change the probabilities of the next token.<br />
  87. /// Rows: n_tokens<br />
  88. /// Cols: n_vocab
  89. /// </summary>
  90. /// <returns></returns>
  91. public Span<float> GetLogits()
  92. {
  93. var model = ThrowIfDisposed();
  94. unsafe
  95. {
  96. var logits = NativeApi.llama_get_logits(this);
  97. return new Span<float>(logits, model.VocabCount);
  98. }
  99. }
  100. #region tokens
  101. /// <summary>
  102. /// Convert the given text into tokens
  103. /// </summary>
  104. /// <param name="text">The text to tokenize</param>
  105. /// <param name="add_bos">Whether the "BOS" token should be added</param>
  106. /// <param name="encoding">Encoding to use for the text</param>
  107. /// <param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
  108. /// <returns></returns>
  109. /// <exception cref="RuntimeError"></exception>
  110. public int[] Tokenize(string text, bool add_bos, bool special, Encoding encoding)
  111. {
  112. ThrowIfDisposed();
  113. if (string.IsNullOrEmpty(text) && !add_bos)
  114. return Array.Empty<int>();
  115. // Calculate number of bytes in string, this is a pessimistic estimate of token count. It can't
  116. // possibly be more than this.
  117. var count = encoding.GetByteCount(text) + (add_bos ? 1 : 0);
  118. // "Rent" an array to write results into (avoiding an allocation of a large array)
  119. var temporaryArray = ArrayPool<int>.Shared.Rent(count);
  120. try
  121. {
  122. // Do the actual conversion
  123. var n = NativeApi.llama_tokenize(this, text, encoding, temporaryArray, count, add_bos, special);
  124. if (n < 0)
  125. {
  126. throw new RuntimeError("Error happened during tokenization. It's possibly caused by wrong encoding. Please try to " +
  127. "specify the encoding.");
  128. }
  129. // Copy the results from the rented into an array which is exactly the right size
  130. var result = new int[n];
  131. Array.ConstrainedCopy(temporaryArray, 0, result, 0, n);
  132. return result;
  133. }
  134. finally
  135. {
  136. ArrayPool<int>.Shared.Return(temporaryArray);
  137. }
  138. }
  139. /// <summary>
  140. /// Convert a token into a string
  141. /// </summary>
  142. /// <param name="token">Token to decode into a string</param>
  143. /// <param name="encoding"></param>
  144. /// <returns></returns>
  145. public string TokenToString(int token, Encoding encoding)
  146. {
  147. return ThrowIfDisposed().TokenToString(token, encoding);
  148. }
  149. /// <summary>
  150. /// Append a single llama token to a string builder
  151. /// </summary>
  152. /// <param name="token">Token to decode</param>
  153. /// <param name="encoding"></param>
  154. /// <param name="dest">string builder to append the result to</param>
  155. public void TokenToString(int token, Encoding encoding, StringBuilder dest)
  156. {
  157. ThrowIfDisposed().TokenToString(token, encoding, dest);
  158. }
  159. /// <summary>
  160. /// Convert a single llama token into bytes
  161. /// </summary>
  162. /// <param name="token">Token to decode</param>
  163. /// <param name="dest">A span to attempt to write into. If this is too small nothing will be written</param>
  164. /// <returns>The size of this token. **nothing will be written** if this is larger than `dest`</returns>
  165. public int TokenToSpan(int token, Span<byte> dest)
  166. {
  167. return ThrowIfDisposed().TokenToSpan(token, dest);
  168. }
  169. #endregion
  170. /// <summary>
  171. /// Run the llama inference to obtain the logits and probabilities for the next token.
  172. /// </summary>
  173. /// <param name="tokens">The provided batch of new tokens to process</param>
  174. /// <param name="n_past">the number of tokens to use from previous eval calls</param>
  175. /// <returns>Returns true on success</returns>
  176. public bool Eval(ReadOnlySpan<int> tokens, int n_past)
  177. {
  178. unsafe
  179. {
  180. fixed (int* pinned = tokens)
  181. {
  182. var ret = NativeApi.llama_eval(this, pinned, tokens.Length, n_past);
  183. return ret == 0;
  184. }
  185. }
  186. }
  187. public int Decode(LLamaBatchSafeHandle batch)
  188. {
  189. return NativeApi.llama_decode(this, batch.Batch);
  190. }
  191. #region state
  192. /// <summary>
  193. /// Get the size of the state, when saved as bytes
  194. /// </summary>
  195. public ulong GetStateSize()
  196. {
  197. return NativeApi.llama_get_state_size(this);
  198. }
  199. /// <summary>
  200. /// Get the raw state of this context, encoded as bytes. Data is written into the `dest` pointer.
  201. /// </summary>
  202. /// <param name="dest">Destination to write to</param>
  203. /// <param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
  204. /// <returns>The number of bytes written to dest</returns>
  205. /// <exception cref="ArgumentOutOfRangeException">Thrown if dest is too small</exception>
  206. public unsafe ulong GetState(byte* dest, ulong size)
  207. {
  208. return GetState(new IntPtr(dest), size);
  209. }
  210. /// <summary>
  211. /// Get the raw state of this context, encoded as bytes. Data is written into the `dest` pointer.
  212. /// </summary>
  213. /// <param name="dest">Destination to write to</param>
  214. /// <param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
  215. /// <returns>The number of bytes written to dest</returns>
  216. /// <exception cref="ArgumentOutOfRangeException">Thrown if dest is too small</exception>
  217. public ulong GetState(IntPtr dest, ulong size)
  218. {
  219. var required = GetStateSize();
  220. if (size < required)
  221. throw new ArgumentOutOfRangeException(nameof(size), $"Allocated space is too small, {size} < {required}");
  222. unsafe
  223. {
  224. return NativeApi.llama_copy_state_data(this, (byte*)dest.ToPointer());
  225. }
  226. }
  227. /// <summary>
  228. /// Set the raw state of this context
  229. /// </summary>
  230. /// <param name="src">The pointer to read the state from</param>
  231. /// <returns>Number of bytes read from the src pointer</returns>
  232. public unsafe ulong SetState(byte* src)
  233. {
  234. return SetState(new IntPtr(src));
  235. }
  236. /// <summary>
  237. /// Set the raw state of this context
  238. /// </summary>
  239. /// <param name="src">The pointer to read the state from</param>
  240. /// <returns>Number of bytes read from the src pointer</returns>
  241. public ulong SetState(IntPtr src)
  242. {
  243. unsafe
  244. {
  245. return NativeApi.llama_set_state_data(this, (byte*)src.ToPointer());
  246. }
  247. }
  248. #endregion
  249. }
  250. }