
SafeLLamaContextHandle.cs

using System;
using System.Buffers;
using System.Runtime.InteropServices;
using System.Text;
using LLama.Exceptions;

namespace LLama.Native
{
    /// <summary>
    /// A safe wrapper around a llama_context
    /// </summary>
    // ReSharper disable once ClassNeverInstantiated.Global (used implicitly in native API)
    public sealed class SafeLLamaContextHandle
        : SafeLLamaHandleBase
    {
        #region properties and fields
        /// <summary>
        /// Total number of tokens in the vocabulary of this model
        /// </summary>
        public int VocabCount => ThrowIfDisposed().VocabCount;

        /// <summary>
        /// Total number of tokens in the context
        /// </summary>
        public int ContextSize => NativeApi.llama_n_ctx(this);

        /// <summary>
        /// Dimension of embedding vectors
        /// </summary>
        public int EmbeddingSize => ThrowIfDisposed().EmbeddingSize;

        /// <summary>
        /// Get the model which this context is using
        /// </summary>
        public SafeLlamaModelHandle ModelHandle => ThrowIfDisposed();

        private SafeLlamaModelHandle? _model;
        #endregion

        #region construction/destruction
        /// <inheritdoc />
        protected override bool ReleaseHandle()
        {
            llama_free(handle);
            SetHandle(IntPtr.Zero);

            // Decrement refcount on the model this context was keeping alive
            _model?.DangerousRelease();
            _model = null;

            return true;
        }

        private SafeLlamaModelHandle ThrowIfDisposed()
        {
            if (IsClosed)
                throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - it has been disposed");
            if (_model == null || _model.IsClosed)
                throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - the `SafeLlamaModelHandle` it depends on has been disposed");

            return _model;
        }

        /// <summary>
        /// Create a new llama_context for the given model
        /// </summary>
        /// <param name="model">The model to create a context for</param>
        /// <param name="lparams">Parameters for the new context</param>
        /// <returns></returns>
        /// <exception cref="RuntimeError"></exception>
        public static SafeLLamaContextHandle Create(SafeLlamaModelHandle model, LLamaContextParams lparams)
        {
            var ctx = llama_new_context_with_model(model, lparams);
            // A SafeHandle returned from P/Invoke is never a null reference, so check
            // for an invalid (null) native pointer instead
            if (ctx.IsInvalid)
                throw new RuntimeError("Failed to create context from model");

            // Increment the model reference count while this context exists.
            // DangerousAddRef throws if it fails, so there is no need to check "success".
            ctx._model = model;
            var success = false;
            ctx._model.DangerousAddRef(ref success);

            return ctx;
        }
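
        // Example usage (a sketch, not part of the class API): create a context from an
        // already-loaded model. `NativeApi.llama_context_default_params()` is assumed to be
        // bound in this build; substitute however you normally obtain LLamaContextParams.
        //
        //     var lparams = NativeApi.llama_context_default_params();
        //     using var ctx = SafeLLamaContextHandle.Create(model, lparams);
        //     // Create takes a reference on `model`, so it stays alive until `ctx` is disposed.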
        #endregion

        #region Native API
        static SafeLLamaContextHandle()
        {
            // This ensures that `NativeApi` has been loaded before calling the two native methods below
            NativeApi.llama_empty_call();
        }

        /// <summary>
        /// Create a new llama_context with the given model. **This should never be called directly! Always use SafeLLamaContextHandle.Create**!
        /// </summary>
        /// <param name="model"></param>
        /// <param name="params"></param>
        /// <returns></returns>
        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
        private static extern SafeLLamaContextHandle llama_new_context_with_model(SafeLlamaModelHandle model, LLamaContextParams @params);

        /// <summary>
        /// Frees all allocated memory in the given llama_context
        /// </summary>
        /// <param name="ctx"></param>
        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
        private static extern void llama_free(IntPtr ctx);
        #endregion

        /// <summary>
        /// Token logits obtained from the last call to Decode (llama_decode).
        /// The logits for the last token are stored in the last row.
        /// Can be mutated in order to change the probabilities of the next token.<br />
        /// Rows: n_tokens<br />
        /// Cols: n_vocab
        /// </summary>
        /// <returns></returns>
        public Span<float> GetLogits()
        {
            var model = ThrowIfDisposed();

            unsafe
            {
                var logits = NativeApi.llama_get_logits(this);
                return new Span<float>(logits, model.VocabCount);
            }
        }

        /// <summary>
        /// Logits for the ith token. Equivalent to: llama_get_logits(ctx) + i*n_vocab
        /// </summary>
        /// <param name="i">Index of the token within the most recent batch</param>
        /// <returns></returns>
        public Span<float> GetLogitsIth(int i)
        {
            var model = ThrowIfDisposed();

            unsafe
            {
                var logits = NativeApi.llama_get_logits_ith(this, i);
                return new Span<float>(logits, model.VocabCount);
            }
        }
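
        // Example usage (a sketch): greedy-pick the next token by scanning the logits of the
        // last token in the most recent batch. `lastIndex` is a hypothetical variable holding
        // that token's index within the batch.
        //
        //     var logits = ctx.GetLogitsIth(lastIndex);
        //     var best = 0;
        //     for (var i = 1; i < logits.Length; i++)
        //     {
        //         if (logits[i] > logits[best])
        //             best = i;
        //     }
        //     // `best` is now the id of the highest-scoring next token.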
        #region tokens
        /// <summary>
        /// Convert the given text into tokens
        /// </summary>
        /// <param name="text">The text to tokenize</param>
        /// <param name="add_bos">Whether the "BOS" token should be added</param>
        /// <param name="special">Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.</param>
        /// <param name="encoding">Encoding to use for the text</param>
        /// <returns></returns>
        /// <exception cref="RuntimeError"></exception>
        public LLamaToken[] Tokenize(string text, bool add_bos, bool special, Encoding encoding)
        {
            ThrowIfDisposed();

            if (string.IsNullOrEmpty(text) && !add_bos)
                return Array.Empty<LLamaToken>();

            // Calculate the number of bytes in the string. This is a pessimistic estimate of the
            // token count - the result cannot possibly contain more tokens than bytes.
            var count = encoding.GetByteCount(text) + (add_bos ? 1 : 0);

            // "Rent" an array to write results into (avoiding an allocation of a large array)
            var temporaryArray = ArrayPool<LLamaToken>.Shared.Rent(count);
            try
            {
                // Do the actual conversion
                var n = NativeApi.llama_tokenize(this, text, encoding, temporaryArray, count, add_bos, special);
                if (n < 0)
                {
                    throw new RuntimeError("Error happened during tokenization. This may be caused by using the wrong encoding - " +
                                           "try specifying the encoding the model expects (usually UTF-8).");
                }

                // Copy the results from the rented array into an array which is exactly the right size
                var result = new LLamaToken[n];
                Array.ConstrainedCopy(temporaryArray, 0, result, 0, n);
                return result;
            }
            finally
            {
                ArrayPool<LLamaToken>.Shared.Return(temporaryArray);
            }
        }
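
        // Example usage (a sketch): most llama.cpp vocabularies are UTF-8 encoded, so UTF-8
        // is usually the correct encoding to pass.
        //
        //     var tokens = ctx.Tokenize("Hello, world!", add_bos: true, special: false, Encoding.UTF8);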
        /// <summary>
        /// Convert a single llama token into bytes
        /// </summary>
        /// <param name="token">Token to decode</param>
        /// <param name="dest">A span to attempt to write into. If this is too small nothing will be written</param>
        /// <returns>The size of this token. **nothing will be written** if this is larger than `dest`</returns>
        public uint TokenToSpan(LLamaToken token, Span<byte> dest)
        {
            return ThrowIfDisposed().TokenToSpan(token, dest);
        }
        #endregion
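
        // Example usage (a sketch): decode a token, retrying with a larger buffer when the
        // returned size reports that the token is bigger than the destination span.
        //
        //     Span<byte> buffer = stackalloc byte[32];
        //     var size = ctx.TokenToSpan(token, buffer);
        //     if (size > buffer.Length)
        //     {
        //         // Nothing was written on the first attempt - retry with an exact-size buffer
        //         buffer = new byte[size];
        //         size = ctx.TokenToSpan(token, buffer);
        //     }
        //     var text = Encoding.UTF8.GetString(buffer.Slice(0, (int)size).ToArray());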
        /// <summary>
        /// Run the llama inference to obtain the logits and probabilities for the next token.
        /// </summary>
        /// <param name="tokens">The provided batch of new tokens to process</param>
        /// <param name="n_past">The number of tokens to use from previous eval calls</param>
        /// <returns>Returns true on success</returns>
        [Obsolete("use llama_decode() instead")]
        public bool Eval(ReadOnlySpan<LLamaToken> tokens, int n_past)
        {
            unsafe
            {
                fixed (LLamaToken* pinned = tokens)
                {
                    // the entire `eval` system needs replacing with the new batch system!
                    var ret = NativeApi.llama_eval(this, pinned, tokens.Length, n_past);
                    return ret == 0;
                }
            }
        }

        /// <summary>
        /// Process a batch of tokens with the model, making their logits available through GetLogits/GetLogitsIth
        /// </summary>
        /// <param name="batch">The batch of tokens to process</param>
        /// <returns>A positive return value does not indicate a fatal error, but rather a warning:<br />
        /// - 0: success<br />
        /// - 1: could not find a KV slot for the batch (try reducing the size of the batch or increase the context)<br />
        /// - &lt; 0: error<br />
        /// </returns>
        public int Decode(LLamaBatchSafeHandle batch)
        {
            return NativeApi.llama_decode(this, batch.NativeBatch);
        }
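
        // Example usage (a sketch): handle the three classes of return value. `batch` is
        // assumed to be an LLamaBatchSafeHandle that has already been filled with tokens.
        //
        //     var rc = ctx.Decode(batch);
        //     if (rc == 1)
        //     {
        //         // No KV slot available: reduce the batch size or increase the context size, then retry
        //     }
        //     else if (rc < 0)
        //     {
        //         throw new RuntimeError($"llama_decode failed with code {rc}");
        //     }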
        #region state
        /// <summary>
        /// Get the size of the state, when saved as bytes
        /// </summary>
        public ulong GetStateSize()
        {
            return NativeApi.llama_get_state_size(this);
        }

        /// <summary>
        /// Get the raw state of this context, encoded as bytes. Data is written into the `dest` pointer.
        /// </summary>
        /// <param name="dest">Destination to write to</param>
        /// <param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
        /// <returns>The number of bytes written to dest</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if dest is too small</exception>
        public unsafe ulong GetState(byte* dest, ulong size)
        {
            return GetState(new IntPtr(dest), size);
        }

        /// <summary>
        /// Get the raw state of this context, encoded as bytes. Data is written into the `dest` pointer.
        /// </summary>
        /// <param name="dest">Destination to write to</param>
        /// <param name="size">Number of bytes available to write to in dest (check required size with `GetStateSize()`)</param>
        /// <returns>The number of bytes written to dest</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if dest is too small</exception>
        public ulong GetState(IntPtr dest, ulong size)
        {
            var required = GetStateSize();
            if (size < required)
                throw new ArgumentOutOfRangeException(nameof(size), $"Allocated space is too small, {size} < {required}");

            unsafe
            {
                return NativeApi.llama_copy_state_data(this, (byte*)dest.ToPointer());
            }
        }

        /// <summary>
        /// Set the raw state of this context
        /// </summary>
        /// <param name="src">The pointer to read the state from</param>
        /// <returns>Number of bytes read from the src pointer</returns>
        public unsafe ulong SetState(byte* src)
        {
            return SetState(new IntPtr(src));
        }

        /// <summary>
        /// Set the raw state of this context
        /// </summary>
        /// <param name="src">The pointer to read the state from</param>
        /// <returns>Number of bytes read from the src pointer</returns>
        public ulong SetState(IntPtr src)
        {
            unsafe
            {
                return NativeApi.llama_set_state_data(this, (byte*)src.ToPointer());
            }
        }
        #endregion
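
        // Example usage (a sketch): round-trip the context state through unmanaged memory.
        //
        //     var size = ctx.GetStateSize();
        //     var buffer = Marshal.AllocHGlobal(checked((int)size));
        //     try
        //     {
        //         var written = ctx.GetState(buffer, size);
        //         // ... later, restore the snapshot into the same (or another) context:
        //         var read = ctx.SetState(buffer);
        //     }
        //     finally
        //     {
        //         Marshal.FreeHGlobal(buffer);
        //     }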
        /// <summary>
        /// Set the RNG seed
        /// </summary>
        /// <param name="seed">The seed value to use</param>
        public void SetSeed(uint seed)
        {
            NativeApi.llama_set_rng_seed(this, seed);
        }
    }
}