You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

SafeLLamaContextHandle.cs 7.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. using System;
  2. using System.Buffers;
  3. using System.Text;
  4. using LLama.Exceptions;
  5. namespace LLama.Native
  6. {
  7. /// <summary>
  8. /// A safe wrapper around a llama_context
  9. /// </summary>
  10. public class SafeLLamaContextHandle
  11. : SafeLLamaHandleBase
  12. {
  13. #region properties and fields
  14. /// <summary>
  15. /// Total number of tokens in vocabulary of this model
  16. /// </summary>
  17. public int VocabCount => ThrowIfDisposed().VocabCount;
  18. /// <summary>
  19. /// Total number of tokens in the context
  20. /// </summary>
  21. public int ContextSize => ThrowIfDisposed().ContextSize;
  22. /// <summary>
  23. /// Dimension of embedding vectors
  24. /// </summary>
  25. public int EmbeddingCount => ThrowIfDisposed().EmbeddingCount;
  26. /// <summary>
  27. /// This field guarantees that a reference to the model is held for as long as this handle is held
  28. /// </summary>
  29. private SafeLlamaModelHandle? _model;
  30. #endregion
  31. #region construction/destruction
  32. /// <summary>
  33. /// Create a new SafeLLamaContextHandle
  34. /// </summary>
  35. /// <param name="handle">pointer to an allocated llama_context</param>
  36. /// <param name="model">the model which this context was created from</param>
  37. public SafeLLamaContextHandle(IntPtr handle, SafeLlamaModelHandle model)
  38. : base(handle)
  39. {
  40. // Increment the model reference count while this context exists
  41. _model = model;
  42. var success = false;
  43. _model.DangerousAddRef(ref success);
  44. if (!success)
  45. throw new RuntimeError("Failed to increment model refcount");
  46. }
  47. /// <inheritdoc />
  48. protected override bool ReleaseHandle()
  49. {
  50. // Decrement refcount on model
  51. _model?.DangerousRelease();
  52. _model = null;
  53. NativeApi.llama_free(handle);
  54. SetHandle(IntPtr.Zero);
  55. return true;
  56. }
  57. private SafeLlamaModelHandle ThrowIfDisposed()
  58. {
  59. if (IsClosed)
  60. throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - it has been disposed");
  61. if (_model == null || _model.IsClosed)
  62. throw new ObjectDisposedException("Cannot use this `SafeLLamaContextHandle` - `SafeLlamaModelHandle` has been disposed");
  63. return _model;
  64. }
  65. /// <summary>
  66. /// Create a new llama_state for the given model
  67. /// </summary>
  68. /// <param name="model"></param>
  69. /// <param name="lparams"></param>
  70. /// <returns></returns>
  71. /// <exception cref="RuntimeError"></exception>
  72. public static SafeLLamaContextHandle Create(SafeLlamaModelHandle model, LLamaContextParams lparams)
  73. {
  74. var ctx_ptr = NativeApi.llama_new_context_with_model(model, lparams);
  75. if (ctx_ptr == IntPtr.Zero)
  76. throw new RuntimeError("Failed to create context from model");
  77. return new(ctx_ptr, model);
  78. }
  79. #endregion
  80. /// <summary>
  81. /// Convert the given text into tokens
  82. /// </summary>
  83. /// <param name="text">The text to tokenize</param>
  84. /// <param name="add_bos">Whether the "BOS" token should be added</param>
  85. /// <param name="encoding">Encoding to use for the text</param>
  86. /// <returns></returns>
  87. /// <exception cref="RuntimeError"></exception>
  88. public int[] Tokenize(string text, bool add_bos, Encoding encoding)
  89. {
  90. ThrowIfDisposed();
  91. // Calculate number of bytes in string, this is a pessimistic estimate of token count. It can't
  92. // possibly be more than this.
  93. var count = encoding.GetByteCount(text) + (add_bos ? 1 : 0);
  94. // "Rent" an array to write results into (avoiding an allocation of a large array)
  95. var temporaryArray = ArrayPool<int>.Shared.Rent(count);
  96. try
  97. {
  98. // Do the actual conversion
  99. var n = NativeApi.llama_tokenize(this, text, encoding, temporaryArray, count, add_bos);
  100. if (n < 0)
  101. {
  102. throw new RuntimeError("Error happened during tokenization. It's possibly caused by wrong encoding. Please try to " +
  103. "specify the encoding.");
  104. }
  105. // Copy the results from the rented into an array which is exactly the right size
  106. var result = new int[n];
  107. Array.ConstrainedCopy(temporaryArray, 0, result, 0, n);
  108. return result;
  109. }
  110. finally
  111. {
  112. ArrayPool<int>.Shared.Return(temporaryArray);
  113. }
  114. }
  115. /// <summary>
  116. /// Token logits obtained from the last call to llama_eval()
  117. /// The logits for the last token are stored in the last row
  118. /// Can be mutated in order to change the probabilities of the next token.<br />
  119. /// Rows: n_tokens<br />
  120. /// Cols: n_vocab
  121. /// </summary>
  122. /// <param name="ctx"></param>
  123. /// <returns></returns>
  124. public Span<float> GetLogits()
  125. {
  126. var model = ThrowIfDisposed();
  127. unsafe
  128. {
  129. var logits = NativeApi.llama_get_logits(this);
  130. return new Span<float>(logits, model.VocabCount);
  131. }
  132. }
  133. /// <summary>
  134. /// Convert a token into a string
  135. /// </summary>
  136. /// <param name="token"></param>
  137. /// <param name="encoding"></param>
  138. /// <returns></returns>
  139. public string TokenToString(int token, Encoding encoding)
  140. {
  141. return ThrowIfDisposed().TokenToString(token, encoding);
  142. }
  143. /// <summary>
  144. /// Convert a token into a span of bytes that could be decoded into a string
  145. /// </summary>
  146. /// <param name="token"></param>
  147. /// <returns></returns>
  148. public ReadOnlySpan<byte> TokenToSpan(int token)
  149. {
  150. return ThrowIfDisposed().TokenToSpan(token);
  151. }
  152. /// <summary>
  153. /// Run the llama inference to obtain the logits and probabilities for the next token.
  154. /// </summary>
  155. /// <param name="tokens">The provided batch of new tokens to process</param>
  156. /// <param name="n_past">the number of tokens to use from previous eval calls</param>
  157. /// <param name="n_threads"></param>
  158. /// <returns>Returns true on success</returns>
  159. public bool Eval(Memory<int> tokens, int n_past, int n_threads)
  160. {
  161. using var pin = tokens.Pin();
  162. unsafe
  163. {
  164. return NativeApi.llama_eval_with_pointer(this, (int*)pin.Pointer, tokens.Length, n_past, n_threads) == 0;
  165. }
  166. }
  167. }
  168. }