You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

LLamaBatch.cs 8.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Runtime.InteropServices;
  4. namespace LLama.Native;
  5. /// <summary>
  6. /// A batch allows submitting multiple tokens to multiple sequences simultaneously
  7. /// </summary>
  8. public class LLamaBatch
  9. {
  10. private byte[] _logits;
  11. private LLamaToken[] _tokens;
  12. private LLamaPos[] _positions;
  13. private int[] _sequenceIdCount;
  14. private LLamaSeqId[][] _sequenceIds;
  15. private IntPtr[] _sequenceIdsPtrs;
  16. /// <summary>
  17. /// The number of tokens in this batch
  18. /// </summary>
  19. public int TokenCount { get; private set; }
  20. /// <summary>
  21. /// Maximum number of tokens that can be added to this batch (automatically grows if exceeded)
  22. /// </summary>
  23. private int TokenCapacity { get; set; }
  24. /// <summary>
  25. /// Maximum number of sequences a token can be assigned to (automatically grows if exceeded)
  26. /// </summary>
  27. public int SequenceCapacity { get; private set; }
  28. /// <summary>
  29. /// Create a new batch for submitting inputs to llama.cpp
  30. /// </summary>
  31. public LLamaBatch()
  32. {
  33. // These can both be grown later, start off with reasonable numbers.
  34. const int n_tokens = 128;
  35. const int n_seq_max = 1;
  36. SequenceCapacity = n_seq_max;
  37. TokenCapacity = n_tokens;
  38. _logits = new byte[n_tokens];
  39. _tokens = new LLamaToken[n_tokens];
  40. _positions = new LLamaPos[n_tokens];
  41. _sequenceIdCount = new int[n_tokens];
  42. _sequenceIdsPtrs = new IntPtr[_sequenceIdCount.Length];
  43. _sequenceIds = new LLamaSeqId[n_tokens][];
  44. for (var i = 0; i < _sequenceIds.Length; i++)
  45. _sequenceIds[i] = new LLamaSeqId[SequenceCapacity];
  46. }
  47. #region grow
  48. private void GrowTokenCapacity()
  49. {
  50. var n_tokens = TokenCount * 2;
  51. TokenCapacity = n_tokens;
  52. Array.Resize(ref _logits, n_tokens);
  53. Array.Resize(ref _tokens, n_tokens);
  54. Array.Resize(ref _positions, n_tokens);
  55. Array.Resize(ref _sequenceIdCount, n_tokens);
  56. Array.Resize(ref _sequenceIdsPtrs, n_tokens);
  57. Array.Resize(ref _sequenceIds, n_tokens);
  58. for (int i = 0; i < _sequenceIds.Length; i++)
  59. {
  60. // Growing the array filled elements with null, temporarily violating the nullability contract!
  61. // ReSharper disable once ConditionIsAlwaysTrueOrFalseAccordingToNullableAPIContract
  62. if (_sequenceIds[i] == null)
  63. _sequenceIds[i] = new LLamaSeqId[SequenceCapacity];
  64. }
  65. }
  66. private void GrowMaxSequences(int atLeast)
  67. {
  68. var n_seq = Math.Max(SequenceCapacity * 2, atLeast);
  69. SequenceCapacity = n_seq;
  70. for (var i = 0; i < _sequenceIds.Length; i++)
  71. Array.Resize(ref _sequenceIds[i], SequenceCapacity);
  72. }
  73. #endregion
  74. internal GroupDisposable ToNativeBatch(out LLamaNativeBatch batch)
  75. {
  76. // This group holds all of the memory pins
  77. var group = new GroupDisposable();
  78. unsafe
  79. {
  80. batch = new LLamaNativeBatch
  81. {
  82. n_tokens = TokenCount,
  83. logits = (byte*)group.Add(_logits.AsMemory().Pin()).Pointer,
  84. n_seq_id = (int*)group.Add(_sequenceIdCount.AsMemory().Pin()).Pointer,
  85. pos = (LLamaPos*)group.Add(_positions.AsMemory().Pin()).Pointer,
  86. seq_id = (LLamaSeqId**)group.Add(_sequenceIdsPtrs.AsMemory().Pin()).Pointer,
  87. // embd is not currently supported, so this is always null!
  88. embd = null,
  89. // Note that if embd is **not null** then this will be null!
  90. tokens = (LLamaToken*)group.Add(_tokens.AsMemory().Pin()).Pointer,
  91. };
  92. // Create pointers to each of the arrays in turns
  93. for (var i = 0; i < _sequenceIdsPtrs.Length; i++)
  94. _sequenceIdsPtrs[i] = (IntPtr)group.Add(_sequenceIds[i].AsMemory().Pin()).Pointer;
  95. }
  96. return group;
  97. }
  98. #region add
  99. /// <summary>
  100. /// Add a single token to the batch at the same position in several sequences
  101. /// </summary>
  102. /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
  103. /// <param name="token">The token to add</param>
  104. /// <param name="pos">The position to add it att</param>
  105. /// <param name="sequences">The set of sequences to add this token to</param>
  106. /// <param name="logits"></param>
  107. public void Add(LLamaToken token, LLamaPos pos, ReadOnlySpan<LLamaSeqId> sequences, bool logits)
  108. {
  109. if (TokenCount == TokenCapacity)
  110. GrowTokenCapacity();
  111. if (sequences.Length > SequenceCapacity)
  112. GrowMaxSequences(sequences.Length);
  113. _tokens[TokenCount] = token;
  114. _positions[TokenCount] = pos;
  115. _sequenceIdCount[TokenCount] = sequences.Length;
  116. for (var i = 0; i < sequences.Length; i++)
  117. _sequenceIds[TokenCount][i] = sequences[i];
  118. _logits[TokenCount] = Convert.ToByte(logits);
  119. TokenCount++;
  120. }
  121. /// <summary>
  122. /// Add a single token to the batch at the same position in several sequences
  123. /// </summary>
  124. /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
  125. /// <param name="token">The token to add</param>
  126. /// <param name="pos">The position to add it att</param>
  127. /// <param name="sequences">The set of sequences to add this token to</param>
  128. /// <param name="logits"></param>
  129. public void Add(LLamaToken token, LLamaPos pos, List<LLamaSeqId> sequences, bool logits)
  130. {
  131. #if NET5_0_OR_GREATER
  132. var seqSpan = CollectionsMarshal.AsSpan(sequences);
  133. Add(token, pos, seqSpan, logits);
  134. #else
  135. // on netstandard2.0 we can't use CollectionsMarshal to get directly at the internal memory of
  136. // the list. Instead rent an array and copy the data into it. This avoids an allocation, but can't
  137. // avoid the copying.
  138. var rented = System.Buffers.ArrayPool<LLamaSeqId>.Shared.Rent(sequences.Count);
  139. try
  140. {
  141. sequences.CopyTo(rented, 0);
  142. Add(token, pos, rented.AsSpan(0, sequences.Count), logits);
  143. }
  144. finally
  145. {
  146. System.Buffers.ArrayPool<LLamaSeqId>.Shared.Return(rented);
  147. }
  148. #endif
  149. }
  150. /// <summary>
  151. /// Add a single token to the batch at a certain position for a single sequences
  152. /// </summary>
  153. /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
  154. /// <param name="token">The token to add</param>
  155. /// <param name="pos">The position to add it att</param>
  156. /// <param name="sequence">The sequence to add this token to</param>
  157. /// <param name="logits"></param>
  158. public void Add(LLamaToken token, LLamaPos pos, LLamaSeqId sequence, bool logits)
  159. {
  160. // Create a temporary span to contain 1 item without allocating
  161. Span<LLamaSeqId> sequences = stackalloc LLamaSeqId[1];
  162. sequences[0] = sequence;
  163. // Add it
  164. Add(token, pos, sequences, logits);
  165. }
  166. /// <summary>
  167. /// Add a range of tokens to a single sequence, start at the given position.
  168. /// </summary>
  169. /// <param name="tokens">The tokens to add</param>
  170. /// <param name="start">The starting position to add tokens at</param>
  171. /// <param name="sequence">The sequence to add this token to</param>
  172. /// <param name="logitsLast">Whether the final token should generate logits</param>
  173. public void AddRange(ReadOnlySpan<LLamaToken> tokens, LLamaPos start, LLamaSeqId sequence, bool logitsLast)
  174. {
  175. for (var i = 0; i < tokens.Length; i++)
  176. {
  177. var logits = (i == tokens.Length - 1) & logitsLast;
  178. Add(tokens[i], start.Value + i, sequence, logits);
  179. }
  180. }
  181. #endregion
  182. /// <summary>
  183. /// Set TokenCount to zero for this batch
  184. /// </summary>
  185. public void Clear()
  186. {
  187. TokenCount = 0;
  188. }
  189. }