
LLamaBatch.cs

using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;

namespace LLama.Native;

/// <summary>
/// A batch allows submitting multiple tokens to multiple sequences simultaneously
/// </summary>
public class LLamaBatch
{
    private byte[] _logits;

    private LLamaToken[] _tokens;
    private LLamaPos[] _positions;

    private int[] _sequenceIdCount;
    private LLamaSeqId[][] _sequenceIds;
    private IntPtr[] _sequenceIdsPtrs;

    /// <summary>
    /// Keep track of the index of existing token/position combos in the batch
    /// </summary>
    private readonly Dictionary<(LLamaToken, LLamaPos), int> _index = new();

    /// <summary>
    /// Keep a list of where logits can be sampled from
    /// </summary>
    private readonly List<(LLamaSeqId, int)> _logitPositions = new();

    /// <summary>
    /// Get the number of logit positions that will be generated from this batch
    /// </summary>
    internal int LogitPositionCount => _logitPositions.Count;

    /// <summary>
    /// The number of tokens in this batch
    /// </summary>
    public int TokenCount { get; private set; }

    /// <summary>
    /// Maximum number of tokens that can be added to this batch (automatically grows if exceeded)
    /// </summary>
    private int TokenCapacity { get; set; }

    /// <summary>
    /// Maximum number of sequences a token can be assigned to (automatically grows if exceeded)
    /// </summary>
    public int SequenceCapacity { get; private set; }

    /// <summary>
    /// Create a new batch for submitting inputs to llama.cpp
    /// </summary>
    public LLamaBatch()
    {
        // These can both be grown later, start off with reasonable numbers.
        const int n_tokens = 128;
        const int n_seq_max = 1;

        SequenceCapacity = n_seq_max;
        TokenCapacity = n_tokens;

        _logits = new byte[n_tokens];
        _tokens = new LLamaToken[n_tokens];
        _positions = new LLamaPos[n_tokens];

        _sequenceIdCount = new int[n_tokens];
        _sequenceIdsPtrs = new IntPtr[_sequenceIdCount.Length];

        _sequenceIds = new LLamaSeqId[n_tokens][];
        for (var i = 0; i < _sequenceIds.Length; i++)
            _sequenceIds[i] = new LLamaSeqId[SequenceCapacity];
    }
    #region grow
    private void GrowTokenCapacity()
    {
        var n_tokens = TokenCount * 2;
        TokenCapacity = n_tokens;

        Array.Resize(ref _logits, n_tokens);
        Array.Resize(ref _tokens, n_tokens);
        Array.Resize(ref _positions, n_tokens);

        Array.Resize(ref _sequenceIdCount, n_tokens);
        Array.Resize(ref _sequenceIdsPtrs, n_tokens);

        Array.Resize(ref _sequenceIds, n_tokens);
        for (var i = 0; i < _sequenceIds.Length; i++)
        {
            // Growing the array filled the new elements with null, temporarily violating the nullability contract!
            // ReSharper disable once ConditionIsAlwaysTrueOrFalseAccordingToNullableAPIContract
            if (_sequenceIds[i] == null)
                _sequenceIds[i] = new LLamaSeqId[SequenceCapacity];
        }
    }

    private void GrowMaxSequences(int atLeast)
    {
        var n_seq = Math.Max(SequenceCapacity * 2, atLeast);
        SequenceCapacity = n_seq;

        for (var i = 0; i < _sequenceIds.Length; i++)
            Array.Resize(ref _sequenceIds[i], SequenceCapacity);
    }
    #endregion
    /// <summary>
    /// Pin all of the managed arrays and build the native batch struct that points into them.
    /// The returned <see cref="GroupDisposable"/> holds the memory pins, so it must be kept
    /// alive (and then disposed) for as long as the native batch is in use.
    /// </summary>
    internal GroupDisposable ToNativeBatch(out LLamaNativeBatch batch)
    {
        // This group holds all of the memory pins
        var group = new GroupDisposable();

        unsafe
        {
            batch = new LLamaNativeBatch
            {
                n_tokens = TokenCount,
                logits = (byte*)group.Add(_logits.AsMemory().Pin()).Pointer,

                n_seq_id = (int*)group.Add(_sequenceIdCount.AsMemory().Pin()).Pointer,
                pos = (LLamaPos*)group.Add(_positions.AsMemory().Pin()).Pointer,
                seq_id = (LLamaSeqId**)group.Add(_sequenceIdsPtrs.AsMemory().Pin()).Pointer,

                // embd is not currently supported, so this is always null!
                embd = null,

                // Note that if embd is **not null** then this will be null!
                tokens = (LLamaToken*)group.Add(_tokens.AsMemory().Pin()).Pointer,
            };

            // Create pointers to each of the sequence ID arrays in turn
            for (var i = 0; i < _sequenceIdsPtrs.Length; i++)
                _sequenceIdsPtrs[i] = (IntPtr)group.Add(_sequenceIds[i].AsMemory().Pin()).Pointer;
        }

        return group;
    }
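
    // Typical call-site pattern (illustrative; the decode call shown is an assumption,
    // not part of this file): keep the returned group alive across the native call.
    //
    //     using var pin = batch.ToNativeBatch(out var native);
    //     NativeApi.llama_decode(ctx, native);   // hypothetical native entry point
    //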
    #region add
    /// <summary>
    /// Add a single token to the batch at the same position in several sequences
    /// </summary>
    /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
    /// <param name="token">The token to add</param>
    /// <param name="pos">The position to add it at</param>
    /// <param name="sequences">The set of sequences to add this token to</param>
    /// <param name="logits">Whether logits should be generated for this token</param>
    /// <returns>The index that the token was added at. Use this for GetLogitsIth</returns>
    public int Add(LLamaToken token, LLamaPos pos, ReadOnlySpan<LLamaSeqId> sequences, bool logits)
    {
        // Try to find this (token, position) combo somewhere in the batch, so it can be re-used
        // by adding these sequence IDs to its list.
        // Do **not** do this if this token wants logits, to prevent logits being shared between sequences.
        if (!logits && _index.TryGetValue((token, pos), out var existingIndex))
        {
            if (_sequenceIdCount[existingIndex] + sequences.Length > SequenceCapacity)
                GrowMaxSequences(_sequenceIdCount[existingIndex] + sequences.Length);

            foreach (var sequence in sequences)
            {
                _sequenceIds[existingIndex][_sequenceIdCount[existingIndex]] = sequence;
                _sequenceIdCount[existingIndex]++;
            }

            return existingIndex;
        }

        // Couldn't find this token/position combo anywhere in the batch. Add a new item.

        // Grow capacity as necessary
        if (TokenCount == TokenCapacity)
            GrowTokenCapacity();
        if (sequences.Length > SequenceCapacity)
            GrowMaxSequences(sequences.Length);

        // Store the position in the index, so it can be found later.
        // We need to check that it's not already there, in case we skipped the check above (because logits is true).
        if (!_index.ContainsKey((token, pos)))
            _index.Add((token, pos), TokenCount);

        // Add the items to the arrays
        _tokens[TokenCount] = token;
        _positions[TokenCount] = pos;
        _sequenceIdCount[TokenCount] = sequences.Length;
        for (var i = 0; i < sequences.Length; i++)
            _sequenceIds[TokenCount][i] = sequences[i];
        _logits[TokenCount] = Convert.ToByte(logits);

        // Store this position in the logits lookup if necessary
        if (logits)
        {
            foreach (var sequence in sequences)
                _logitPositions.Add((sequence, TokenCount));
        }

        return TokenCount++;
    }
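
    // For example (illustrative; seqA/seqB are hypothetical variables): when the same token
    // is added to two sequences at the same position with logits == false, the second call
    // re-uses the first entry instead of consuming another batch slot:
    //
    //     var a = batch.Add(token, pos, seqA, false);
    //     var b = batch.Add(token, pos, seqB, false);   // a == b
    //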
    /// <summary>
    /// Add a single token to the batch at the same position in several sequences
    /// </summary>
    /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
    /// <param name="token">The token to add</param>
    /// <param name="pos">The position to add it at</param>
    /// <param name="sequences">The set of sequences to add this token to</param>
    /// <param name="logits">Whether logits should be generated for this token</param>
    /// <returns>The index that the token was added at. Use this for GetLogitsIth</returns>
    public int Add(LLamaToken token, LLamaPos pos, List<LLamaSeqId> sequences, bool logits)
    {
#if NET5_0_OR_GREATER
        var seqSpan = CollectionsMarshal.AsSpan(sequences);
        return Add(token, pos, seqSpan, logits);
#else
        // On netstandard2.0 we can't use CollectionsMarshal to get directly at the internal memory of
        // the list. Instead, rent an array and copy the data into it. This avoids an allocation, but can't
        // avoid the copying.
        var rented = System.Buffers.ArrayPool<LLamaSeqId>.Shared.Rent(sequences.Count);
        try
        {
            sequences.CopyTo(rented, 0);
            return Add(token, pos, rented.AsSpan(0, sequences.Count), logits);
        }
        finally
        {
            System.Buffers.ArrayPool<LLamaSeqId>.Shared.Return(rented);
        }
#endif
    }
    /// <summary>
    /// Add a single token to the batch at a certain position for a single sequence
    /// </summary>
    /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
    /// <param name="token">The token to add</param>
    /// <param name="pos">The position to add it at</param>
    /// <param name="sequence">The sequence to add this token to</param>
    /// <param name="logits">Whether logits should be generated for this token</param>
    /// <returns>The index that the token was added at. Use this for GetLogitsIth</returns>
    public int Add(LLamaToken token, LLamaPos pos, LLamaSeqId sequence, bool logits)
    {
        // Create a temporary span to contain 1 item without allocating
        Span<LLamaSeqId> sequences = stackalloc LLamaSeqId[1];
        sequences[0] = sequence;

        // Add it
        return Add(token, pos, sequences, logits);
    }
    /// <summary>
    /// Add a range of tokens to a single sequence, starting at the given position.
    /// </summary>
    /// <param name="tokens">The tokens to add</param>
    /// <param name="start">The starting position to add tokens at</param>
    /// <param name="sequence">The sequence to add these tokens to</param>
    /// <param name="logitsLast">Whether the final token should generate logits</param>
    /// <returns>The index that the final token was added at. Use this for GetLogitsIth</returns>
    public int AddRange(ReadOnlySpan<LLamaToken> tokens, LLamaPos start, LLamaSeqId sequence, bool logitsLast)
    {
        var last = -1;
        for (var i = 0; i < tokens.Length; i++)
        {
            // Only the final token generates logits, and only if logitsLast is set
            var logits = (i == tokens.Length - 1) && logitsLast;
            last = Add(tokens[i], start.Value + i, sequence, logits);
        }

        return last;
    }
    #endregion

    /// <summary>
    /// Set TokenCount to zero for this batch
    /// </summary>
    public void Clear()
    {
        TokenCount = 0;

        _index.Clear();
        _logitPositions.Clear();
    }

    /// <summary>
    /// Copy the positions where logits can be sampled from into <paramref name="dest"/>
    /// </summary>
    /// <returns>The slice of <paramref name="dest"/> that was written to</returns>
    internal Span<(LLamaSeqId, int)> GetLogitPositions(Span<(LLamaSeqId, int)> dest)
    {
        for (var i = 0; i < _logitPositions.Count; i++)
            dest[i] = _logitPositions[i];

        return dest.Slice(0, _logitPositions.Count);
    }
}
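
// ---------------------------------------------------------------------------
// Usage sketch (illustrative, not part of the original file). It fills a batch
// with a prompt for one sequence, requesting logits only for the final token,
// then pins the arrays for a native call. The decode entry point is left as a
// comment because the exact native API is an assumption here.
// ---------------------------------------------------------------------------
internal static class LLamaBatchUsageExample
{
    internal static int FillPrompt(LLamaBatch batch, ReadOnlySpan<LLamaToken> prompt, LLamaPos start, LLamaSeqId sequence)
    {
        batch.Clear();

        // Add every prompt token; only the last one will produce logits
        var lastIndex = batch.AddRange(prompt, start, sequence, logitsLast: true);

        // Pin the managed arrays and build the native struct. The GroupDisposable
        // must stay alive until the native decode call has completed.
        using var pin = batch.ToNativeBatch(out var native);
        // e.g. NativeApi.llama_decode(context, native);  // hypothetical call

        return lastIndex; // pass this index to GetLogitsIth after decoding
    }
}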