
LLamaBatch.cs

using System;

namespace LLama.Native;

/// <summary>
/// A batch allows submitting multiple tokens to multiple sequences simultaneously
/// </summary>
public class LLamaBatch
{
    // One flag per token, indicating whether logits should be generated for it
    private byte[] _logits;

    private LLamaToken[] _tokens;
    private LLamaPos[] _positions;

    // Per token: how many sequences it belongs to, the sequence IDs themselves,
    // and pinned pointers to those ID arrays for the native side
    private int[] _sequenceIdCount;
    private LLamaSeqId[][] _sequenceIds;
    private IntPtr[] _sequenceIdsPtrs;
    /// <summary>
    /// The number of tokens in this batch
    /// </summary>
    public int TokenCount { get; private set; }

    /// <summary>
    /// Maximum number of tokens that can be added to this batch
    /// </summary>
    private int TokenCapacity { get; set; }

    /// <summary>
    /// Maximum number of sequences a token can be assigned to
    /// </summary>
    public int MaxSequences { get; private set; }

    /// <summary>
    /// Create a new batch for submitting inputs to llama.cpp
    /// </summary>
    public LLamaBatch()
    {
        // These can both be grown later, start off with reasonable numbers.
        const int n_tokens = 128;
        const int n_seq_max = 4;

        MaxSequences = n_seq_max;
        TokenCapacity = n_tokens;

        _logits = new byte[n_tokens];
        _tokens = new LLamaToken[n_tokens];
        _positions = new LLamaPos[n_tokens];

        _sequenceIdCount = new int[n_tokens];
        _sequenceIdsPtrs = new IntPtr[_sequenceIdCount.Length];

        _sequenceIds = new LLamaSeqId[n_tokens][];
        for (var i = 0; i < _sequenceIds.Length; i++)
            _sequenceIds[i] = new LLamaSeqId[MaxSequences];
    }
    /// <summary>
    /// Double the number of tokens this batch can hold
    /// </summary>
    private void GrowTokenCapacity()
    {
        var n_tokens = TokenCount * 2;
        TokenCapacity = n_tokens;

        Array.Resize(ref _logits, n_tokens);
        Array.Resize(ref _tokens, n_tokens);
        Array.Resize(ref _positions, n_tokens);

        Array.Resize(ref _sequenceIdCount, n_tokens);
        Array.Resize(ref _sequenceIdsPtrs, n_tokens);

        Array.Resize(ref _sequenceIds, n_tokens);
        for (var i = 0; i < _sequenceIds.Length; i++)
        {
            // Growing the array filled elements with null, temporarily violating the nullability contract!
            // ReSharper disable once ConditionIsAlwaysTrueOrFalseAccordingToNullableAPIContract
            if (_sequenceIds[i] == null)
                _sequenceIds[i] = new LLamaSeqId[MaxSequences];
        }
    }

    /// <summary>
    /// Double the number of sequences each token can be assigned to
    /// </summary>
    private void GrowMaxSequences()
    {
        var n_seq = MaxSequences * 2;
        MaxSequences = n_seq;

        for (var i = 0; i < _sequenceIds.Length; i++)
            Array.Resize(ref _sequenceIds[i], MaxSequences);
    }
    internal GroupDisposable ToNativeBatch(out LLamaNativeBatch batch)
    {
        // This group holds all of the memory pins
        var group = new GroupDisposable();

        unsafe
        {
            batch = new LLamaNativeBatch
            {
                n_tokens = TokenCount,
                logits = (byte*)group.Add(_logits.AsMemory().Pin()).Pointer,

                n_seq_id = (int*)group.Add(_sequenceIdCount.AsMemory().Pin()).Pointer,
                pos = (LLamaPos*)group.Add(_positions.AsMemory().Pin()).Pointer,
                seq_id = (LLamaSeqId**)group.Add(_sequenceIdsPtrs.AsMemory().Pin()).Pointer,

                // embd is not currently supported, so this is always null!
                embd = null,

                // Note that if embd is **not null** then this will be null!
                tokens = (LLamaToken*)group.Add(_tokens.AsMemory().Pin()).Pointer,
            };

            // Create pointers to each of the sequence ID arrays in turn
            for (var i = 0; i < _sequenceIdsPtrs.Length; i++)
                _sequenceIdsPtrs[i] = (IntPtr)group.Add(_sequenceIds[i].AsMemory().Pin()).Pointer;
        }

        return group;
    }
    /// <summary>
    /// Add a single token to the batch at the same position in several sequences
    /// </summary>
    /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
    /// <param name="token">The token to add</param>
    /// <param name="pos">The position to add it at</param>
    /// <param name="sequences">The set of sequences to add this token to</param>
    /// <param name="logits">Whether logits should be generated for this token</param>
    public void Add(LLamaToken token, LLamaPos pos, ReadOnlySpan<LLamaSeqId> sequences, bool logits)
    {
        if (TokenCount == TokenCapacity)
            GrowTokenCapacity();
        if (sequences.Length > MaxSequences)
            GrowMaxSequences();

        _tokens[TokenCount] = token;
        _positions[TokenCount] = pos;

        _sequenceIdCount[TokenCount] = sequences.Length;
        for (var i = 0; i < sequences.Length; i++)
            _sequenceIds[TokenCount][i] = sequences[i];

        _logits[TokenCount] = Convert.ToByte(logits);

        TokenCount++;
    }
    /// <summary>
    /// Add a single token to the batch at a certain position for a single sequence
    /// </summary>
    /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
    /// <param name="token">The token to add</param>
    /// <param name="pos">The position to add it at</param>
    /// <param name="sequence">The sequence to add this token to</param>
    /// <param name="logits">Whether logits should be generated for this token</param>
    public void Add(LLamaToken token, LLamaPos pos, LLamaSeqId sequence, bool logits)
    {
        // Create a temporary span to contain 1 item without allocating
        Span<LLamaSeqId> sequences = stackalloc LLamaSeqId[1];
        sequences[0] = sequence;

        // Add it
        Add(token, pos, sequences, logits);
    }
    /// <summary>
    /// Set TokenCount to zero for this batch
    /// </summary>
    public void Clear()
    {
        TokenCount = 0;
    }
}
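
For reference, a minimal usage sketch is shown below. It is not part of the file above: the explicit int casts on LLamaToken, LLamaPos, and LLamaSeqId are assumptions for illustration (real token values come from the library's tokenizer), and the filled batch is consumed by the library internals via ToNativeBatch.

using System;
using LLama.Native;

public static class BatchExample
{
    public static void Main()
    {
        var batch = new LLamaBatch();

        // Hypothetical token IDs standing in for a tokenized prompt.
        int[] prompt = { 1, 450, 4996, 17354, 1701 };

        // Queue the whole prompt in sequence 0; only the final token
        // needs logits, since that is where generation continues from.
        for (var i = 0; i < prompt.Length; i++)
            batch.Add((LLamaToken)prompt[i], (LLamaPos)i, (LLamaSeqId)0, logits: i == prompt.Length - 1);

        // Fan one token out to two sequences at the same position,
        // forking two decoding branches that share the prompt so far.
        Span<LLamaSeqId> seqs = stackalloc LLamaSeqId[2];
        seqs[0] = (LLamaSeqId)0;
        seqs[1] = (LLamaSeqId)1;
        batch.Add((LLamaToken)3148, (LLamaPos)prompt.Length, seqs, logits: true);

        Console.WriteLine($"Tokens queued: {batch.TokenCount}");

        // After the batch has been decoded, reset it for the next round.
        batch.Clear();
    }
}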