You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

LLamaBatch.cs 4.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. using System;
  2. namespace LLama.Native;
  3. /// <summary>
  4. /// A batch allows submitting multiple tokens to multiple sequences simultaneously
  5. /// </summary>
  6. public class LLamaBatch
  7. {
  8. private readonly byte[] _logits;
  9. private readonly LLamaToken[] _tokens;
  10. private readonly LLamaPos[] _positions;
  11. private readonly int[] _sequenceIdCount;
  12. private readonly LLamaSeqId[][] _sequenceIds;
  13. private readonly IntPtr[] _sequenceIdsPtrs;
  14. /// <summary>
  15. /// The number of tokens in this batch
  16. /// </summary>
  17. public int TokenCount { get; private set; }
  18. /// <summary>
  19. /// Create a new batch for submitting inputs to llama.cpp
  20. /// </summary>
  21. /// <param name="n_tokens"></param>
  22. /// <param name="n_seq_max"></param>
  23. public LLamaBatch(int n_tokens, int n_seq_max)
  24. {
  25. _logits = new byte[n_tokens];
  26. _tokens = new LLamaToken[n_tokens];
  27. _positions = new LLamaPos[n_tokens];
  28. _sequenceIdCount = new int[n_tokens];
  29. _sequenceIdsPtrs = new IntPtr[_sequenceIdCount.Length];
  30. _sequenceIds = new LLamaSeqId[n_tokens][];
  31. for (var i = 0; i < _sequenceIds.Length; i++)
  32. _sequenceIds[i] = new LLamaSeqId[n_seq_max];
  33. }
  34. internal GroupDisposable ToNativeBatch(out LLamaNativeBatch batch)
  35. {
  36. // This group holds all of the memory pins
  37. var group = new GroupDisposable();
  38. unsafe
  39. {
  40. batch = new LLamaNativeBatch
  41. {
  42. n_tokens = TokenCount,
  43. logits = (byte*)group.Add(_logits.AsMemory().Pin()).Pointer,
  44. n_seq_id = (int*)group.Add(_sequenceIdCount.AsMemory().Pin()).Pointer,
  45. pos = (LLamaPos*)group.Add(_positions.AsMemory().Pin()).Pointer,
  46. seq_id = (LLamaSeqId**)group.Add(_sequenceIdsPtrs.AsMemory().Pin()).Pointer,
  47. // embd is not currently supported, so this is always null!
  48. embd = null,
  49. // Note that if embd is **not null** then this will be null!
  50. tokens = (LLamaToken*)group.Add(_tokens.AsMemory().Pin()).Pointer,
  51. };
  52. // Create pointers to each of the arrays in turns
  53. for (var i = 0; i < _sequenceIdsPtrs.Length; i++)
  54. _sequenceIdsPtrs[i] = (IntPtr)group.Add(_sequenceIds[i].AsMemory().Pin()).Pointer;
  55. }
  56. return group;
  57. }
  58. /// <summary>
  59. /// Add a single token to the batch at the same position in several sequences
  60. /// </summary>
  61. /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
  62. /// <param name="token">The token to add</param>
  63. /// <param name="pos">The position to add it att</param>
  64. /// <param name="sequences">The set of sequences to add this token to</param>
  65. /// <param name="logits"></param>
  66. public void LLamaBatchAdd(LLamaToken token, LLamaPos pos, ReadOnlySpan<LLamaSeqId> sequences, bool logits)
  67. {
  68. _tokens[TokenCount] = token;
  69. _positions[TokenCount] = pos;
  70. _sequenceIdCount[TokenCount] = sequences.Length;
  71. for (var i = 0; i < sequences.Length; i++)
  72. _sequenceIds[TokenCount][i] = sequences[i];
  73. _logits[TokenCount] = Convert.ToByte(logits);
  74. TokenCount++;
  75. }
  76. /// <summary>
  77. /// Add a single token to the batch at a certain position for a single sequences
  78. /// </summary>
  79. /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
  80. /// <param name="token">The token to add</param>
  81. /// <param name="pos">The position to add it att</param>
  82. /// <param name="sequence">The sequence to add this token to</param>
  83. /// <param name="logits"></param>
  84. public void LLamaBatchAdd(LLamaToken token, LLamaPos pos, LLamaSeqId sequence, bool logits)
  85. {
  86. // Create a temporary span to contain 1 item without allocating
  87. Span<LLamaSeqId> sequences = stackalloc LLamaSeqId[1];
  88. sequences[0] = sequence;
  89. // Add it
  90. LLamaBatchAdd(token, pos, sequences, logits);
  91. }
  92. /// <summary>
  93. /// Set TokenCount to zero for this batch
  94. /// </summary>
  95. public void LLamaBatchClear()
  96. {
  97. TokenCount = 0;
  98. }
  99. }