using System;

namespace LLama.Native;

/// <summary>
/// A batch allows submitting multiple tokens to multiple sequences simultaneously
/// </summary>
public class LLamaBatch
{
    private readonly byte[] _logits;

    private readonly LLamaToken[] _tokens;
    private readonly LLamaPos[] _positions;

    private readonly int[] _sequenceIdCount;
    private readonly LLamaSeqId[][] _sequenceIds;
    private readonly IntPtr[] _sequenceIdsPtrs;

    /// <summary>
    /// The number of tokens in this batch
    /// </summary>
    public int TokenCount { get; private set; }

    /// <summary>
    /// Create a new batch for submitting inputs to llama.cpp
    /// </summary>
    /// <param name="n_tokens">The maximum number of tokens this batch can hold</param>
    /// <param name="n_seq_max">The maximum number of sequences a single token can be assigned to</param>
    public LLamaBatch(int n_tokens, int n_seq_max)
    {
        _logits = new byte[n_tokens];
        _tokens = new LLamaToken[n_tokens];
        _positions = new LLamaPos[n_tokens];

        _sequenceIdCount = new int[n_tokens];
        _sequenceIdsPtrs = new IntPtr[_sequenceIdCount.Length];

        _sequenceIds = new LLamaSeqId[n_tokens][];
        for (var i = 0; i < _sequenceIds.Length; i++)
            _sequenceIds[i] = new LLamaSeqId[n_seq_max];
    }

    internal GroupDisposable ToNativeBatch(out LLamaNativeBatch batch)
    {
        // This group holds all of the memory pins
        var group = new GroupDisposable();

        unsafe
        {
            batch = new LLamaNativeBatch
            {
                n_tokens = TokenCount,
                logits = (byte*)group.Add(_logits.AsMemory().Pin()).Pointer,

                n_seq_id = (int*)group.Add(_sequenceIdCount.AsMemory().Pin()).Pointer,
                pos = (LLamaPos*)group.Add(_positions.AsMemory().Pin()).Pointer,
                seq_id = (LLamaSeqId**)group.Add(_sequenceIdsPtrs.AsMemory().Pin()).Pointer,

                // embd is not currently supported, so this is always null!
                embd = null,

                // Note that if embd is **not null** then this will be null!
                tokens = (LLamaToken*)group.Add(_tokens.AsMemory().Pin()).Pointer,
            };

            // Create pointers to each of the arrays in turn
            for (var i = 0; i < _sequenceIdsPtrs.Length; i++)
                _sequenceIdsPtrs[i] = (IntPtr)group.Add(_sequenceIds[i].AsMemory().Pin()).Pointer;
        }

        return group;
    }

    /// <summary>
    /// Add a single token to the batch at the same position in several sequences
    /// </summary>
    /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
    /// <param name="token">The token to add</param>
    /// <param name="pos">The position to add it at</param>
    /// <param name="sequences">The set of sequences to add this token to</param>
    /// <param name="logits">Whether logits should be computed for this token</param>
    public void LLamaBatchAdd(LLamaToken token, LLamaPos pos, ReadOnlySpan<LLamaSeqId> sequences, bool logits)
    {
        _tokens[TokenCount] = token;
        _positions[TokenCount] = pos;

        _sequenceIdCount[TokenCount] = sequences.Length;
        for (var i = 0; i < sequences.Length; i++)
            _sequenceIds[TokenCount][i] = sequences[i];

        _logits[TokenCount] = Convert.ToByte(logits);

        TokenCount++;
    }

    /// <summary>
    /// Add a single token to the batch at a certain position for a single sequence
    /// </summary>
    /// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
    /// <param name="token">The token to add</param>
    /// <param name="pos">The position to add it at</param>
    /// <param name="sequence">The sequence to add this token to</param>
    /// <param name="logits">Whether logits should be computed for this token</param>
    public void LLamaBatchAdd(LLamaToken token, LLamaPos pos, LLamaSeqId sequence, bool logits)
    {
        // Create a temporary span to contain 1 item without allocating
        Span<LLamaSeqId> sequences = stackalloc LLamaSeqId[1];
        sequences[0] = sequence;

        // Add it
        LLamaBatchAdd(token, pos, sequences, logits);
    }

    /// <summary>
    /// Set TokenCount to zero for this batch
    /// </summary>
    public void LLamaBatchClear()
    {
        TokenCount = 0;
    }
}
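
// Usage sketch (illustrative, not part of the class above): how a caller inside
// the library might fill a batch and hand the pinned native view to llama.cpp.
// `ctx`, `prompt`, and `NativeApi.llama_decode` are assumed to exist for the
// sake of the example; only LLamaBatch's own members are taken from this file.
//
//     var batch = new LLamaBatch(n_tokens: 512, n_seq_max: 1);
//
//     // Queue the prompt at consecutive positions in sequence 0, requesting
//     // logits only for the final token.
//     for (var i = 0; i < prompt.Length; i++)
//         batch.LLamaBatchAdd(prompt[i], i, (LLamaSeqId)0, logits: i == prompt.Length - 1);
//
//     // ToNativeBatch pins the managed arrays; dispose the returned group to
//     // release the pins once the native call has completed.
//     using (batch.ToNativeBatch(out var native))
//         NativeApi.llama_decode(ctx, native); // assumed native binding
//
//     // Reset TokenCount before reusing the batch.
//     batch.LLamaBatchClear();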