using System;
namespace LLama.Native;
/// <summary>
/// A batch allows submitting multiple tokens to multiple sequences simultaneously
/// </summary>
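/// <example>
/// A minimal usage sketch. <c>NativeApi.llama_decode</c> stands in for whatever
/// decode binding consumes the <see cref="LLamaNativeBatch"/>, and the casts
/// assume explicit int conversions on <see cref="LLamaPos"/> and <see cref="LLamaSeqId"/>:
/// <code>
/// var batch = new LLamaBatch(n_tokens: 512, n_seq_max: 1);
/// batch.LLamaBatchAdd(token, (LLamaPos)0, (LLamaSeqId)0, logits: true);
/// using (batch.ToNativeBatch(out var native))
/// {
///     // the memory pins must be held for the duration of the native call
///     NativeApi.llama_decode(context, native);
/// }
/// batch.LLamaBatchClear();
/// </code>
/// </example>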
public class LLamaBatch
{
private readonly byte[] _logits;
private readonly LLamaToken[] _tokens;
private readonly LLamaPos[] _positions;
private readonly int[] _sequenceIdCount;
private readonly LLamaSeqId[][] _sequenceIds;
private readonly IntPtr[] _sequenceIdsPtrs;
/// <summary>
/// The number of tokens in this batch
/// </summary>
public int TokenCount { get; private set; }
/// <summary>
/// Create a new batch for submitting inputs to llama.cpp
/// </summary>
/// <param name="n_tokens">The maximum number of tokens this batch can hold</param>
/// <param name="n_seq_max">The maximum number of sequences a single token can be assigned to</param>
public LLamaBatch(int n_tokens, int n_seq_max)
{
_logits = new byte[n_tokens];
_tokens = new LLamaToken[n_tokens];
_positions = new LLamaPos[n_tokens];
_sequenceIdCount = new int[n_tokens];
_sequenceIdsPtrs = new IntPtr[_sequenceIdCount.Length];
_sequenceIds = new LLamaSeqId[n_tokens][];
for (var i = 0; i < _sequenceIds.Length; i++)
_sequenceIds[i] = new LLamaSeqId[n_seq_max];
}
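/// <summary>
/// Pin the managed arrays and expose them to native code as a <see cref="LLamaNativeBatch"/>
/// </summary>
/// <param name="batch">A native view over the pinned arrays, valid only while the returned group is undisposed</param>
/// <returns>A <see cref="GroupDisposable"/> holding all of the memory pins; dispose it once the native call has completed</returns>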
internal GroupDisposable ToNativeBatch(out LLamaNativeBatch batch)
{
// This group holds all of the memory pins
var group = new GroupDisposable();
unsafe
{
batch = new LLamaNativeBatch
{
n_tokens = TokenCount,
logits = (byte*)group.Add(_logits.AsMemory().Pin()).Pointer,
n_seq_id = (int*)group.Add(_sequenceIdCount.AsMemory().Pin()).Pointer,
pos = (LLamaPos*)group.Add(_positions.AsMemory().Pin()).Pointer,
seq_id = (LLamaSeqId**)group.Add(_sequenceIdsPtrs.AsMemory().Pin()).Pointer,
// embd is not currently supported, so this is always null!
embd = null,
// Note that if embd is **not null** then this will be null!
tokens = (LLamaToken*)group.Add(_tokens.AsMemory().Pin()).Pointer,
};
// Create pointers to each of the sequence id arrays in turn
for (var i = 0; i < _sequenceIdsPtrs.Length; i++)
_sequenceIdsPtrs[i] = (IntPtr)group.Add(_sequenceIds[i].AsMemory().Pin()).Pointer;
}
return group;
}
/// <summary>
/// Add a single token to the batch at the same position in several sequences
/// </summary>
/// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
/// <param name="token">The token to add</param>
/// <param name="pos">The position to add it at</param>
/// <param name="sequences">The set of sequences to add this token to</param>
/// <param name="logits">Whether logits should be generated for this token</param>
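/// <example>
/// For instance, adding one token to two sequences at once (the values are
/// illustrative; an array converts implicitly to <see cref="ReadOnlySpan{T}"/>):
/// <code>
/// var sequences = new[] { (LLamaSeqId)0, (LLamaSeqId)1 };
/// batch.LLamaBatchAdd(token, (LLamaPos)42, sequences, logits: false);
/// </code>
/// </example>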
public void LLamaBatchAdd(LLamaToken token, LLamaPos pos, ReadOnlySpan<LLamaSeqId> sequences, bool logits)
{
_tokens[TokenCount] = token;
_positions[TokenCount] = pos;
_sequenceIdCount[TokenCount] = sequences.Length;
for (var i = 0; i < sequences.Length; i++)
_sequenceIds[TokenCount][i] = sequences[i];
_logits[TokenCount] = Convert.ToByte(logits);
TokenCount++;
}
/// <summary>
/// Add a single token to the batch at a certain position for a single sequence
/// </summary>
/// <remarks>https://github.com/ggerganov/llama.cpp/blob/ad939626577cd25b462e8026cc543efb71528472/common/common.cpp#L829C2-L829C2</remarks>
/// <param name="token">The token to add</param>
/// <param name="pos">The position to add it at</param>
/// <param name="sequence">The sequence to add this token to</param>
/// <param name="logits">Whether logits should be generated for this token</param>
public void LLamaBatchAdd(LLamaToken token, LLamaPos pos, LLamaSeqId sequence, bool logits)
{
// Create a temporary span to contain 1 item without allocating
Span<LLamaSeqId> sequences = stackalloc LLamaSeqId[1];
sequences[0] = sequence;
// Add it
LLamaBatchAdd(token, pos, sequences, logits);
}
/// <summary>
/// Set TokenCount to zero for this batch
/// </summary>
public void LLamaBatchClear()
{
TokenCount = 0;
}
}