@@ -1,5 +1,4 @@
 using System;
 using System.Collections.Generic;
 
 namespace LLama.Native;
@@ -35,11 +34,11 @@ public class LLamaBatch
     /// <summary>
     /// Create a new batch for submitting inputs to llama.cpp
    /// </summary>
-    /// <param name="n_seq_max">Max number of sequences a token can be assigned to</param>
-    public LLamaBatch(int n_seq_max)
+    public LLamaBatch()
     {
-        // The number of tokens can be grown later, start off with a reasonable guess.
-        const int n_tokens = 64;
+        // These can both be grown later, start off with reasonable numbers.
+        const int n_tokens = 128;
+        const int n_seq_max = 4;
 
         MaxSequences = n_seq_max;
         TokenCapacity = n_tokens;
@@ -56,7 +55,7 @@ public class LLamaBatch
             _sequenceIds[i] = new LLamaSeqId[MaxSequences];
     }
 
-    private void Grow()
+    private void GrowTokenCapacity()
     {
         var n_tokens = TokenCount * 2;
         TokenCapacity = n_tokens;
@@ -78,6 +77,15 @@ public class LLamaBatch
         }
     }
 
+    private void GrowMaxSequences()
+    {
+        var n_seq = MaxSequences * 2;
+        MaxSequences = n_seq;
+
+        for (var i = 0; i < _sequenceIds.Length; i++)
+            Array.Resize(ref _sequenceIds[i], MaxSequences);
+    }
+
     internal GroupDisposable ToNativeBatch(out LLamaNativeBatch batch)
     {
         // This group holds all of the memory pins
@@ -120,7 +128,9 @@ public class LLamaBatch
     public void Add(LLamaToken token, LLamaPos pos, ReadOnlySpan<LLamaSeqId> sequences, bool logits)
     {
         if (TokenCount == TokenCapacity)
-            Grow();
+            GrowTokenCapacity();
+        if (sequences.Length > MaxSequences)
+            GrowMaxSequences();
 
         _tokens[TokenCount] = token;
         _positions[TokenCount] = pos;
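
Below is a minimal usage sketch of the reworked API, assuming the `int` conversion operators on `LLamaToken`, `LLamaPos`, and `LLamaSeqId`; the token and position values are hypothetical, not taken from this diff. The point is that callers no longer pick `n_seq_max` up front: both capacities now grow on demand inside `Add`.

```csharp
using System;
using LLama.Native;

// A sketch, not part of this PR: exercise the new parameterless constructor.
var batch = new LLamaBatch();

// Assigning a token to more sequences than the default of 4 now triggers
// GrowMaxSequences() automatically, instead of requiring callers to size
// the batch up front.
var seqs = new[]
{
    (LLamaSeqId)0, (LLamaSeqId)1, (LLamaSeqId)2,
    (LLamaSeqId)3, (LLamaSeqId)4,
};
batch.Add((LLamaToken)42, (LLamaPos)0, seqs, logits: true);

// Adding past the initial 128-token capacity doubles it via GrowTokenCapacity().
for (var i = 0; i < 200; i++)
    batch.Add((LLamaToken)i, (LLamaPos)(i + 1), seqs.AsSpan(0, 1), logits: false);
```

Doubling on overflow keeps `Add` amortised O(1), and `Array.Resize` preserves the per-token sequence IDs already written, so growth is transparent to callers.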