diff --git a/LLama/Native/SafeLLamaContextHandle.cs b/LLama/Native/SafeLLamaContextHandle.cs
index c9e959a0..51568769 100644
--- a/LLama/Native/SafeLLamaContextHandle.cs
+++ b/LLama/Native/SafeLLamaContextHandle.cs
@@ -368,6 +368,10 @@ namespace LLama.Native
///
public DecodeResult Decode(LLamaBatch batch)
{
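+ // Nothing to decode: short-circuit and report success without calling into native code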
+ if (batch.TokenCount == 0)
+ return DecodeResult.Ok;
+
lock (GlobalInferenceLock)
using (batch.ToNativeBatch(out var nb))
return (DecodeResult)llama_decode(this, nb);
@@ -383,6 +387,10 @@ namespace LLama.Native
/// <returns>A tuple, containing the decode result and the number of tokens that have not been decoded yet.</returns>
internal (DecodeResult, int) Decode(List<LLamaToken> tokens, LLamaSeqId id, LLamaBatch batch, ref int n_past)
{
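+ // Nothing to evaluate: report success with zero tokens left to decode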
+ if (tokens.Count == 0)
+ return (DecodeResult.Ok, 0);
+
var batchSize = checked((int)BatchSize);
// Evaluate the prompt, in chunks smaller than the max batch size