diff --git a/LLama/Native/SafeLLamaContextHandle.cs b/LLama/Native/SafeLLamaContextHandle.cs index c9e959a0..51568769 100644 --- a/LLama/Native/SafeLLamaContextHandle.cs +++ b/LLama/Native/SafeLLamaContextHandle.cs @@ -368,6 +368,9 @@ namespace LLama.Native /// </summary> public DecodeResult Decode(LLamaBatch batch) { + if (batch.TokenCount == 0) + return DecodeResult.Ok; + lock (GlobalInferenceLock) using (batch.ToNativeBatch(out var nb)) return (DecodeResult)llama_decode(this, nb); @@ -383,6 +386,9 @@ /// <returns>A tuple, containing the decode result and the number of tokens that have not been decoded yet.</returns> internal (DecodeResult, int) Decode(List<LLamaToken> tokens, LLamaSeqId id, LLamaBatch batch, ref int n_past) { + if (tokens.Count == 0) + return (DecodeResult.Ok, 0); + var batchSize = checked((int)BatchSize); // Evaluate the prompt, in chunks smaller than the max batch size