diff --git a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
index d8c366bc..b72f49a0 100644
--- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
+++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
@@ -104,6 +104,6 @@ namespace LLamaSharp.KernelMemory
}
    /// <inheritdoc/>
- public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length;
+ public int CountTokens(string text) => _embedder.Context.Tokenize(text, special: true).Length;
}
}
diff --git a/LLama.KernelMemory/LlamaSharpTextGenerator.cs b/LLama.KernelMemory/LlamaSharpTextGenerator.cs
index de6373ee..e3d18b3c 100644
--- a/LLama.KernelMemory/LlamaSharpTextGenerator.cs
+++ b/LLama.KernelMemory/LlamaSharpTextGenerator.cs
@@ -1,13 +1,7 @@
using LLama;
-using LLama.Abstractions;
using LLama.Common;
using LLama.Native;
using Microsoft.KernelMemory.AI;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
namespace LLamaSharp.KernelMemory
{
@@ -111,6 +105,6 @@ namespace LLamaSharp.KernelMemory
}
    /// <inheritdoc/>
- public int CountTokens(string text) => _context.Tokenize(text).Length;
+ public int CountTokens(string text) => _context.Tokenize(text, special: true).Length;
}
}
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index c3a9a420..917dc5eb 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -38,8 +38,8 @@ namespace LLama
ILogger? logger = null)
: base(context, logger)
{
- _inp_pfx = Context.Tokenize(instructionPrefix, true);
- _inp_sfx = Context.Tokenize(instructionSuffix, false);
+ _inp_pfx = Context.Tokenize(instructionPrefix, true, true);
+ _inp_sfx = Context.Tokenize(instructionSuffix, false, true);
_instructionPrefix = instructionPrefix;
}
@@ -124,7 +124,7 @@ namespace LLama
if (_is_prompt_run)
{
// When running the first input (prompt) in inteactive mode, we should specially process it.
- _embed_inps = Context.Tokenize(text, true).ToList();
+ _embed_inps = Context.Tokenize(text, true, true).ToList();
}
else
{
@@ -135,7 +135,7 @@ namespace LLama
_consumedTokensCount = _embed_inps.Count;
_embed_inps.AddRange(_inp_pfx);
- var line_inp = Context.Tokenize(text, false);
+ var line_inp = Context.Tokenize(text, false, true);
_embed_inps.AddRange(line_inp);
_embed_inps.AddRange(_inp_sfx);
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index 5acf4bd3..9aaa1ca2 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -119,7 +119,7 @@ namespace LLama
// When running the first input (prompt) in interactive mode, we should specially process it.
if (!this.IsMultiModal)
{
- _embed_inps = Context.Tokenize(text, true).ToList();
+ _embed_inps = Context.Tokenize(text, true, true).ToList();
}
else
{
@@ -135,7 +135,7 @@ namespace LLama
if (!this.IsMultiModal)
{
- var line_inp = Context.Tokenize(text, false);
+ var line_inp = Context.Tokenize(text, false, true);
_embed_inps.AddRange(line_inp);
args.RemainedTokens -= line_inp.Length;
}
@@ -165,11 +165,11 @@ namespace LLama
int imageIndex = text.IndexOf("<image>");
// Tokenize segment 1 (before <image> tag)
string preImagePrompt = text.Substring(0, imageIndex);
- var segment1 = Context.Tokenize(preImagePrompt, addBos );
+ var segment1 = Context.Tokenize(preImagePrompt, addBos, true);
// Remember the position to add the image embeddings
_EmbedImagePosition = segment1.Length;
string postImagePrompt = text.Substring(imageIndex + 7);
- var segment2 = Context.Tokenize(postImagePrompt, false);
+ var segment2 = Context.Tokenize(postImagePrompt, false, true);
_embed_inps.AddRange(segment1);
_embed_inps.AddRange(segment2);
usedTokens += (segment1.Length + segment2.Length);
@@ -178,11 +178,11 @@ namespace LLama
{
if (addBos)
{
- _embed_inps = Context.Tokenize(text, true).ToList();
+ _embed_inps = Context.Tokenize(text, true, true).ToList();
}
else
{
- var line_inp = Context.Tokenize(text, false);
+ var line_inp = Context.Tokenize(text, false, true);
_embed_inps.AddRange(line_inp);
args.RemainedTokens -= line_inp.Length;
}
diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs
index 487fe293..a3c52a02 100644
--- a/LLama/LLamaStatelessExecutor.cs
+++ b/LLama/LLamaStatelessExecutor.cs
@@ -90,7 +90,7 @@ namespace LLama
lastTokens.Add(0);
// Tokenize the prompt
- var tokens = Context.Tokenize(prompt).ToList();
+ var tokens = Context.Tokenize(prompt, special: true).ToList();
lastTokens.AddRange(tokens);
// Evaluate the prompt, in chunks smaller than the max batch size