diff --git a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs index d8c366bc..b72f49a0 100644 --- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs +++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs @@ -104,6 +104,6 @@ namespace LLamaSharp.KernelMemory } /// <inheritdoc/> - public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length; + public int CountTokens(string text) => _embedder.Context.Tokenize(text, special: true).Length; } } diff --git a/LLama.KernelMemory/LlamaSharpTextGenerator.cs b/LLama.KernelMemory/LlamaSharpTextGenerator.cs index de6373ee..e3d18b3c 100644 --- a/LLama.KernelMemory/LlamaSharpTextGenerator.cs +++ b/LLama.KernelMemory/LlamaSharpTextGenerator.cs @@ -1,13 +1,7 @@ using LLama; -using LLama.Abstractions; using LLama.Common; using LLama.Native; using Microsoft.KernelMemory.AI; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace LLamaSharp.KernelMemory { @@ -111,6 +105,6 @@ namespace LLamaSharp.KernelMemory } /// <inheritdoc/> - public int CountTokens(string text) => _context.Tokenize(text).Length; + public int CountTokens(string text) => _context.Tokenize(text, special: true).Length; } } diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs index c3a9a420..917dc5eb 100644 --- a/LLama/LLamaInstructExecutor.cs +++ b/LLama/LLamaInstructExecutor.cs @@ -38,8 +38,8 @@ namespace LLama ILogger? logger = null) : base(context, logger) { - _inp_pfx = Context.Tokenize(instructionPrefix, true); - _inp_sfx = Context.Tokenize(instructionSuffix, false); + _inp_pfx = Context.Tokenize(instructionPrefix, true, true); + _inp_sfx = Context.Tokenize(instructionSuffix, false, true); _instructionPrefix = instructionPrefix; } @@ -124,7 +124,7 @@ namespace LLama if (_is_prompt_run) { // When running the first input (prompt) in inteactive mode, we should specially process it. 
- _embed_inps = Context.Tokenize(text, true).ToList(); + _embed_inps = Context.Tokenize(text, true, true).ToList(); } else { @@ -135,7 +135,7 @@ namespace LLama _consumedTokensCount = _embed_inps.Count; _embed_inps.AddRange(_inp_pfx); - var line_inp = Context.Tokenize(text, false); + var line_inp = Context.Tokenize(text, false, true); _embed_inps.AddRange(line_inp); _embed_inps.AddRange(_inp_sfx); diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs index 5acf4bd3..9aaa1ca2 100644 --- a/LLama/LLamaInteractExecutor.cs +++ b/LLama/LLamaInteractExecutor.cs @@ -119,7 +119,7 @@ namespace LLama // When running the first input (prompt) in interactive mode, we should specially process it. if (!this.IsMultiModal) { - _embed_inps = Context.Tokenize(text, true).ToList(); + _embed_inps = Context.Tokenize(text, true, true).ToList(); } else { @@ -135,7 +135,7 @@ namespace LLama if (!this.IsMultiModal) { - var line_inp = Context.Tokenize(text, false); + var line_inp = Context.Tokenize(text, false, true); _embed_inps.AddRange(line_inp); args.RemainedTokens -= line_inp.Length; } @@ -165,11 +165,11 @@ namespace LLama int imageIndex = text.IndexOf("<image>"); // Tokenize segment 1 (before <image> tag) string preImagePrompt = text.Substring(0, imageIndex); - var segment1 = Context.Tokenize(preImagePrompt, addBos ); + var segment1 = Context.Tokenize(preImagePrompt, addBos, true); // Remember the position to add the image embeddings _EmbedImagePosition = segment1.Length; string postImagePrompt = text.Substring(imageIndex + 7); - var segment2 = Context.Tokenize(postImagePrompt, false); + var segment2 = Context.Tokenize(postImagePrompt, false, true); _embed_inps.AddRange(segment1); _embed_inps.AddRange(segment2); usedTokens += (segment1.Length + segment2.Length); @@ -178,11 +178,11 @@ namespace LLama { if (addBos) { - _embed_inps = Context.Tokenize(text, true).ToList(); + _embed_inps = Context.Tokenize(text, true, true).ToList(); } else { - var line_inp = Context.Tokenize(text, 
false); + var line_inp = Context.Tokenize(text, false, true); _embed_inps.AddRange(line_inp); args.RemainedTokens -= line_inp.Length; } diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index 487fe293..a3c52a02 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -90,7 +90,7 @@ namespace LLama lastTokens.Add(0); // Tokenize the prompt - var tokens = Context.Tokenize(prompt).ToList(); + var tokens = Context.Tokenize(prompt, special: true).ToList(); lastTokens.AddRange(tokens); // Evaluate the prompt, in chunks smaller than the max batch size