| @@ -104,6 +104,6 @@ namespace LLamaSharp.KernelMemory | |||||
| } | } | ||||
| /// <inheritdoc/> | /// <inheritdoc/> | ||||
| public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length; | |||||
| public int CountTokens(string text) => _embedder.Context.Tokenize(text, special: true).Length; | |||||
| } | } | ||||
| } | } | ||||
| @@ -1,13 +1,7 @@ | |||||
| using LLama; | using LLama; | ||||
| using LLama.Abstractions; | |||||
| using LLama.Common; | using LLama.Common; | ||||
| using LLama.Native; | using LLama.Native; | ||||
| using Microsoft.KernelMemory.AI; | using Microsoft.KernelMemory.AI; | ||||
| using System; | |||||
| using System.Collections.Generic; | |||||
| using System.Linq; | |||||
| using System.Text; | |||||
| using System.Threading.Tasks; | |||||
| namespace LLamaSharp.KernelMemory | namespace LLamaSharp.KernelMemory | ||||
| { | { | ||||
| @@ -111,6 +105,6 @@ namespace LLamaSharp.KernelMemory | |||||
| } | } | ||||
| /// <inheritdoc/> | /// <inheritdoc/> | ||||
| public int CountTokens(string text) => _context.Tokenize(text).Length; | |||||
| public int CountTokens(string text) => _context.Tokenize(text, special: true).Length; | |||||
| } | } | ||||
| } | } | ||||
| @@ -38,8 +38,8 @@ namespace LLama | |||||
| ILogger? logger = null) | ILogger? logger = null) | ||||
| : base(context, logger) | : base(context, logger) | ||||
| { | { | ||||
| _inp_pfx = Context.Tokenize(instructionPrefix, true); | |||||
| _inp_sfx = Context.Tokenize(instructionSuffix, false); | |||||
| _inp_pfx = Context.Tokenize(instructionPrefix, true, true); | |||||
| _inp_sfx = Context.Tokenize(instructionSuffix, false, true); | |||||
| _instructionPrefix = instructionPrefix; | _instructionPrefix = instructionPrefix; | ||||
| } | } | ||||
| @@ -124,7 +124,7 @@ namespace LLama | |||||
| if (_is_prompt_run) | if (_is_prompt_run) | ||||
| { | { | ||||
| // When running the first input (prompt) in inteactive mode, we should specially process it. | // When running the first input (prompt) in inteactive mode, we should specially process it. | ||||
| _embed_inps = Context.Tokenize(text, true).ToList(); | |||||
| _embed_inps = Context.Tokenize(text, true, true).ToList(); | |||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| @@ -135,7 +135,7 @@ namespace LLama | |||||
| _consumedTokensCount = _embed_inps.Count; | _consumedTokensCount = _embed_inps.Count; | ||||
| _embed_inps.AddRange(_inp_pfx); | _embed_inps.AddRange(_inp_pfx); | ||||
| var line_inp = Context.Tokenize(text, false); | |||||
| var line_inp = Context.Tokenize(text, false, true); | |||||
| _embed_inps.AddRange(line_inp); | _embed_inps.AddRange(line_inp); | ||||
| _embed_inps.AddRange(_inp_sfx); | _embed_inps.AddRange(_inp_sfx); | ||||
| @@ -119,7 +119,7 @@ namespace LLama | |||||
| // When running the first input (prompt) in interactive mode, we should specially process it. | // When running the first input (prompt) in interactive mode, we should specially process it. | ||||
| if (!this.IsMultiModal) | if (!this.IsMultiModal) | ||||
| { | { | ||||
| _embed_inps = Context.Tokenize(text, true).ToList(); | |||||
| _embed_inps = Context.Tokenize(text, true, true).ToList(); | |||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| @@ -135,7 +135,7 @@ namespace LLama | |||||
| if (!this.IsMultiModal) | if (!this.IsMultiModal) | ||||
| { | { | ||||
| var line_inp = Context.Tokenize(text, false); | |||||
| var line_inp = Context.Tokenize(text, false, true); | |||||
| _embed_inps.AddRange(line_inp); | _embed_inps.AddRange(line_inp); | ||||
| args.RemainedTokens -= line_inp.Length; | args.RemainedTokens -= line_inp.Length; | ||||
| } | } | ||||
| @@ -165,11 +165,11 @@ namespace LLama | |||||
| int imageIndex = text.IndexOf("<image>"); | int imageIndex = text.IndexOf("<image>"); | ||||
| // Tokenize segment 1 (before <image> tag) | // Tokenize segment 1 (before <image> tag) | ||||
| string preImagePrompt = text.Substring(0, imageIndex); | string preImagePrompt = text.Substring(0, imageIndex); | ||||
| var segment1 = Context.Tokenize(preImagePrompt, addBos ); | |||||
| var segment1 = Context.Tokenize(preImagePrompt, addBos, true); | |||||
| // Remember the position to add the image embeddings | // Remember the position to add the image embeddings | ||||
| _EmbedImagePosition = segment1.Length; | _EmbedImagePosition = segment1.Length; | ||||
| string postImagePrompt = text.Substring(imageIndex + 7); | string postImagePrompt = text.Substring(imageIndex + 7); | ||||
| var segment2 = Context.Tokenize(postImagePrompt, false); | |||||
| var segment2 = Context.Tokenize(postImagePrompt, false, true); | |||||
| _embed_inps.AddRange(segment1); | _embed_inps.AddRange(segment1); | ||||
| _embed_inps.AddRange(segment2); | _embed_inps.AddRange(segment2); | ||||
| usedTokens += (segment1.Length + segment2.Length); | usedTokens += (segment1.Length + segment2.Length); | ||||
| @@ -178,11 +178,11 @@ namespace LLama | |||||
| { | { | ||||
| if (addBos) | if (addBos) | ||||
| { | { | ||||
| _embed_inps = Context.Tokenize(text, true).ToList(); | |||||
| _embed_inps = Context.Tokenize(text, true, true).ToList(); | |||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| var line_inp = Context.Tokenize(text, false); | |||||
| var line_inp = Context.Tokenize(text, false, true); | |||||
| _embed_inps.AddRange(line_inp); | _embed_inps.AddRange(line_inp); | ||||
| args.RemainedTokens -= line_inp.Length; | args.RemainedTokens -= line_inp.Length; | ||||
| } | } | ||||
| @@ -90,7 +90,7 @@ namespace LLama | |||||
| lastTokens.Add(0); | lastTokens.Add(0); | ||||
| // Tokenize the prompt | // Tokenize the prompt | ||||
| var tokens = Context.Tokenize(prompt).ToList(); | |||||
| var tokens = Context.Tokenize(prompt, special: true).ToList(); | |||||
| lastTokens.AddRange(tokens); | lastTokens.AddRange(tokens); | ||||
| // Evaluate the prompt, in chunks smaller than the max batch size | // Evaluate the prompt, in chunks smaller than the max batch size | ||||