Made special tokens included in prompts tokenize as intended (#677)

1 year ago · f01c13ee54
--- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
+++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
@@ -104,6 +104,6 @@ namespace LLamaSharp.KernelMemory
        }
        /// <inheritdoc/>
        public int CountTokens(string text) => _embedder.Context.Tokenize(text).Length;
        public int CountTokens(string text) => _embedder.Context.Tokenize(text, special: true).Length;
    }
 }
--- a/LLama.KernelMemory/LlamaSharpTextGenerator.cs
+++ b/LLama.KernelMemory/LlamaSharpTextGenerator.cs
@@ -1,13 +1,7 @@
 using LLama;
 using LLama.Abstractions;
 using LLama.Common;
 using LLama.Native;
 using Microsoft.KernelMemory.AI;
 using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
 namespace LLamaSharp.KernelMemory
 {
@@ -111,6 +105,6 @@ namespace LLamaSharp.KernelMemory
        }
        /// <inheritdoc/>
        public int CountTokens(string text) => _context.Tokenize(text).Length;
        public int CountTokens(string text) => _context.Tokenize(text, special: true).Length;
    }
 }
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -38,8 +38,8 @@ namespace LLama
                                ILogger? logger = null)
            : base(context, logger)
        {
            _inp_pfx = Context.Tokenize(instructionPrefix, true);
            _inp_sfx = Context.Tokenize(instructionSuffix, false);
            _inp_pfx = Context.Tokenize(instructionPrefix, true, true);
            _inp_sfx = Context.Tokenize(instructionSuffix, false, true);
            _instructionPrefix = instructionPrefix;
        }
@@ -124,7 +124,7 @@ namespace LLama
            if (_is_prompt_run)
            {
                // When running the first input (prompt) in interactive mode, we should specially process it.
                _embed_inps = Context.Tokenize(text, true).ToList();
                _embed_inps = Context.Tokenize(text, true, true).ToList();
            }
            else
            {
@@ -135,7 +135,7 @@ namespace LLama
                _consumedTokensCount = _embed_inps.Count;
                _embed_inps.AddRange(_inp_pfx);
                var line_inp = Context.Tokenize(text, false);
                var line_inp = Context.Tokenize(text, false, true);
                _embed_inps.AddRange(line_inp);
                _embed_inps.AddRange(_inp_sfx);
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -119,7 +119,7 @@ namespace LLama
                // When running the first input (prompt) in interactive mode, we should specially process it.
                if (!this.IsMultiModal)
                {
                    _embed_inps = Context.Tokenize(text, true).ToList();
                    _embed_inps = Context.Tokenize(text, true, true).ToList();
                }
                else
                {
@@ -135,7 +135,7 @@ namespace LLama
                if (!this.IsMultiModal)
                {
                    var line_inp = Context.Tokenize(text, false);
                    var line_inp = Context.Tokenize(text, false, true);
                    _embed_inps.AddRange(line_inp);
                    args.RemainedTokens -= line_inp.Length;
                }
@@ -165,11 +165,11 @@ namespace LLama
                int imageIndex = text.IndexOf("<image>");
                // Tokenize segment 1 (before <image> tag)
                string preImagePrompt = text.Substring(0, imageIndex);
                var segment1 = Context.Tokenize(preImagePrompt, addBos );
                var segment1 = Context.Tokenize(preImagePrompt, addBos, true);
                // Remember the position to add the image embeddings
                _EmbedImagePosition = segment1.Length;
                string postImagePrompt = text.Substring(imageIndex + 7);
                var segment2 = Context.Tokenize(postImagePrompt, false);
                var segment2 = Context.Tokenize(postImagePrompt, false, true);
                _embed_inps.AddRange(segment1);
                _embed_inps.AddRange(segment2);
                usedTokens += (segment1.Length + segment2.Length);
@@ -178,11 +178,11 @@ namespace LLama
            {
                if (addBos)
                {
                    _embed_inps = Context.Tokenize(text, true).ToList();
                    _embed_inps = Context.Tokenize(text, true, true).ToList();
                }
                else
                {
                    var line_inp = Context.Tokenize(text, false);
                    var line_inp = Context.Tokenize(text, false, true);
                    _embed_inps.AddRange(line_inp);
                    args.RemainedTokens -= line_inp.Length;                    
                }
--- a/LLama/LLamaStatelessExecutor.cs
+++ b/LLama/LLamaStatelessExecutor.cs
@@ -90,7 +90,7 @@ namespace LLama
                lastTokens.Add(0);
            // Tokenize the prompt
            var tokens = Context.Tokenize(prompt).ToList();
            var tokens = Context.Tokenize(prompt, special: true).ToList();
            lastTokens.AddRange(tokens);
            // Evaluate the prompt, in chunks smaller than the max batch size