@@ -11,7 +11,7 @@ using System.Threading.Tasks;
 using LLama.Exceptions;
 using LLama.Extensions;
 using Microsoft.Extensions.Logging;
+using System.Net.Http;
 
 namespace LLama
 {
@@ -136,20 +136,29 @@ namespace LLama
                     text += "\n";
                 }
 
-                var line_inp = Context.Tokenize(text, false);
-                _embed_inps.AddRange(line_inp);
-                args.RemainedTokens -= line_inp.Length;
+                if (!this.IsMultiModal)
+                {
+                    var line_inp = Context.Tokenize(text, false);
+                    _embed_inps.AddRange(line_inp);
+                    args.RemainedTokens -= line_inp.Length;
+                }
+                else
+                {
+                    PreprocessLlava(text, args, false);
+                }
             }
 
             return Task.CompletedTask;
         }
 
         /// <inheritdoc />
         private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true )
         {
             int usedTokens = 0;
 
             // If the prompt contains the tag <image> extract this.
             _imageInPrompt = text.Contains("<image>");
-            if (_imageInPrompt && ClipModel != null)
+            if (_imageInPrompt && IsMultiModal )
             {
                 foreach (var image in Images)
                 {
@@ -170,7 +179,16 @@ namespace LLama
             }
             else
             {
-                _embed_inps = Context.Tokenize(text, true).ToList();
+                if (addBos)
+                {
+                    _embed_inps = Context.Tokenize(text, true).ToList();
+                }
+                else
+                {
+                    var line_inp = Context.Tokenize(text, false);
+                    _embed_inps.AddRange(line_inp);
+                    args.RemainedTokens -= line_inp.Length;
+                }
             }
             return Task.CompletedTask;
         }
@@ -239,6 +257,7 @@ namespace LLama
 
                     _EmbedImagePosition = -1;
                     _imageEmbedHandles.Clear();
                     Images.Clear();
                 }
                 else
                 {