# Batch decoding
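This example decodes several sequences in parallel from one prompt. The prompt is evaluated once for sequence 0, its KV cache entries are shared with the other sequences via `KvCacheSequenceCopy` (so the prompt is never re-evaluated or copied), and each iteration then packs one new token per active stream into a single `LLamaBatch`. Every stream samples with its own pipeline, here with a slightly different temperature, so the completions diverge.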
```cs
using System.Diagnostics;
using System.Text;
using LLama;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;

public class BatchedDecoding
{
    private const int n_parallel = 8;
    private const int n_len = 32;

    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        Console.WriteLine("Prompt (leave blank to select automatically):");
        var prompt = Console.ReadLine();
        if (string.IsNullOrWhiteSpace(prompt))
            prompt = "Not many people know that";

        // Load model
        var parameters = new ModelParams(modelPath);
        using var model = LLamaWeights.LoadFromFile(parameters);

        // Tokenize prompt
        var prompt_tokens = model.Tokenize(prompt, true, false, Encoding.UTF8);

        // Each sequence holds the shared prompt plus its own generated tokens
        var n_kv_req = prompt_tokens.Length + (n_len - prompt_tokens.Length) * n_parallel;

        // Create a context
        parameters.ContextSize = (uint)model.ContextSize;
        parameters.BatchSize = (uint)Math.Max(n_len, n_parallel);
        using var context = model.CreateContext(parameters);

        var n_ctx = context.ContextSize;

        // make sure the KV cache is big enough to hold all the prompt and generated tokens
        if (n_kv_req > n_ctx)
        {
            await Console.Error.WriteLineAsync($"error: n_kv_req ({n_kv_req}) > n_ctx ({n_ctx}), the required KV cache size is not big enough\n");
            await Console.Error.WriteLineAsync("        either reduce n_parallel or increase n_ctx\n");
            return;
        }

        var batch = new LLamaBatch();

        // evaluate the initial prompt
        batch.AddRange(prompt_tokens, 0, LLamaSeqId.Zero, true);

        if (await context.DecodeAsync(batch) != DecodeResult.Ok)
        {
            await Console.Error.WriteLineAsync("llama_decode failed");
            return;
        }

        // assign the system KV cache to all parallel sequences
        // this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
        for (var i = 1; i < n_parallel; ++i)
        {
            context.NativeHandle.KvCacheSequenceCopy((LLamaSeqId)0, (LLamaSeqId)i, 0, batch.TokenCount);
        }

        if (n_parallel > 1)
        {
            Console.WriteLine();
            Console.WriteLine($"generating {n_parallel} sequences...");
        }

        // remember the batch index of the last token for each parallel sequence
        // we need this to determine which logits to sample from
        List<int> i_batch = new();
        for (var i = 0; i < n_parallel; i++)
            i_batch.Add(batch.TokenCount - 1);

        // Create per-stream decoder and sampler
        var decoders = new StreamingTokenDecoder[n_parallel];
        var samplers = new ISamplingPipeline[n_parallel];
        for (var i = 0; i < n_parallel; i++)
        {
            decoders[i] = new StreamingTokenDecoder(context);
            samplers[i] = new DefaultSamplingPipeline
            {
                Temperature = 0.1f + (float)i / n_parallel,
                MinP = 0.25f,
            };
        }

        var n_cur = batch.TokenCount;
        var n_decode = 0;

        var timer = new Stopwatch();
        timer.Start();
        while (n_cur <= n_len)
        {
            batch.Clear();

            for (var i = 0; i < n_parallel; i++)
            {
                // Skip completed streams
                if (i_batch[i] < 0)
                    continue;

                // Use the sampling pipeline to select a token
                var new_token_id = samplers[i].Sample(
                    context.NativeHandle,
                    context.NativeHandle.GetLogitsIth(i_batch[i]),
                    Array.Empty<LLamaToken>()
                );

                // Finish this stream early if necessary
                if (new_token_id == model.EndOfSentenceToken || new_token_id == model.NewlineToken)
                {
                    i_batch[i] = -1;
                    Console.WriteLine($"Completed Stream {i} early");
                    continue;
                }

                // Add this token to the decoder, so it will be turned into text
                decoders[i].Add(new_token_id);

                i_batch[i] = batch.TokenCount;

                // push this new token for next evaluation
                batch.Add(new_token_id, n_cur, (LLamaSeqId)i, true);

                n_decode++;
            }

            // Check if all streams are finished
            if (batch.TokenCount == 0)
            {
                break;
            }

            n_cur++;

            // evaluate the current batch with the transformer model
            if (await context.DecodeAsync(batch) != DecodeResult.Ok)
            {
                await Console.Error.WriteLineAsync("failed to eval");
                return;
            }
        }

        timer.Stop();
        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine();
        Console.WriteLine($"Decoded {n_decode} tokens in {timer.ElapsedMilliseconds}ms");
        Console.WriteLine($"Rate: {n_decode / timer.Elapsed.TotalSeconds:##.000} tokens/second");

        var index = 0;
        foreach (var stream in decoders)
        {
            var text = stream.Read();

            Console.ForegroundColor = ConsoleColor.Green;
            Console.Write($"{index++}. {prompt}");
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine(text);
        }

        Console.WriteLine("Press any key to exit demo");
        Console.ReadKey(true);
    }
}
```
# Chat Chinese
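This example runs a chat session in Chinese on a console that uses the GB2312 encoding, which is common on Chinese-locale Windows. User input is converted from GB2312 to UTF-8 before it reaches the model, and the model's UTF-8 output is converted back for display. Type `save` to persist the session, `regenerate` to re-answer the last message, or `exit` to quit.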
```cs
using System.Text;
using LLama;
using LLama.Common;

public class ChatChineseGB2312
{
    private static string ConvertEncoding(string input, Encoding original, Encoding target)
    {
        byte[] bytes = original.GetBytes(input);
        var convertedBytes = Encoding.Convert(original, target, bytes);
        return target.GetString(convertedBytes);
    }

    public static async Task Run()
    {
        // Register provider for GB2312 encoding
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("This example shows how to use Chinese with the GB2312 encoding, which is common on Windows. It's recommended" +
            " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
        Console.ForegroundColor = ConsoleColor.White;

        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5,
            Encoding = Encoding.UTF8
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        ChatSession session;
        if (Directory.Exists("Assets/chat-with-kunkun-chinese"))
        {
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Loading session from disk.");
            Console.ForegroundColor = ConsoleColor.White;

            session = new ChatSession(executor);
            session.LoadSession("Assets/chat-with-kunkun-chinese");
        }
        else
        {
            var chatHistoryJson = File.ReadAllText("Assets/chat-with-kunkun-chinese.json");
            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

            session = new ChatSession(executor, chatHistory);
        }

        session.WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户", "坤坤"));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "用户:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.White;
        Console.Write("用户:");
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            // Convert the encoding from gb2312 to utf8 for the language model
            // and later saving to the history json file.
            userInput = ConvertEncoding(userInput, Encoding.GetEncoding("gb2312"), Encoding.UTF8);

            if (userInput == "save")
            {
                session.SaveSession("Assets/chat-with-kunkun-chinese");
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Session saved.");
            }
            else if (userInput == "regenerate")
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Regenerating last response ...");

                await foreach (
                    var text
                    in session.RegenerateAssistantMessageAsync(
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;

                    // Convert the encoding from utf8 to gb2312 for the console output.
                    Console.Write(ConvertEncoding(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
                }
            }
            else
            {
                await foreach (
                    var text
                    in session.ChatAsync(
                        new ChatHistory.Message(AuthorRole.User, userInput),
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Chat session strip role name
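This example is the same chat loop as the role-name example below, except that a `KeywordTextOutputStreamTransform` strips the role names ("User:", "Assistant:") from the streamed output before it is printed.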
```cs
using LLama;
using LLama.Common;

public class ChatSessionStripRoleName
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
        ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

        ChatSession session = new(executor, chatHistory);
        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
            new string[] { "User:", "Assistant:" },
            redundancyLength: 8));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "User:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            await foreach (
                var text
                in session.ChatAsync(
                    new ChatHistory.Message(AuthorRole.User, userInput),
                    inferenceParams))
            {
                Console.ForegroundColor = ConsoleColor.White;
                Console.Write(text);
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Chat session with history
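This example resumes a chat session from a state saved on disk if one exists, and otherwise seeds it from a JSON chat history file. Type `save` to persist the session, `regenerate` to re-answer the last message, or `exit` to quit.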
```cs
using LLama.Common;

namespace LLama.Examples.Examples;

public class ChatSessionWithHistory
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        ChatSession session;
        if (Directory.Exists("Assets/chat-with-bob"))
        {
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Loading session from disk.");
            Console.ForegroundColor = ConsoleColor.White;

            session = new ChatSession(executor);
            session.LoadSession("Assets/chat-with-bob");
        }
        else
        {
            var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

            session = new ChatSession(executor, chatHistory);
        }

        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
            new string[] { "User:", "Assistant:" },
            redundancyLength: 8));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "User:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            if (userInput == "save")
            {
                session.SaveSession("Assets/chat-with-bob");
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Session saved.");
            }
            else if (userInput == "regenerate")
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Regenerating last response ...");

                await foreach (
                    var text
                    in session.RegenerateAssistantMessageAsync(
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }
            else
            {
                await foreach (
                    var text
                    in session.ChatAsync(
                        new ChatHistory.Message(AuthorRole.User, userInput),
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Chat session with role name
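This example runs a basic chat session seeded from a JSON chat history. No output transform is applied, so the role names are left in the generated text.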
```cs
using LLama;
using LLama.Common;

public class ChatSessionWithRoleName
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
        ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

        ChatSession session = new(executor, chatHistory);

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "User:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            await foreach (
                var text
                in session.ChatAsync(
                    new ChatHistory.Message(AuthorRole.User, userInput),
                    inferenceParams))
            {
                Console.ForegroundColor = ConsoleColor.White;
                Console.Write(text);
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Coding Assistant
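This example drives a Code Llama instruct model through an `InstructExecutor`, wrapping each instruction in the `[INST]`/`[/INST]` tags from the Code Llama paper. If no model path is entered, a default 7B instruct model is downloaded from HuggingFace and cached in a local `Models` directory.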
```cs
using LLama;
using LLama.Common;
using System;
using System.Reflection;

internal class CodingAssistant
{
    const string DefaultModelUri = "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_S.gguf";

    // Source paper with example prompts:
    // https://doi.org/10.48550/arXiv.2308.12950
    const string InstructionPrefix = "[INST]";
    const string InstructionSuffix = "[/INST]";
    const string SystemInstruction = "You're an intelligent, concise coding assistant. Wrap code in ``` for readability. Don't repeat yourself. Use best practice and good coding standards.";

    private static string ModelsDirectory = Path.Combine(Directory.GetParent(Assembly.GetExecutingAssembly().Location)!.FullName, "Models");

    public static async Task Run()
    {
        Console.Write("Please input your model path (if left empty, a default model will be downloaded for you): ");
        var modelPath = Console.ReadLine();

        if (string.IsNullOrWhiteSpace(modelPath))
        {
            modelPath = await GetDefaultModel();
        }

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 4096
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InstructExecutor(context, InstructionPrefix, InstructionSuffix, null);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions." +
            "\nIt's a 7B Code Llama, so it's trained for programming tasks like \"Write a C# function reading a file name from a given URI\" or \"Write some programming interview questions\"." +
            "\nWrite 'exit' to exit");
        Console.ForegroundColor = ConsoleColor.White;

        var inferenceParams = new InferenceParams()
        {
            Temperature = 0.8f,
            MaxTokens = -1,
        };

        string instruction = $"{SystemInstruction}\n\n";
        await Console.Out.WriteAsync("Instruction: ");
        instruction += Console.ReadLine() ?? "Ask me for instructions.";
        while (instruction != "exit")
        {
            Console.ForegroundColor = ConsoleColor.Green;
            await foreach (var text in executor.InferAsync(instruction + System.Environment.NewLine, inferenceParams))
            {
                Console.Write(text);
            }
            Console.ForegroundColor = ConsoleColor.White;

            await Console.Out.WriteAsync("Instruction: ");
            instruction = Console.ReadLine() ?? "Ask me for instructions.";
        }
    }

    private static async Task<string> GetDefaultModel()
    {
        var uri = new Uri(DefaultModelUri);
        var modelName = uri.Segments[^1];
        await Console.Out.WriteLineAsync($"The following model will be used: {modelName}");

        var modelPath = Path.Combine(ModelsDirectory, modelName);
        if (!Directory.Exists(ModelsDirectory))
        {
            Directory.CreateDirectory(ModelsDirectory);
        }

        if (File.Exists(modelPath))
        {
            await Console.Out.WriteLineAsync($"Existing model found, using {modelPath}");
        }
        else
        {
            await Console.Out.WriteLineAsync($"Model not found locally, downloading {DefaultModelUri}...");
            using var http = new HttpClient();
            await using var downloadStream = await http.GetStreamAsync(uri);
            await using var fileStream = new FileStream(modelPath, FileMode.Create, FileAccess.Write);
            await downloadStream.CopyToAsync(fileStream);
            await Console.Out.WriteLineAsync($"Model downloaded and saved to {modelPath}");
        }

        return modelPath;
    }
}
```
# Grammar JSON response
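This example constrains generation with a GBNF grammar loaded from `Assets/json.gbnf`, so the model's answers always follow the JSON format regardless of the question.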
```cs
using LLama;
using LLama.Common;
using LLama.Grammars;

public class GrammarJsonResponse
{
    public static async Task Run()
    {
        var gbnf = (await File.ReadAllTextAsync("Assets/json.gbnf")).Trim();
        var grammar = Grammar.Parse(gbnf, "root");

        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        var ex = new StatelessExecutor(model, parameters);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions and always respond in a JSON format. For example, you can input \"Tell me the attributes of a good dish\"");
        Console.ForegroundColor = ConsoleColor.White;

        using var grammarInstance = grammar.CreateInstance();
        var inferenceParams = new InferenceParams()
        {
            Temperature = 0.6f,
            AntiPrompts = new List<string> { "Question:", "#", "Question: ", ".\n" },
            MaxTokens = 50,
            Grammar = grammarInstance
        };

        while (true)
        {
            Console.Write("\nQuestion: ");
            Console.ForegroundColor = ConsoleColor.Green;
            var prompt = Console.ReadLine();
            Console.ForegroundColor = ConsoleColor.White;
            Console.Write("Answer: ");
            prompt = $"Question: {prompt?.Trim()} Answer: ";
            await foreach (var text in ex.InferAsync(prompt, inferenceParams))
            {
                Console.Write(text);
            }
        }
    }
}
```
# Kernel memory
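This example plugs LLamaSharp into Microsoft Kernel Memory: a PDF is imported, partitioned, and embedded, then a question is answered against the indexed content and the relevant sources are listed.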
```cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.Configuration;
using Microsoft.KernelMemory.Handlers;
using Common = LLama.Common; // alias so Common.InferenceParams below resolves outside the LLama namespace

public class KernelMemory
{
    public static async Task Run()
    {
        Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var searchClientConfig = new SearchClientConfig
        {
            MaxMatchesCount = 1,
            AnswerTokens = 100,
        };

        var memory = new KernelMemoryBuilder()
            .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath)
            {
                DefaultInferenceParams = new Common.InferenceParams
                {
                    AntiPrompts = new List<string> { "\n\n" }
                }
            })
            .WithSearchClientConfig(searchClientConfig)
            .With(new TextPartitioningOptions
            {
                MaxTokensPerParagraph = 300,
                MaxTokensPerLine = 100,
                OverlappingTokens = 30
            })
            .Build();

        await memory.ImportDocumentAsync(@"./Assets/sample-SK-Readme.pdf", steps: Constants.PipelineWithoutSummary);

        var question = "What's Semantic Kernel?";

        Console.WriteLine($"\n\nQuestion: {question}");

        var answer = await memory.AskAsync(question);

        Console.WriteLine($"\nAnswer: {answer.Result}");

        Console.WriteLine("\n\nSources:\n");

        foreach (var x in answer.RelevantSources)
        {
            Console.WriteLine($"  - {x.SourceName} - {x.Link} [{x.Partitions.First().LastUpdate:D}]");
        }
    }
}
```
# Semantic kernel memory
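This example uses `LLamaEmbedder` as the embedding generator for Semantic Kernel's memory, backed by a `VolatileMemoryStore` (an in-RAM stand-in for a vector database). A set of GitHub URLs and their descriptions is stored and then searched by semantic similarity.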
```cs
using LLama;
using LLama.Common;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Memory;
using LLamaSharp.SemanticKernel.TextEmbedding;
using Microsoft.SemanticKernel.AI.Embeddings;
using Microsoft.SemanticKernel.Plugins.Memory;

public class SemanticKernelMemory
{
    private const string MemoryCollectionName = "SKGitHub";

    public static async Task Run()
    {
        // ConsoleLogger is a small logging helper from the LLamaSharp examples project
        var loggerFactory = ConsoleLogger.LoggerFactory;
        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example14_SemanticMemory.cs");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var seed = 1337u;
        // Load weights into memory
        var parameters = new ModelParams(modelPath)
        {
            Seed = seed,
            EmbeddingMode = true
        };

        using var model = LLamaWeights.LoadFromFile(parameters);
        var embedding = new LLamaEmbedder(model, parameters);

        Console.WriteLine("====================================================");
        Console.WriteLine("======== Semantic Memory (volatile, in RAM) ========");
        Console.WriteLine("====================================================");

        /* You can build your own semantic memory combining an Embedding Generator
         * with a Memory storage that supports search by similarity (ie semantic search).
         *
         * In this example we use a volatile memory, a local simulation of a vector DB.
         *
         * You can replace VolatileMemoryStore with Qdrant (see QdrantMemoryStore connector)
         * or implement your connectors for Pinecone, Vespa, Postgres + pgvector, SQLite VSS, etc.
         */
        var memory = new MemoryBuilder()
            .WithTextEmbeddingGeneration(new LLamaSharpEmbeddingGeneration(embedding))
            .WithMemoryStore(new VolatileMemoryStore())
            .Build();

        await RunExampleAsync(memory);
    }

    private static async Task RunExampleAsync(ISemanticTextMemory memory)
    {
        await StoreMemoryAsync(memory);

        await SearchMemoryAsync(memory, "How do I get started?");

        /*
        Output:

        Query: How do I get started?

        Result 1:
          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/README.md
          Title    : README: Installation, getting started, and how to contribute

        Result 2:
          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet-jupyter-notebooks/00-getting-started.ipynb
          Title    : Jupyter notebook describing how to get started with the Semantic Kernel
        */

        await SearchMemoryAsync(memory, "Can I build a chat with SK?");

        /*
        Output:

        Query: Can I build a chat with SK?

        Result 1:
          URL:     : https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT
          Title    : Sample demonstrating how to create a chat skill interfacing with ChatGPT

        Result 2:
          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md
          Title    : README: README associated with a sample chat summary react-based webapp
        */

        await SearchMemoryAsync(memory, "Jupyter notebook");

        await SearchMemoryAsync(memory, "README: README associated with a sample chat summary react-based webapp");

        await SearchMemoryAsync(memory, "Jupyter notebook describing how to pass prompts from a file to a semantic skill or function");
    }

    private static async Task SearchMemoryAsync(ISemanticTextMemory memory, string query)
    {
        Console.WriteLine("\nQuery: " + query + "\n");

        var memories = memory.SearchAsync(MemoryCollectionName, query, limit: 10, minRelevanceScore: 0.5);

        int i = 0;
        await foreach (MemoryQueryResult result in memories)
        {
            Console.WriteLine($"Result {++i}:");
            Console.WriteLine("  URL:     : " + result.Metadata.Id);
            Console.WriteLine("  Title    : " + result.Metadata.Description);
            Console.WriteLine("  Relevance: " + result.Relevance);
            Console.WriteLine();
        }

        Console.WriteLine("----------------------");
    }

    private static async Task StoreMemoryAsync(ISemanticTextMemory memory)
    {
        /* Store some data in the semantic memory.
         *
         * When using Azure Cognitive Search the data is automatically indexed on write.
         *
         * When using the combination of VolatileStore and Embedding generation, SK takes
         * care of creating and storing the index
         */

        Console.WriteLine("\nAdding some GitHub file URLs and their descriptions to the semantic memory.");
        var githubFiles = SampleData();
        var i = 0;
        foreach (var entry in githubFiles)
        {
            var result = await memory.SaveReferenceAsync(
                collection: MemoryCollectionName,
                externalSourceName: "GitHub",
                externalId: entry.Key,
                description: entry.Value,
                text: entry.Value);

            Console.WriteLine($"#{++i} saved.");
            Console.WriteLine(result);
        }

        Console.WriteLine("\n----------------------");
    }

    private static Dictionary<string, string> SampleData()
    {
        return new Dictionary<string, string>
        {
            ["https://github.com/microsoft/semantic-kernel/blob/main/README.md"]
                = "README: Installation, getting started, and how to contribute",
            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks/02-running-prompts-from-file.ipynb"]
                = "Jupyter notebook describing how to pass prompts from a file to a semantic skill or function",
            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks/00-getting-started.ipynb"]
                = "Jupyter notebook describing how to get started with the Semantic Kernel",
            ["https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT"]
                = "Sample demonstrating how to create a chat skill interfacing with ChatGPT",
            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel/Memory/VolatileMemoryStore.cs"]
                = "C# class that defines a volatile embedding store",
            ["https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet/KernelHttpServer/README.md"]
                = "README: How to set up a Semantic Kernel Service API using Azure Function Runtime v4",
            ["https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md"]
                = "README: README associated with a sample chat summary react-based webapp",
        };
    }
}
```
# Semantic kernel prompt
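This example registers a `StatelessExecutor` as a Semantic Kernel text generation service, then invokes a prompt function built from a `{{$input}}` template to produce one-line TLDR summaries.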
```cs
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel;
using LLamaSharp.SemanticKernel.TextCompletion;
using Microsoft.SemanticKernel.TextGeneration;
using Microsoft.Extensions.DependencyInjection;

public class SemanticKernelPrompt
{
    public static async Task Run()
    {
        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        // Load weights into memory
        var parameters = new ModelParams(modelPath);
        using var model = LLamaWeights.LoadFromFile(parameters);
        var ex = new StatelessExecutor(model, parameters);

        var builder = Kernel.CreateBuilder();
        builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
        var kernel = builder.Build();

        var prompt = @"{{$input}}

One line TLDR with the fewest words.";

        ChatRequestSettings settings = new() { MaxTokens = 100 };
        var summarize = kernel.CreateFunctionFromPrompt(prompt, settings);

        string text1 = @"
1st Law of Thermodynamics - Energy cannot be created or destroyed.
2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";

        string text2 = @"
1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
2. The acceleration of an object depends on the mass of the object and the amount of force applied.
3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite force on the first.";

        Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue<string>());

        Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue<string>());
    }
}
```
# Talk to yourself
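This example creates two contexts, "Alice" and "Bob", that share the same model weights, and feeds each executor's output to the other so the model holds a conversation with itself until a key is pressed.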
```cs
using System.Text;
using LLama;
using LLama.Abstractions;
using LLama.Common;

public class TalkToYourself
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        // Load weights into memory
        var @params = new ModelParams(modelPath);
        using var weights = LLamaWeights.LoadFromFile(@params);

        // Create 2 contexts sharing the same weights
        using var aliceCtx = weights.CreateContext(@params);
        var alice = new InteractiveExecutor(aliceCtx);
        using var bobCtx = weights.CreateContext(@params);
        var bob = new InteractiveExecutor(bobCtx);

        // Initial alice prompt
        var alicePrompt = "Transcript of a dialog, where Alice interacts with a person named Bob. Alice is friendly, kind, honest and good at writing.\nAlice: Hello";
        var aliceResponse = await Prompt(alice, ConsoleColor.Green, alicePrompt, false, false);

        // Initial bob prompt
        var bobPrompt = $"Transcript of a dialog, where Bob interacts with a person named Alice. Bob is smart, intellectual and good at writing.\nAlice: Hello{aliceResponse}";
        var bobResponse = await Prompt(bob, ConsoleColor.Red, bobPrompt, true, true);

        // swap back and forth from Alice to Bob
        while (true)
        {
            aliceResponse = await Prompt(alice, ConsoleColor.Green, bobResponse, false, true);
            bobResponse = await Prompt(bob, ConsoleColor.Red, aliceResponse, false, true);

            if (Console.KeyAvailable)
                break;
        }
    }

    private static async Task<string> Prompt(ILLamaExecutor executor, ConsoleColor color, string prompt, bool showPrompt, bool showResponse)
    {
        var inferenceParams = new InferenceParams
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "Alice:", "Bob:", "User:" },
            MaxTokens = 128,
            Mirostat = MirostatType.Mirostat2,
            MirostatTau = 10,
        };

        Console.ForegroundColor = ConsoleColor.White;
        if (showPrompt)
            Console.Write(prompt);

        Console.ForegroundColor = color;
        var builder = new StringBuilder();
        await foreach (var text in executor.InferAsync(prompt, inferenceParams))
        {
            builder.Append(text);
            if (showResponse)
                Console.Write(text);
        }

        return builder.ToString();
    }
}
```