# Batch decoding
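This example decodes 8 completions of the same prompt in parallel. The prompt is evaluated once, its KV cache entries are shared with all sequences via `KvCacheSequenceCopy`, and each sequence is then sampled with a different temperature so the streams diverge.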
```cs
using System.Diagnostics;
using System.Text;
using LLama;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;

public class BatchedDecoding
{
    private const int n_parallel = 8;
    private const int n_len = 32;

    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        Console.WriteLine("Prompt (leave blank to select automatically):");
        var prompt = Console.ReadLine();
        if (string.IsNullOrWhiteSpace(prompt))
            prompt = "Not many people know that";

        // Load model
        var parameters = new ModelParams(modelPath);
        using var model = LLamaWeights.LoadFromFile(parameters);

        // Tokenize prompt
        var prompt_tokens = model.Tokenize(prompt, true, false, Encoding.UTF8);
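        // The prompt is stored in the KV cache once and shared by every sequence;
        // each sequence then needs its own space for up to (n_len - prompt length) generated tokens.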
        var n_kv_req = prompt_tokens.Length + (n_len - prompt_tokens.Length) * n_parallel;

        // Create a context
        parameters.ContextSize = (uint)model.ContextSize;
        parameters.BatchSize = (uint)Math.Max(n_len, n_parallel);
        using var context = model.CreateContext(parameters);
        var n_ctx = context.ContextSize;

        // make sure the KV cache is big enough to hold all the prompt and generated tokens
        if (n_kv_req > n_ctx)
        {
            await Console.Error.WriteLineAsync($"error: n_kv_req ({n_kv_req}) > n_ctx, the required KV cache size is not big enough\n");
            await Console.Error.WriteLineAsync("        either reduce n_parallel or increase n_ctx\n");
            return;
        }

        var batch = new LLamaBatch();

        // evaluate the initial prompt
        batch.AddRange(prompt_tokens, 0, LLamaSeqId.Zero, true);
        if (await context.DecodeAsync(batch) != DecodeResult.Ok)
        {
            await Console.Error.WriteLineAsync("llama_decode failed");
            return;
        }

        // assign the system KV cache to all parallel sequences
        // this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
        for (var i = 1; i < n_parallel; ++i)
        {
            context.NativeHandle.KvCacheSequenceCopy((LLamaSeqId)0, (LLamaSeqId)i, 0, batch.TokenCount);
        }

        if (n_parallel > 1)
        {
            Console.WriteLine();
            Console.WriteLine($"generating {n_parallel} sequences...");
        }

        // remember the batch index of the last token for each parallel sequence
        // we need this to determine which logits to sample from
        List<int> i_batch = new();
        for (var i = 0; i < n_parallel; i++)
            i_batch.Add(batch.TokenCount - 1);

        // Create per-stream decoder and sampler
        var decoders = new StreamingTokenDecoder[n_parallel];
        var samplers = new ISamplingPipeline[n_parallel];
        for (var i = 0; i < n_parallel; i++)
        {
            decoders[i] = new StreamingTokenDecoder(context);
            samplers[i] = new DefaultSamplingPipeline
            {
                Temperature = 0.1f + (float)i / n_parallel,
                MinP = 0.25f,
            };
        }

        var n_cur = batch.TokenCount;
        var n_decode = 0;

        var timer = new Stopwatch();
        timer.Start();
        while (n_cur <= n_len)
        {
            batch.Clear();

            for (var i = 0; i < n_parallel; i++)
            {
                // Skip completed streams
                if (i_batch[i] < 0)
                    continue;

                // Use the sampling pipeline to select a token
                var new_token_id = samplers[i].Sample(
                    context.NativeHandle,
                    context.NativeHandle.GetLogitsIth(i_batch[i]),
                    Array.Empty<LLamaToken>()
                );

                // Finish this stream early if necessary
                if (new_token_id == model.EndOfSentenceToken || new_token_id == model.NewlineToken)
                {
                    i_batch[i] = -1;
                    Console.WriteLine($"Completed Stream {i} early");
                    continue;
                }

                // Add this token to the decoder, so it will be turned into text
                decoders[i].Add(new_token_id);

                i_batch[i] = batch.TokenCount;

                // push this new token for next evaluation
                batch.Add(new_token_id, n_cur, (LLamaSeqId)i, true);

                n_decode++;
            }

            // Check if all streams are finished
            if (batch.TokenCount == 0)
            {
                break;
            }

            n_cur++;

            // evaluate the current batch with the transformer model
            if (await context.DecodeAsync(batch) != DecodeResult.Ok)
            {
                await Console.Error.WriteLineAsync("failed to eval");
                return;
            }
        }
        timer.Stop();

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine();
        Console.WriteLine($"Decoded {n_decode} tokens in {timer.ElapsedMilliseconds}ms");
        Console.WriteLine($"Rate: {n_decode / timer.Elapsed.TotalSeconds:##.000} tokens/second");

        var index = 0;
        foreach (var stream in decoders)
        {
            var text = stream.Read();
            Console.ForegroundColor = ConsoleColor.Green;
            Console.Write($"{index++}. {prompt}");
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine(text);
        }

        Console.WriteLine("Press any key to exit demo");
        Console.ReadKey(true);
    }
}
```
# Chat Chinese
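This example runs a chat session on a console that uses the GB2312 code page, which is common on Chinese-language Windows. User input is converted from GB2312 to UTF-8 before it reaches the model, and the model's UTF-8 output is converted back for display. Type `save` to persist the session and `regenerate` to redo the last assistant message.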
```cs
using System.Text;
using LLama;
using LLama.Common;

public class ChatChineseGB2312
{
    private static string ConvertEncoding(string input, Encoding original, Encoding target)
    {
        byte[] bytes = original.GetBytes(input);
        var convertedBytes = Encoding.Convert(original, target, bytes);
        return target.GetString(convertedBytes);
    }

    public static async Task Run()
    {
        // Register provider for GB2312 encoding
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common on Windows. It's recommended" +
            " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
        Console.ForegroundColor = ConsoleColor.White;

        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5,
            Encoding = Encoding.UTF8
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        ChatSession session;
        if (Directory.Exists("Assets/chat-with-kunkun-chinese"))
        {
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Loading session from disk.");
            Console.ForegroundColor = ConsoleColor.White;

            session = new ChatSession(executor);
            session.LoadSession("Assets/chat-with-kunkun-chinese");
        }
        else
        {
            var chatHistoryJson = File.ReadAllText("Assets/chat-with-kunkun-chinese.json");
            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

            session = new ChatSession(executor, chatHistory);
        }

        session
            .WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户", "坤坤"));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "用户:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.White;
        Console.Write("用户:");
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            // Convert the encoding from gb2312 to utf8 for the language model
            // and later saving to the history json file.
            userInput = ConvertEncoding(userInput, Encoding.GetEncoding("gb2312"), Encoding.UTF8);

            if (userInput == "save")
            {
                session.SaveSession("Assets/chat-with-kunkun-chinese");
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Session saved.");
            }
            else if (userInput == "regenerate")
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Regenerating last response ...");

                await foreach (
                    var text
                    in session.RegenerateAssistantMessageAsync(
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;

                    // Convert the encoding from utf8 to gb2312 for the console output.
                    Console.Write(ConvertEncoding(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
                }
            }
            else
            {
                await foreach (
                    var text
                    in session.ChatAsync(
                        new ChatHistory.Message(AuthorRole.User, userInput),
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Chat session strip role name
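This session hides the speaker names: a `KeywordTextOutputStreamTransform` watches the output stream and removes the `User:` and `Assistant:` keywords before they are printed.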
```cs
using LLama;
using LLama.Common;

public class ChatSessionStripRoleName
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
        ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

        ChatSession session = new(executor, chatHistory);
        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
            new string[] { "User:", "Assistant:" },
            redundancyLength: 8));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "User:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            await foreach (
                var text
                in session.ChatAsync(
                    new ChatHistory.Message(AuthorRole.User, userInput),
                    inferenceParams))
            {
                Console.ForegroundColor = ConsoleColor.White;
                Console.Write(text);
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Chat session with history
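This example persists a `ChatSession` across runs. If a saved session directory exists it is loaded from disk; otherwise the session is seeded from a JSON chat history. While chatting, type `save` to write the session to disk, `regenerate` to redo the last assistant message, or `exit` to quit.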
```cs
using LLama.Common;

namespace LLama.Examples.Examples;

public class ChatSessionWithHistory
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        ChatSession session;
        if (Directory.Exists("Assets/chat-with-bob"))
        {
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Loading session from disk.");
            Console.ForegroundColor = ConsoleColor.White;

            session = new ChatSession(executor);
            session.LoadSession("Assets/chat-with-bob");
        }
        else
        {
            var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

            session = new ChatSession(executor, chatHistory);
        }

        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
            new string[] { "User:", "Assistant:" },
            redundancyLength: 8));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "User:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            if (userInput == "save")
            {
                session.SaveSession("Assets/chat-with-bob");
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Session saved.");
            }
            else if (userInput == "regenerate")
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Regenerating last response ...");

                await foreach (
                    var text
                    in session.RegenerateAssistantMessageAsync(
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }
            else
            {
                await foreach (
                    var text
                    in session.ChatAsync(
                        new ChatHistory.Message(AuthorRole.User, userInput),
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Chat session with role name
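The counterpart of the strip-role-name example: no output transform is applied, so the role names generated by the model remain visible in the console.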
```cs
using LLama;
using LLama.Common;

public class ChatSessionWithRoleName
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
        ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

        ChatSession session = new(executor, chatHistory);

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "User:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            await foreach (
                var text
                in session.ChatAsync(
                    new ChatHistory.Message(AuthorRole.User, userInput),
                    inferenceParams))
            {
                Console.ForegroundColor = ConsoleColor.White;
                Console.Write(text);
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Coding Assistant
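An instruction-following assistant based on `InstructExecutor` and a Code Llama instruct model. If no model path is entered, a quantized CodeLlama-7B-Instruct GGUF is downloaded from HuggingFace into a local `Models` directory. Each instruction is wrapped in the `[INST] ... [/INST]` format the model was trained on.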
```cs
using LLama;
using LLama.Common;
using System;
using System.Reflection;

internal class CodingAssistant
{
    const string DefaultModelUri = "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_S.gguf";

    // Source paper with example prompts:
    // https://doi.org/10.48550/arXiv.2308.12950
    const string InstructionPrefix = "[INST]";
    const string InstructionSuffix = "[/INST]";
    const string SystemInstruction = "You're an intelligent, concise coding assistant. Wrap code in ``` for readability. Don't repeat yourself. Use best practice and good coding standards.";

    private static string ModelsDirectory = Path.Combine(Directory.GetParent(Assembly.GetExecutingAssembly().Location)!.FullName, "Models");

    public static async Task Run()
    {
        Console.Write("Please input your model path (if left empty, a default model will be downloaded for you): ");
        var modelPath = Console.ReadLine();

        if (string.IsNullOrWhiteSpace(modelPath))
        {
            modelPath = await GetDefaultModel();
        }

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 4096
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InstructExecutor(context, InstructionPrefix, InstructionSuffix, null);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions." +
            "\nIt's a 7B Code Llama, so it's trained for programming tasks like \"Write a C# function reading a file name from a given URI\" or \"Write some programming interview questions\"." +
            "\nWrite 'exit' to exit");
        Console.ForegroundColor = ConsoleColor.White;

        var inferenceParams = new InferenceParams()
        {
            Temperature = 0.8f,
            MaxTokens = -1,
        };

        string instruction = $"{SystemInstruction}\n\n";
        await Console.Out.WriteAsync("Instruction: ");
        instruction += Console.ReadLine() ?? "Ask me for instructions.";
        while (instruction != "exit")
        {
            Console.ForegroundColor = ConsoleColor.Green;
            await foreach (var text in executor.InferAsync(instruction + Environment.NewLine, inferenceParams))
            {
                Console.Write(text);
            }
            Console.ForegroundColor = ConsoleColor.White;

            await Console.Out.WriteAsync("Instruction: ");
            instruction = Console.ReadLine() ?? "Ask me for instructions.";
        }
    }

    private static async Task<string> GetDefaultModel()
    {
        var uri = new Uri(DefaultModelUri);
        var modelName = uri.Segments[^1];
        await Console.Out.WriteLineAsync($"The following model will be used: {modelName}");
        var modelPath = Path.Combine(ModelsDirectory, modelName);

        if (!Directory.Exists(ModelsDirectory))
        {
            Directory.CreateDirectory(ModelsDirectory);
        }

        if (File.Exists(modelPath))
        {
            await Console.Out.WriteLineAsync($"Existing model found, using {modelPath}");
        }
        else
        {
            await Console.Out.WriteLineAsync($"Model not found locally, downloading {DefaultModelUri}...");
            using var http = new HttpClient();
            await using var downloadStream = await http.GetStreamAsync(uri);
            await using var fileStream = new FileStream(modelPath, FileMode.Create, FileAccess.Write);
            await downloadStream.CopyToAsync(fileStream);
            await Console.Out.WriteLineAsync($"Model downloaded and saved to {modelPath}");
        }

        return modelPath;
    }
}
```
# Grammar JSON response
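This example constrains generation with a GBNF grammar so the model can only produce valid JSON. The grammar is parsed from `Assets/json.gbnf` and attached to the `InferenceParams` of a `StatelessExecutor`.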
```cs
using LLama;
using LLama.Common;
using LLama.Grammars;

public class GrammarJsonResponse
{
    public static async Task Run()
    {
        var gbnf = (await File.ReadAllTextAsync("Assets/json.gbnf")).Trim();
        var grammar = Grammar.Parse(gbnf, "root");

        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        var ex = new StatelessExecutor(model, parameters);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions and always respond in a JSON format. For example, you can input \"Tell me the attributes of a good dish\"");
        Console.ForegroundColor = ConsoleColor.White;

        using var grammarInstance = grammar.CreateInstance();
        var inferenceParams = new InferenceParams()
        {
            Temperature = 0.6f,
            AntiPrompts = new List<string> { "Question:", "#", "Question: ", ".\n" },
            MaxTokens = 50,
            Grammar = grammarInstance
        };

        while (true)
        {
            Console.Write("\nQuestion: ");
            Console.ForegroundColor = ConsoleColor.Green;
            var prompt = Console.ReadLine();
            Console.ForegroundColor = ConsoleColor.White;
            Console.Write("Answer: ");
            prompt = $"Question: {prompt?.Trim()} Answer: ";
            await foreach (var text in ex.InferAsync(prompt, inferenceParams))
            {
                Console.Write(text);
            }
        }
    }
}
```
# Kernel memory
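This example plugs LLamaSharp into Microsoft Kernel Memory for local retrieval-augmented question answering. A PDF is imported and partitioned into chunks, then `AskAsync` answers a question from the most relevant match.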
```cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.Configuration;
using Microsoft.KernelMemory.Handlers;

public class KernelMemory
{
    public static async Task Run()
    {
        Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var searchClientConfig = new SearchClientConfig
        {
            MaxMatchesCount = 1,
            AnswerTokens = 100,
        };

        var memory = new KernelMemoryBuilder()
            .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath)
            {
                DefaultInferenceParams = new LLama.Common.InferenceParams
                {
                    AntiPrompts = new List<string> { "\n\n" }
                }
            })
            .WithSearchClientConfig(searchClientConfig)
            .With(new TextPartitioningOptions
            {
                MaxTokensPerParagraph = 300,
                MaxTokensPerLine = 100,
                OverlappingTokens = 30
            })
            .Build();

        await memory.ImportDocumentAsync(@"./Assets/sample-SK-Readme.pdf", steps: Constants.PipelineWithoutSummary);

        var question = "What's Semantic Kernel?";

        Console.WriteLine($"\n\nQuestion: {question}");

        var answer = await memory.AskAsync(question);

        Console.WriteLine($"\nAnswer: {answer.Result}");

        Console.WriteLine("\n\n  Sources:\n");

        foreach (var x in answer.RelevantSources)
        {
            Console.WriteLine($"  - {x.SourceName}  - {x.Link} [{x.Partitions.First().LastUpdate:D}]");
        }
    }
}
```
# Semantic kernel memory
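This example uses `LLamaEmbedder` as the embedding generator behind Semantic Kernel's `ISemanticTextMemory`, with an in-memory `VolatileMemoryStore` standing in for a vector database. A set of GitHub URLs and descriptions is stored as references and then searched by semantic similarity.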
```cs
using LLama;
using LLama.Common;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Memory;
using LLamaSharp.SemanticKernel.TextEmbedding;
using Microsoft.SemanticKernel.AI.Embeddings;
using Microsoft.SemanticKernel.Plugins.Memory;

public class SemanticKernelMemory
{
    private const string MemoryCollectionName = "SKGitHub";

    public static async Task Run()
    {
        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example14_SemanticMemory.cs");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var seed = 1337u;
        // Load weights into memory
        var parameters = new ModelParams(modelPath)
        {
            Seed = seed,
            EmbeddingMode = true
        };

        using var model = LLamaWeights.LoadFromFile(parameters);
        var embedding = new LLamaEmbedder(model, parameters);

        Console.WriteLine("====================================================");
        Console.WriteLine("======== Semantic Memory (volatile, in RAM) ========");
        Console.WriteLine("====================================================");

        /* You can build your own semantic memory combining an Embedding Generator
         * with a Memory storage that supports search by similarity (ie semantic search).
         *
         * In this example we use a volatile memory, a local simulation of a vector DB.
         *
         * You can replace VolatileMemoryStore with Qdrant (see QdrantMemoryStore connector)
         * or implement your connectors for Pinecone, Vespa, Postgres + pgvector, SQLite VSS, etc.
         */
        var memory = new MemoryBuilder()
            .WithTextEmbeddingGeneration(new LLamaSharpEmbeddingGeneration(embedding))
            .WithMemoryStore(new VolatileMemoryStore())
            .Build();

        await RunExampleAsync(memory);
    }

    private static async Task RunExampleAsync(ISemanticTextMemory memory)
    {
        await StoreMemoryAsync(memory);

        await SearchMemoryAsync(memory, "How do I get started?");

        /*
        Output:

        Query: How do I get started?

        Result 1:
          URL:   : https://github.com/microsoft/semantic-kernel/blob/main/README.md
          Title : README: Installation, getting started, and how to contribute

        Result 2:
          URL:   : https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet-jupyter-notebooks/00-getting-started.ipynb
          Title : Jupyter notebook describing how to get started with the Semantic Kernel
        */

        await SearchMemoryAsync(memory, "Can I build a chat with SK?");

        /*
        Output:

        Query: Can I build a chat with SK?

        Result 1:
          URL:   : https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT
          Title : Sample demonstrating how to create a chat skill interfacing with ChatGPT

        Result 2:
          URL:   : https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md
          Title : README: README associated with a sample chat summary react-based webapp
        */

        await SearchMemoryAsync(memory, "Jupyter notebook");

        await SearchMemoryAsync(memory, "README: README associated with a sample chat summary react-based webapp");

        await SearchMemoryAsync(memory, "Jupyter notebook describing how to pass prompts from a file to a semantic skill or function");
    }

    private static async Task SearchMemoryAsync(ISemanticTextMemory memory, string query)
    {
        Console.WriteLine("\nQuery: " + query + "\n");

        var memories = memory.SearchAsync(MemoryCollectionName, query, limit: 10, minRelevanceScore: 0.5);

        int i = 0;
        await foreach (MemoryQueryResult result in memories)
        {
            Console.WriteLine($"Result {++i}:");
            Console.WriteLine("  URL:   : " + result.Metadata.Id);
            Console.WriteLine("  Title : " + result.Metadata.Description);
            Console.WriteLine("  Relevance: " + result.Relevance);
            Console.WriteLine();
        }

        Console.WriteLine("----------------------");
    }

    private static async Task StoreMemoryAsync(ISemanticTextMemory memory)
    {
        /* Store some data in the semantic memory.
         *
         * When using Azure Cognitive Search the data is automatically indexed on write.
         *
         * When using the combination of VolatileStore and Embedding generation, SK takes
         * care of creating and storing the index
         */
        Console.WriteLine("\nAdding some GitHub file URLs and their descriptions to the semantic memory.");
        var githubFiles = SampleData();
        var i = 0;
        foreach (var entry in githubFiles)
        {
            var result = await memory.SaveReferenceAsync(
                collection: MemoryCollectionName,
                externalSourceName: "GitHub",
                externalId: entry.Key,
                description: entry.Value,
                text: entry.Value);
            Console.WriteLine($"#{++i} saved.");
            Console.WriteLine(result);
        }

        Console.WriteLine("\n----------------------");
    }

    private static Dictionary<string, string> SampleData()
    {
        return new Dictionary<string, string>
        {
            ["https://github.com/microsoft/semantic-kernel/blob/main/README.md"]
                = "README: Installation, getting started, and how to contribute",
            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks/02-running-prompts-from-file.ipynb"]
                = "Jupyter notebook describing how to pass prompts from a file to a semantic skill or function",
            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks//00-getting-started.ipynb"]
                = "Jupyter notebook describing how to get started with the Semantic Kernel",
            ["https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT"]
                = "Sample demonstrating how to create a chat skill interfacing with ChatGPT",
            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel/Memory/VolatileMemoryStore.cs"]
                = "C# class that defines a volatile embedding store",
            ["https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet/KernelHttpServer/README.md"]
                = "README: How to set up a Semantic Kernel Service API using Azure Function Runtime v4",
            ["https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md"]
                = "README: README associated with a sample chat summary react-based webapp",
        };
    }
}
```
# Semantic kernel prompt
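This example registers a `StatelessExecutor` as a Semantic Kernel text generation service and turns a prompt template into a reusable summarization function, which is then invoked on two sample texts.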
```cs
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel;
using LLamaSharp.SemanticKernel.TextCompletion;
using Microsoft.SemanticKernel.TextGeneration;
using Microsoft.Extensions.DependencyInjection;

public class SemanticKernelPrompt
{
    public static async Task Run()
    {
        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        // Load weights into memory
        var parameters = new ModelParams(modelPath);
        using var model = LLamaWeights.LoadFromFile(parameters);
        var ex = new StatelessExecutor(model, parameters);

        var builder = Kernel.CreateBuilder();
        builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
        var kernel = builder.Build();

        var prompt = @"{{$input}}
One line TLDR with the fewest words.";

        ChatRequestSettings settings = new() { MaxTokens = 100 };
        var summarize = kernel.CreateFunctionFromPrompt(prompt, settings);

        string text1 = @"
1st Law of Thermodynamics - Energy cannot be created or destroyed.
2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";

        string text2 = @"
1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
2. The acceleration of an object depends on the mass of the object and the amount of force applied.
3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite force on the first.";

        Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue<string>());

        Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue<string>());
    }
}
```
# Talk to yourself
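Two `InteractiveExecutor`s share the same weights through separate contexts and feed each other's responses in a loop: Alice's reply becomes Bob's prompt and vice versa. Press any key to stop the conversation.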
```cs
using System.Text;
using LLama;
using LLama.Abstractions;
using LLama.Common;

public class TalkToYourself
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        // Load weights into memory
        var @params = new ModelParams(modelPath);
        using var weights = LLamaWeights.LoadFromFile(@params);

        // Create 2 contexts sharing the same weights
        using var aliceCtx = weights.CreateContext(@params);
        var alice = new InteractiveExecutor(aliceCtx);
        using var bobCtx = weights.CreateContext(@params);
        var bob = new InteractiveExecutor(bobCtx);

        // Initial alice prompt
        var alicePrompt = "Transcript of a dialog, where Alice interacts with a person named Bob. Alice is friendly, kind, honest and good at writing.\nAlice: Hello";
        var aliceResponse = await Prompt(alice, ConsoleColor.Green, alicePrompt, false, false);

        // Initial bob prompt
        var bobPrompt = $"Transcript of a dialog, where Bob interacts with a person named Alice. Bob is smart, intellectual and good at writing.\nAlice: Hello{aliceResponse}";
        var bobResponse = await Prompt(bob, ConsoleColor.Red, bobPrompt, true, true);

        // swap back and forth from Alice to Bob
        while (true)
        {
            aliceResponse = await Prompt(alice, ConsoleColor.Green, bobResponse, false, true);
            bobResponse = await Prompt(bob, ConsoleColor.Red, aliceResponse, false, true);

            if (Console.KeyAvailable)
                break;
        }
    }

    private static async Task<string> Prompt(ILLamaExecutor executor, ConsoleColor color, string prompt, bool showPrompt, bool showResponse)
    {
        var inferenceParams = new InferenceParams
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "Alice:", "Bob:", "User:" },
            MaxTokens = 128,
            Mirostat = MirostatType.Mirostat2,
            MirostatTau = 10,
        };

        Console.ForegroundColor = ConsoleColor.White;
        if (showPrompt)
            Console.Write(prompt);

        Console.ForegroundColor = color;
        var builder = new StringBuilder();
        await foreach (var text in executor.InferAsync(prompt, inferenceParams))
        {
            builder.Append(text);
            if (showResponse)
                Console.Write(text);
        }

        return builder.ToString();
    }
}
```