# Batch decoding
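This example decodes several sequences in parallel from one prompt. The prompt is evaluated once for sequence 0, its KV cache entries are shared with the other sequences via `KvCacheSequenceCopy` (so the prompt is never re-evaluated or copied), and each iteration then packs one new token per active stream into a single `LLamaBatch`. Every stream samples with its own pipeline, here with a slightly different temperature, so the completions diverge.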
```cs
using System.Diagnostics;
using System.Text;
using LLama;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;

public class BatchedDecoding
{
    private const int n_parallel = 8;
    private const int n_len = 32;

    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        Console.WriteLine("Prompt (leave blank to select automatically):");
        var prompt = Console.ReadLine();
        if (string.IsNullOrWhiteSpace(prompt))
            prompt = "Not many people know that";

        // Load model
        var parameters = new ModelParams(modelPath);
        using var model = LLamaWeights.LoadFromFile(parameters);

        // Tokenize prompt
        var prompt_tokens = model.Tokenize(prompt, true, false, Encoding.UTF8);

        // Each sequence holds the shared prompt plus its own generated tokens
        var n_kv_req = prompt_tokens.Length + (n_len - prompt_tokens.Length) * n_parallel;

        // Create a context
        parameters.ContextSize = (uint)model.ContextSize;
        parameters.BatchSize = (uint)Math.Max(n_len, n_parallel);
        using var context = model.CreateContext(parameters);

        var n_ctx = context.ContextSize;

        // make sure the KV cache is big enough to hold all the prompt and generated tokens
        if (n_kv_req > n_ctx)
        {
            await Console.Error.WriteLineAsync($"error: n_kv_req ({n_kv_req}) > n_ctx ({n_ctx}), the required KV cache size is not big enough\n");
            await Console.Error.WriteLineAsync("        either reduce n_parallel or increase n_ctx\n");
            return;
        }

        var batch = new LLamaBatch();

        // evaluate the initial prompt
        batch.AddRange(prompt_tokens, 0, LLamaSeqId.Zero, true);

        if (await context.DecodeAsync(batch) != DecodeResult.Ok)
        {
            await Console.Error.WriteLineAsync("llama_decode failed");
            return;
        }

        // assign the system KV cache to all parallel sequences
        // this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
        for (var i = 1; i < n_parallel; ++i)
        {
            context.NativeHandle.KvCacheSequenceCopy((LLamaSeqId)0, (LLamaSeqId)i, 0, batch.TokenCount);
        }

        if (n_parallel > 1)
        {
            Console.WriteLine();
            Console.WriteLine($"generating {n_parallel} sequences...");
        }

        // remember the batch index of the last token for each parallel sequence
        // we need this to determine which logits to sample from
        List<int> i_batch = new();
        for (var i = 0; i < n_parallel; i++)
            i_batch.Add(batch.TokenCount - 1);

        // Create per-stream decoder and sampler
        var decoders = new StreamingTokenDecoder[n_parallel];
        var samplers = new ISamplingPipeline[n_parallel];
        for (var i = 0; i < n_parallel; i++)
        {
            decoders[i] = new StreamingTokenDecoder(context);
            samplers[i] = new DefaultSamplingPipeline
            {
                Temperature = 0.1f + (float)i / n_parallel,
                MinP = 0.25f,
            };
        }

        var n_cur = batch.TokenCount;
        var n_decode = 0;

        var timer = new Stopwatch();
        timer.Start();
        while (n_cur <= n_len)
        {
            batch.Clear();

            for (var i = 0; i < n_parallel; i++)
            {
                // Skip completed streams
                if (i_batch[i] < 0)
                    continue;

                // Use the sampling pipeline to select a token
                var new_token_id = samplers[i].Sample(
                    context.NativeHandle,
                    context.NativeHandle.GetLogitsIth(i_batch[i]),
                    Array.Empty<LLamaToken>()
                );

                // Finish this stream early if necessary
                if (new_token_id == model.EndOfSentenceToken || new_token_id == model.NewlineToken)
                {
                    i_batch[i] = -1;
                    Console.WriteLine($"Completed Stream {i} early");
                    continue;
                }

                // Add this token to the decoder, so it will be turned into text
                decoders[i].Add(new_token_id);

                i_batch[i] = batch.TokenCount;

                // push this new token for next evaluation
                batch.Add(new_token_id, n_cur, (LLamaSeqId)i, true);

                n_decode++;
            }

            // Check if all streams are finished
            if (batch.TokenCount == 0)
            {
                break;
            }

            n_cur++;

            // evaluate the current batch with the transformer model
            if (await context.DecodeAsync(batch) != DecodeResult.Ok)
            {
                await Console.Error.WriteLineAsync("failed to eval");
                return;
            }
        }

        timer.Stop();
        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine();
        Console.WriteLine($"Decoded {n_decode} tokens in {timer.ElapsedMilliseconds}ms");
        Console.WriteLine($"Rate: {n_decode / timer.Elapsed.TotalSeconds:##.000} tokens/second");

        var index = 0;
        foreach (var stream in decoders)
        {
            var text = stream.Read();

            Console.ForegroundColor = ConsoleColor.Green;
            Console.Write($"{index++}. {prompt}");
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine(text);
        }

        Console.WriteLine("Press any key to exit demo");
        Console.ReadKey(true);
    }
}
```
# Chat Chinese
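This example runs a chat session in Chinese on a console that uses the GB2312 encoding, which is common on Chinese-locale Windows. User input is converted from GB2312 to UTF-8 before it reaches the model, and the model's UTF-8 output is converted back for display. Type `save` to persist the session, `regenerate` to re-answer the last message, or `exit` to quit.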
```cs
using System.Text;
using LLama;
using LLama.Common;

public class ChatChineseGB2312
{
    private static string ConvertEncoding(string input, Encoding original, Encoding target)
    {
        byte[] bytes = original.GetBytes(input);
        var convertedBytes = Encoding.Convert(original, target, bytes);
        return target.GetString(convertedBytes);
    }

    public static async Task Run()
    {
        // Register provider for GB2312 encoding
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("This example shows how to use Chinese with the GB2312 encoding, which is common on Windows. It's recommended" +
            " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
        Console.ForegroundColor = ConsoleColor.White;

        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5,
            Encoding = Encoding.UTF8
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        ChatSession session;
        if (Directory.Exists("Assets/chat-with-kunkun-chinese"))
        {
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Loading session from disk.");
            Console.ForegroundColor = ConsoleColor.White;

            session = new ChatSession(executor);
            session.LoadSession("Assets/chat-with-kunkun-chinese");
        }
        else
        {
            var chatHistoryJson = File.ReadAllText("Assets/chat-with-kunkun-chinese.json");
            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

            session = new ChatSession(executor, chatHistory);
        }

        session.WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户", "坤坤"));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "用户:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.White;
        Console.Write("用户:");
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            // Convert the encoding from gb2312 to utf8 for the language model
            // and later saving to the history json file.
            userInput = ConvertEncoding(userInput, Encoding.GetEncoding("gb2312"), Encoding.UTF8);

            if (userInput == "save")
            {
                session.SaveSession("Assets/chat-with-kunkun-chinese");
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Session saved.");
            }
            else if (userInput == "regenerate")
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Regenerating last response ...");

                await foreach (
                    var text
                    in session.RegenerateAssistantMessageAsync(
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;

                    // Convert the encoding from utf8 to gb2312 for the console output.
                    Console.Write(ConvertEncoding(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
                }
            }
            else
            {
                await foreach (
                    var text
                    in session.ChatAsync(
                        new ChatHistory.Message(AuthorRole.User, userInput),
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Chat session strip role name
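This example is the same chat loop as the role-name example below, except that a `KeywordTextOutputStreamTransform` strips the role names ("User:", "Assistant:") from the streamed output before it is printed.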
```cs
using LLama;
using LLama.Common;

public class ChatSessionStripRoleName
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
        ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

        ChatSession session = new(executor, chatHistory);
        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
            new string[] { "User:", "Assistant:" },
            redundancyLength: 8));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "User:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            await foreach (
                var text
                in session.ChatAsync(
                    new ChatHistory.Message(AuthorRole.User, userInput),
                    inferenceParams))
            {
                Console.ForegroundColor = ConsoleColor.White;
                Console.Write(text);
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Chat session with history
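This example resumes a chat session from a state saved on disk if one exists, and otherwise seeds it from a JSON chat history file. Type `save` to persist the session, `regenerate` to re-answer the last message, or `exit` to quit.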
```cs
using LLama.Common;

namespace LLama.Examples.Examples;

public class ChatSessionWithHistory
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        ChatSession session;
        if (Directory.Exists("Assets/chat-with-bob"))
        {
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Loading session from disk.");
            Console.ForegroundColor = ConsoleColor.White;

            session = new ChatSession(executor);
            session.LoadSession("Assets/chat-with-bob");
        }
        else
        {
            var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

            session = new ChatSession(executor, chatHistory);
        }

        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
            new string[] { "User:", "Assistant:" },
            redundancyLength: 8));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "User:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            if (userInput == "save")
            {
                session.SaveSession("Assets/chat-with-bob");
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Session saved.");
            }
            else if (userInput == "regenerate")
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Regenerating last response ...");

                await foreach (
                    var text
                    in session.RegenerateAssistantMessageAsync(
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }
            else
            {
                await foreach (
                    var text
                    in session.ChatAsync(
                        new ChatHistory.Message(AuthorRole.User, userInput),
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Chat session with role name
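This example runs a basic chat session seeded from a JSON chat history. No output transform is applied, so the role names are left in the generated text.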
```cs
using LLama;
using LLama.Common;

public class ChatSessionWithRoleName
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
        ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

        ChatSession session = new(executor, chatHistory);

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "User:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            await foreach (
                var text
                in session.ChatAsync(
                    new ChatHistory.Message(AuthorRole.User, userInput),
                    inferenceParams))
            {
                Console.ForegroundColor = ConsoleColor.White;
                Console.Write(text);
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";
            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}
```
# Coding Assistant
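This example drives a Code Llama instruct model through an `InstructExecutor`, wrapping each instruction in the `[INST]`/`[/INST]` tags from the Code Llama paper. If no model path is entered, a default 7B instruct model is downloaded from HuggingFace and cached in a local `Models` directory.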
```cs
using LLama;
using LLama.Common;
using System;
using System.Reflection;

internal class CodingAssistant
{
    const string DefaultModelUri = "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_S.gguf";

    // Source paper with example prompts:
    // https://doi.org/10.48550/arXiv.2308.12950
    const string InstructionPrefix = "[INST]";
    const string InstructionSuffix = "[/INST]";
    const string SystemInstruction = "You're an intelligent, concise coding assistant. Wrap code in ``` for readability. Don't repeat yourself. Use best practice and good coding standards.";

    private static string ModelsDirectory = Path.Combine(Directory.GetParent(Assembly.GetExecutingAssembly().Location)!.FullName, "Models");

    public static async Task Run()
    {
        Console.Write("Please input your model path (if left empty, a default model will be downloaded for you): ");
        var modelPath = Console.ReadLine();

        if (string.IsNullOrWhiteSpace(modelPath))
        {
            modelPath = await GetDefaultModel();
        }

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 4096
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InstructExecutor(context, InstructionPrefix, InstructionSuffix, null);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions." +
            "\nIt's a 7B Code Llama, so it's trained for programming tasks like \"Write a C# function reading a file name from a given URI\" or \"Write some programming interview questions\"." +
            "\nWrite 'exit' to exit");
        Console.ForegroundColor = ConsoleColor.White;

        var inferenceParams = new InferenceParams()
        {
            Temperature = 0.8f,
            MaxTokens = -1,
        };

        string instruction = $"{SystemInstruction}\n\n";
        await Console.Out.WriteAsync("Instruction: ");
        instruction += Console.ReadLine() ?? "Ask me for instructions.";
        while (instruction != "exit")
        {
            Console.ForegroundColor = ConsoleColor.Green;
            await foreach (var text in executor.InferAsync(instruction + System.Environment.NewLine, inferenceParams))
            {
                Console.Write(text);
            }
            Console.ForegroundColor = ConsoleColor.White;

            await Console.Out.WriteAsync("Instruction: ");
            instruction = Console.ReadLine() ?? "Ask me for instructions.";
        }
    }

    private static async Task<string> GetDefaultModel()
    {
        var uri = new Uri(DefaultModelUri);
        var modelName = uri.Segments[^1];
        await Console.Out.WriteLineAsync($"The following model will be used: {modelName}");

        var modelPath = Path.Combine(ModelsDirectory, modelName);
        if (!Directory.Exists(ModelsDirectory))
        {
            Directory.CreateDirectory(ModelsDirectory);
        }

        if (File.Exists(modelPath))
        {
            await Console.Out.WriteLineAsync($"Existing model found, using {modelPath}");
        }
        else
        {
            await Console.Out.WriteLineAsync($"Model not found locally, downloading {DefaultModelUri}...");
            using var http = new HttpClient();
            await using var downloadStream = await http.GetStreamAsync(uri);
            await using var fileStream = new FileStream(modelPath, FileMode.Create, FileAccess.Write);
            await downloadStream.CopyToAsync(fileStream);
            await Console.Out.WriteLineAsync($"Model downloaded and saved to {modelPath}");
        }

        return modelPath;
    }
}
```
# Grammar JSON response
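This example constrains generation with a GBNF grammar loaded from `Assets/json.gbnf`, so the model's answers always follow the JSON format regardless of the question.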
```cs
using LLama;
using LLama.Common;
using LLama.Grammars;

public class GrammarJsonResponse
{
    public static async Task Run()
    {
        var gbnf = (await File.ReadAllTextAsync("Assets/json.gbnf")).Trim();
        var grammar = Grammar.Parse(gbnf, "root");

        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        var ex = new StatelessExecutor(model, parameters);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions and always respond in a JSON format. For example, you can input \"Tell me the attributes of a good dish\"");
        Console.ForegroundColor = ConsoleColor.White;

        using var grammarInstance = grammar.CreateInstance();
        var inferenceParams = new InferenceParams()
        {
            Temperature = 0.6f,
            AntiPrompts = new List<string> { "Question:", "#", "Question: ", ".\n" },
            MaxTokens = 50,
            Grammar = grammarInstance
        };

        while (true)
        {
            Console.Write("\nQuestion: ");
            Console.ForegroundColor = ConsoleColor.Green;
            var prompt = Console.ReadLine();
            Console.ForegroundColor = ConsoleColor.White;
            Console.Write("Answer: ");
            prompt = $"Question: {prompt?.Trim()} Answer: ";
            await foreach (var text in ex.InferAsync(prompt, inferenceParams))
            {
                Console.Write(text);
            }
        }
    }
}
```
# Kernel memory
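This example plugs LLamaSharp into Microsoft Kernel Memory: a PDF is imported, partitioned, and embedded, then a question is answered against the indexed content and the relevant sources are listed.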
```cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.Configuration;
using Microsoft.KernelMemory.Handlers;
using Common = LLama.Common; // alias so Common.InferenceParams below resolves outside the LLama namespace

public class KernelMemory
{
    public static async Task Run()
    {
        Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var searchClientConfig = new SearchClientConfig
        {
            MaxMatchesCount = 1,
            AnswerTokens = 100,
        };

        var memory = new KernelMemoryBuilder()
            .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath)
            {
                DefaultInferenceParams = new Common.InferenceParams
                {
                    AntiPrompts = new List<string> { "\n\n" }
                }
            })
            .WithSearchClientConfig(searchClientConfig)
            .With(new TextPartitioningOptions
            {
                MaxTokensPerParagraph = 300,
                MaxTokensPerLine = 100,
                OverlappingTokens = 30
            })
            .Build();

        await memory.ImportDocumentAsync(@"./Assets/sample-SK-Readme.pdf", steps: Constants.PipelineWithoutSummary);

        var question = "What's Semantic Kernel?";

        Console.WriteLine($"\n\nQuestion: {question}");

        var answer = await memory.AskAsync(question);

        Console.WriteLine($"\nAnswer: {answer.Result}");

        Console.WriteLine("\n\nSources:\n");

        foreach (var x in answer.RelevantSources)
        {
            Console.WriteLine($"  - {x.SourceName} - {x.Link} [{x.Partitions.First().LastUpdate:D}]");
        }
    }
}
```
# Semantic kernel memory
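This example uses `LLamaEmbedder` as the embedding generator for Semantic Kernel's memory, backed by a `VolatileMemoryStore` (an in-RAM stand-in for a vector database). A set of GitHub URLs and their descriptions is stored and then searched by semantic similarity.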
```cs
using LLama;
using LLama.Common;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Memory;
using LLamaSharp.SemanticKernel.TextEmbedding;
using Microsoft.SemanticKernel.AI.Embeddings;
using Microsoft.SemanticKernel.Plugins.Memory;

public class SemanticKernelMemory
{
    private const string MemoryCollectionName = "SKGitHub";

    public static async Task Run()
    {
        // ConsoleLogger is a small logging helper from the LLamaSharp examples project
        var loggerFactory = ConsoleLogger.LoggerFactory;
        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example14_SemanticMemory.cs");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var seed = 1337u;
        // Load weights into memory
        var parameters = new ModelParams(modelPath)
        {
            Seed = seed,
            EmbeddingMode = true
        };

        using var model = LLamaWeights.LoadFromFile(parameters);
        var embedding = new LLamaEmbedder(model, parameters);

        Console.WriteLine("====================================================");
        Console.WriteLine("======== Semantic Memory (volatile, in RAM) ========");
        Console.WriteLine("====================================================");

        /* You can build your own semantic memory combining an Embedding Generator
         * with a Memory storage that supports search by similarity (ie semantic search).
         *
         * In this example we use a volatile memory, a local simulation of a vector DB.
         *
         * You can replace VolatileMemoryStore with Qdrant (see QdrantMemoryStore connector)
         * or implement your connectors for Pinecone, Vespa, Postgres + pgvector, SQLite VSS, etc.
         */
        var memory = new MemoryBuilder()
            .WithTextEmbeddingGeneration(new LLamaSharpEmbeddingGeneration(embedding))
            .WithMemoryStore(new VolatileMemoryStore())
            .Build();

        await RunExampleAsync(memory);
    }

    private static async Task RunExampleAsync(ISemanticTextMemory memory)
    {
        await StoreMemoryAsync(memory);

        await SearchMemoryAsync(memory, "How do I get started?");

        /*
        Output:

        Query: How do I get started?

        Result 1:
          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/README.md
          Title    : README: Installation, getting started, and how to contribute

        Result 2:
          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet-jupyter-notebooks/00-getting-started.ipynb
          Title    : Jupyter notebook describing how to get started with the Semantic Kernel
        */

        await SearchMemoryAsync(memory, "Can I build a chat with SK?");

        /*
        Output:

        Query: Can I build a chat with SK?

        Result 1:
          URL:     : https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT
          Title    : Sample demonstrating how to create a chat skill interfacing with ChatGPT

        Result 2:
          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md
          Title    : README: README associated with a sample chat summary react-based webapp
        */

        await SearchMemoryAsync(memory, "Jupyter notebook");

        await SearchMemoryAsync(memory, "README: README associated with a sample chat summary react-based webapp");

        await SearchMemoryAsync(memory, "Jupyter notebook describing how to pass prompts from a file to a semantic skill or function");
    }

    private static async Task SearchMemoryAsync(ISemanticTextMemory memory, string query)
    {
        Console.WriteLine("\nQuery: " + query + "\n");

        var memories = memory.SearchAsync(MemoryCollectionName, query, limit: 10, minRelevanceScore: 0.5);

        int i = 0;
        await foreach (MemoryQueryResult result in memories)
        {
            Console.WriteLine($"Result {++i}:");
            Console.WriteLine("  URL:     : " + result.Metadata.Id);
            Console.WriteLine("  Title    : " + result.Metadata.Description);
            Console.WriteLine("  Relevance: " + result.Relevance);
            Console.WriteLine();
        }

        Console.WriteLine("----------------------");
    }

    private static async Task StoreMemoryAsync(ISemanticTextMemory memory)
    {
        /* Store some data in the semantic memory.
         *
         * When using Azure Cognitive Search the data is automatically indexed on write.
         *
         * When using the combination of VolatileStore and Embedding generation, SK takes
         * care of creating and storing the index
         */

        Console.WriteLine("\nAdding some GitHub file URLs and their descriptions to the semantic memory.");
        var githubFiles = SampleData();
        var i = 0;
        foreach (var entry in githubFiles)
        {
            var result = await memory.SaveReferenceAsync(
                collection: MemoryCollectionName,
                externalSourceName: "GitHub",
                externalId: entry.Key,
                description: entry.Value,
                text: entry.Value);

            Console.WriteLine($"#{++i} saved.");
            Console.WriteLine(result);
        }

        Console.WriteLine("\n----------------------");
    }

    private static Dictionary<string, string> SampleData()
    {
        return new Dictionary<string, string>
        {
            ["https://github.com/microsoft/semantic-kernel/blob/main/README.md"]
                = "README: Installation, getting started, and how to contribute",
            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks/02-running-prompts-from-file.ipynb"]
                = "Jupyter notebook describing how to pass prompts from a file to a semantic skill or function",
            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks/00-getting-started.ipynb"]
                = "Jupyter notebook describing how to get started with the Semantic Kernel",
            ["https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT"]
                = "Sample demonstrating how to create a chat skill interfacing with ChatGPT",
            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel/Memory/VolatileMemoryStore.cs"]
                = "C# class that defines a volatile embedding store",
            ["https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet/KernelHttpServer/README.md"]
                = "README: How to set up a Semantic Kernel Service API using Azure Function Runtime v4",
            ["https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md"]
                = "README: README associated with a sample chat summary react-based webapp",
        };
    }
}
```
# Semantic kernel prompt
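This example registers a `StatelessExecutor` as a Semantic Kernel text generation service, then invokes a prompt function built from a `{{$input}}` template to produce one-line TLDR summaries.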
```cs
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel;
using LLamaSharp.SemanticKernel.TextCompletion;
using Microsoft.SemanticKernel.TextGeneration;
using Microsoft.Extensions.DependencyInjection;

public class SemanticKernelPrompt
{
    public static async Task Run()
    {
        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        // Load weights into memory
        var parameters = new ModelParams(modelPath);
        using var model = LLamaWeights.LoadFromFile(parameters);
        var ex = new StatelessExecutor(model, parameters);

        var builder = Kernel.CreateBuilder();
        builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
        var kernel = builder.Build();

        var prompt = @"{{$input}}

One line TLDR with the fewest words.";

        ChatRequestSettings settings = new() { MaxTokens = 100 };
        var summarize = kernel.CreateFunctionFromPrompt(prompt, settings);

        string text1 = @"
1st Law of Thermodynamics - Energy cannot be created or destroyed.
2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";

        string text2 = @"
1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
2. The acceleration of an object depends on the mass of the object and the amount of force applied.
3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite force on the first.";

        Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue<string>());

        Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue<string>());
    }
}
```
# Talk to yourself
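This example creates two contexts, "Alice" and "Bob", that share the same model weights, and feeds each executor's output to the other so the model holds a conversation with itself until a key is pressed.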
```cs
using System.Text;
using LLama;
using LLama.Abstractions;
using LLama.Common;

public class TalkToYourself
{
    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        // Load weights into memory
        var @params = new ModelParams(modelPath);
        using var weights = LLamaWeights.LoadFromFile(@params);

        // Create 2 contexts sharing the same weights
        using var aliceCtx = weights.CreateContext(@params);
        var alice = new InteractiveExecutor(aliceCtx);
        using var bobCtx = weights.CreateContext(@params);
        var bob = new InteractiveExecutor(bobCtx);

        // Initial alice prompt
        var alicePrompt = "Transcript of a dialog, where Alice interacts with a person named Bob. Alice is friendly, kind, honest and good at writing.\nAlice: Hello";
        var aliceResponse = await Prompt(alice, ConsoleColor.Green, alicePrompt, false, false);

        // Initial bob prompt
        var bobPrompt = $"Transcript of a dialog, where Bob interacts with a person named Alice. Bob is smart, intellectual and good at writing.\nAlice: Hello{aliceResponse}";
        var bobResponse = await Prompt(bob, ConsoleColor.Red, bobPrompt, true, true);

        // swap back and forth from Alice to Bob
        while (true)
        {
            aliceResponse = await Prompt(alice, ConsoleColor.Green, bobResponse, false, true);
            bobResponse = await Prompt(bob, ConsoleColor.Red, aliceResponse, false, true);

            if (Console.KeyAvailable)
                break;
        }
    }

    private static async Task<string> Prompt(ILLamaExecutor executor, ConsoleColor color, string prompt, bool showPrompt, bool showResponse)
    {
        var inferenceParams = new InferenceParams
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "Alice:", "Bob:", "User:" },
            MaxTokens = 128,
            Mirostat = MirostatType.Mirostat2,
            MirostatTau = 10,
        };

        Console.ForegroundColor = ConsoleColor.White;
        if (showPrompt)
            Console.Write(prompt);

        Console.ForegroundColor = color;
        var builder = new StringBuilder();
        await foreach (var text in executor.InferAsync(prompt, inferenceParams))
        {
            builder.Append(text);
            if (showResponse)
                Console.Write(text);
        }

        return builder.ToString();
    }
}
```