diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj
index 6a1685ed..a8abe3ae 100644
--- a/LLama.Examples/LLama.Examples.csproj
+++ b/LLama.Examples/LLama.Examples.csproj
@@ -27,6 +27,11 @@
+
+
+
+
+
diff --git a/LLama.Examples/NewVersion/SemanticKernelChat.cs b/LLama.Examples/NewVersion/SemanticKernelChat.cs
new file mode 100644
index 00000000..9bdbcfec
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelChat.cs
@@ -0,0 +1,69 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using LLamaSharp.SemanticKernel.ChatCompletion;
+using LLamaSharp.SemanticKernel.TextCompletion;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelChat
+    {
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+            var ex = new InteractiveExecutor(context);
+
+            var chatGPT = new LLamaSharpChatCompletion(ex);
+
+            var chatHistory = chatGPT.CreateNewChat("You are a librarian, expert about books");
+
+            Console.WriteLine("Chat content:");
+            Console.WriteLine("------------------------");
+
+            chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
+            await MessageOutputAsync(chatHistory);
+
+            // First bot assistant message
+            string reply = await chatGPT.GenerateMessageAsync(chatHistory);
+            chatHistory.AddAssistantMessage(reply);
+            await MessageOutputAsync(chatHistory);
+
+            // Second user message
+            chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion");
+            await MessageOutputAsync(chatHistory);
+
+            // Second bot assistant message
+            reply = await chatGPT.GenerateMessageAsync(chatHistory);
+            chatHistory.AddAssistantMessage(reply);
+            await MessageOutputAsync(chatHistory);
+        }
+
+        /// <summary>
+        /// Outputs the last message of the chat history
+        /// </summary>
+        private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory)
+        {
+            var message = chatHistory.Messages.Last();
+
+            Console.WriteLine($"{message.Role}: {message.Content}");
+            Console.WriteLine("------------------------");
+
+            return Task.CompletedTask;
+        }
+    }
+}
diff --git a/LLama.Examples/NewVersion/SemanticKernelPrompt.cs b/LLama.Examples/NewVersion/SemanticKernelPrompt.cs
new file mode 100644
index 00000000..0482c195
--- /dev/null
+++ b/LLama.Examples/NewVersion/SemanticKernelPrompt.cs
@@ -0,0 +1,55 @@
+using System.Reflection.Metadata;
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using LLamaSharp.SemanticKernel.TextCompletion;
+
+namespace LLama.Examples.NewVersion
+{
+    public class SemanticKernelPrompt
+    {
+        public static async Task Run()
+        {
+            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+
+            // Load weights into memory
+            var parameters = new ModelParams(modelPath)
+            {
+                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            var ex = new StatelessExecutor(model, parameters);
+
+            var builder = new KernelBuilder();
+            builder.WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(ex), true);
+
+            var kernel = builder.Build();
+
+            var prompt = @"{{$input}}
+
+One line TLDR with the fewest words.";
+
+            var summarize = kernel.CreateSemanticFunction(prompt, maxTokens: 100);
+
+            string text1 = @"
+1st Law of Thermodynamics - Energy cannot be created or destroyed.
+2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
+3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";
+
+            string text2 = @"
+1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
+2. The acceleration of an object depends on the mass of the object and the amount of force applied.
+3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite force on the first.";
+
+            Console.WriteLine(await summarize.InvokeAsync(text1));
+
+            Console.WriteLine(await summarize.InvokeAsync(text2));
+        }
+    }
+}
diff --git a/LLama.Examples/NewVersion/TestRunner.cs b/LLama.Examples/NewVersion/TestRunner.cs
index f5a10ef4..83316510 100644
--- a/LLama.Examples/NewVersion/TestRunner.cs
+++ b/LLama.Examples/NewVersion/TestRunner.cs
@@ -8,7 +8,7 @@
             Console.WriteLine("Please input a number to choose an example to run:");
             Console.WriteLine("0: Run a chat session without stripping the role names.");
-            Console.WriteLine("1: Run a chat session with the role names strippped.");
+            Console.WriteLine("1: Run a chat session with the role names stripped.");
             Console.WriteLine("2: Interactive mode chat by using executor.");
             Console.WriteLine("3: Instruct mode chat by using executor.");
             Console.WriteLine("4: Stateless mode chat by using executor.");
@@ -18,6 +18,8 @@
             Console.WriteLine("8: Quantize the model.");
             Console.WriteLine("9: Automatic conversation.");
             Console.WriteLine("10: Constrain response to json format using grammar.");
+            Console.WriteLine("11: Semantic Kernel Prompt.");
+            Console.WriteLine("12: Semantic Kernel Chat.");
 
             while (true)
             {
@@ -68,6 +70,14 @@
                 {
                     GrammarJsonResponse.Run();
                 }
+                else if (choice == 11)
+                {
+                    await SemanticKernelPrompt.Run();
+                }
+                else if (choice == 12)
+                {
+                    await SemanticKernelChat.Run();
+                }
                 else
                 {
                    Console.WriteLine("Cannot parse your choice. Please select again.");
diff --git a/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs b/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs
new file mode 100644
index 00000000..759888d0
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/HistoryTransform.cs
@@ -0,0 +1,17 @@
+using static LLama.LLamaTransforms;
+
+namespace LLamaSharp.SemanticKernel.ChatCompletion;
+
+/// <summary>
+/// Default HistoryTransform Patch
+/// </summary>
+public class HistoryTransform : DefaultHistoryTransform
+{
+    /// <inheritdoc/>
+    public override string HistoryToText(global::LLama.Common.ChatHistory history)
+    {
+        var prompt = base.HistoryToText(history);
+        return prompt + "\nAssistant:";
+
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
new file mode 100644
index 00000000..7fda3d4f
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
@@ -0,0 +1,74 @@
+using LLama;
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace LLamaSharp.SemanticKernel.ChatCompletion;
+
+/// <summary>
+/// LLamaSharp ChatCompletion
+/// </summary>
+public sealed class LLamaSharpChatCompletion : IChatCompletion
+{
+    private const string UserRole = "user:";
+    private const string AssistantRole = "assistant:";
+    private ChatSession session;
+
+    public LLamaSharpChatCompletion(InteractiveExecutor model)
+    {
+        this.session = new ChatSession(model)
+            .WithHistoryTransform(new HistoryTransform())
+            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
+    }
+
+    /// <inheritdoc/>
+    public ChatHistory CreateNewChat(string? instructions = "")
+    {
+        var history = new ChatHistory();
+
+        if (!string.IsNullOrEmpty(instructions))
+        {
+            history.AddSystemMessage(instructions);
+        }
+
+        return history;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
+    {
+        requestSettings ??= new ChatRequestSettings()
+        {
+            MaxTokens = 256,
+            Temperature = 0,
+            TopP = 0,
+            StopSequences = new List<string> { }
+        };
+
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+
+        return new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly();
+    }
+
+    /// <inheritdoc/>
+    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        requestSettings ??= new ChatRequestSettings()
+        {
+            MaxTokens = 256,
+            Temperature = 0,
+            TopP = 0,
+            StopSequences = new List<string> { }
+        };
+
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+
+        yield return new LLamaSharpChatResult(result);
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
new file mode 100644
index 00000000..1e54d0a1
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
@@ -0,0 +1,14 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+
+namespace LLamaSharp.SemanticKernel.ChatCompletion;
+
+/// <summary>
+/// LLamaSharp Chat Message
+/// </summary>
+public class LLamaSharpChatMessage : ChatMessageBase
+{
+    /// <inheritdoc/>
+    public LLamaSharpChatMessage(AuthorRole role, string content) : base(role, content)
+    {
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
new file mode 100644
index 00000000..ec479f42
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
@@ -0,0 +1,38 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace LLamaSharp.SemanticKernel.ChatCompletion;
+
+internal sealed class LLamaSharpChatResult : IChatStreamingResult
+{
+    private readonly IAsyncEnumerable<string> _stream;
+
+    /// <summary>
+    /// Wraps the stream of tokens produced by the chat session.
+    /// </summary>
+    /// <param name="stream"></param>
+    public LLamaSharpChatResult(IAsyncEnumerable<string> stream)
+    {
+        _stream = stream;
+    }
+    /// <inheritdoc/>
+    public async Task<ChatMessageBase> GetChatMessageAsync(CancellationToken cancellationToken = default)
+    {
+        var sb = new StringBuilder();
+        await foreach (var token in _stream)
+        {
+            sb.Append(token);
+        }
+        return new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString());
+    }
+
+    /// <inheritdoc/>
+    public async IAsyncEnumerable<ChatMessageBase> GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await foreach (var token in _stream)
+        {
+            yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token);
+        }
+    }
+}
diff --git a/LLama.SemanticKernel/ExtensionMethods.cs b/LLama.SemanticKernel/ExtensionMethods.cs
new file mode 100644
index 00000000..90090ead
--- /dev/null
+++ b/LLama.SemanticKernel/ExtensionMethods.cs
@@ -0,0 +1,75 @@
+using Microsoft.SemanticKernel.AI.ChatCompletion;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+
+namespace LLamaSharp.SemanticKernel;
+
+internal static class ExtensionMethods
+{
+    /// <summary>
+    /// Convert ChatHistory to LLamaSharp ChatHistory
+    /// </summary>
+    internal static global::LLama.Common.ChatHistory ToLLamaSharpChatHistory(this ChatHistory chatHistory)
+    {
+        if (chatHistory is null)
+        {
+            throw new ArgumentNullException(nameof(chatHistory));
+        }
+
+        var history = new global::LLama.Common.ChatHistory();
+
+        foreach (var chat in chatHistory)
+        {
+            var role = Enum.TryParse<global::LLama.Common.AuthorRole>(chat.Role.Label, out var _role) ? _role : global::LLama.Common.AuthorRole.Unknown;
+            history.AddMessage(role, chat.Content);
+        }
+
+        return history;
+    }
+
+    /// <summary>
+    /// Convert ChatRequestSettings to LLamaSharp InferenceParams
+    /// </summary>
+    /// <param name="requestSettings"></param>
+    /// <returns></returns>
+    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this ChatRequestSettings requestSettings)
+    {
+        if (requestSettings is null)
+        {
+            throw new ArgumentNullException(nameof(requestSettings));
+        }
+
+        var antiPrompts = new List<string>(requestSettings.StopSequences) { AuthorRole.User.ToString() + ":" };
+        return new global::LLama.Common.InferenceParams
+        {
+            Temperature = (float)requestSettings.Temperature,
+            TopP = (float)requestSettings.TopP,
+            PresencePenalty = (float)requestSettings.PresencePenalty,
+            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
+            AntiPrompts = antiPrompts,
+            MaxTokens = requestSettings.MaxTokens ?? -1
+        };
+    }
+
+    /// <summary>
+    /// Convert CompleteRequestSettings to LLamaSharp InferenceParams
+    /// </summary>
+    /// <param name="requestSettings"></param>
+    /// <returns></returns>
+    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this CompleteRequestSettings requestSettings)
+    {
+        if (requestSettings is null)
+        {
+            throw new ArgumentNullException(nameof(requestSettings));
+        }
+
+        return new global::LLama.Common.InferenceParams
+        {
+            Temperature = (float)requestSettings.Temperature,
+            TopP = (float)requestSettings.TopP,
+            PresencePenalty = (float)requestSettings.PresencePenalty,
+            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
+            AntiPrompts = requestSettings.StopSequences,
+            MaxTokens = requestSettings.MaxTokens ?? -1
+        };
+    }
+}
diff --git a/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
new file mode 100644
index 00000000..fc5af9b1
--- /dev/null
+++ b/LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
@@ -0,0 +1,22 @@
+
+
+
+    netstandard2.0;net6.0;net7.0
+    LLamaSharp.SemanticKernel
+    enable
+    10
+    AnyCPU;x64;Arm64
+    True
+    enable
+    enable
+
+
+
+
+
+
+
+
+
+
+
diff --git a/LLama.SemanticKernel/README.md b/LLama.SemanticKernel/README.md
new file mode 100644
index 00000000..369968b0
--- /dev/null
+++ b/LLama.SemanticKernel/README.md
@@ -0,0 +1,51 @@
+# LLamaSharp.SemanticKernel
+
+LLamaSharp.SemanticKernel provides connectors for [SemanticKernel](https://github.com/microsoft/semantic-kernel), an SDK for integrating various LLM interfaces into a single implementation. With this, you can add local LLaMa queries as another connection point alongside your existing connectors.
+
+For reference on how to implement it, view the following examples:
+
+- [SemanticKernelChat](../LLama.Examples/NewVersion/SemanticKernelChat.cs)
+- [SemanticKernelPrompt](../LLama.Examples/NewVersion/SemanticKernelPrompt.cs)
+
+## ITextCompletion
+```csharp
+using var model = LLamaWeights.LoadFromFile(parameters);
+// LLamaSharpTextCompletion can accept any ILLamaExecutor.
+var ex = new StatelessExecutor(model, parameters);
+var builder = new KernelBuilder();
+builder.WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(ex), true);
+```
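+
+Once the service is registered, build the kernel and invoke a semantic function against the local model. A minimal sketch, adapted from the [SemanticKernelPrompt](../LLama.Examples/NewVersion/SemanticKernelPrompt.cs) example in this repository (the prompt and input text are illustrative):
+
+```csharp
+var kernel = builder.Build();
+
+// {{$input}} is replaced by the argument passed to InvokeAsync.
+var prompt = @"{{$input}}
+
+One line TLDR with the fewest words.";
+
+var summarize = kernel.CreateSemanticFunction(prompt, maxTokens: 100);
+Console.WriteLine(await summarize.InvokeAsync("Some long text to summarize."));
+```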
+
+## IChatCompletion
+```csharp
+using var model = LLamaWeights.LoadFromFile(parameters);
+using var context = model.CreateContext(parameters);
+// LLamaSharpChatCompletion requires an InteractiveExecutor, as it's the executor best suited to multi-turn chat.
+var ex = new InteractiveExecutor(context);
+var chatGPT = new LLamaSharpChatCompletion(ex);
+```
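+
+From there, a multi-turn conversation can be driven through the standard IChatCompletion surface. A minimal sketch, based on the [SemanticKernelChat](../LLama.Examples/NewVersion/SemanticKernelChat.cs) example (the messages are illustrative):
+
+```csharp
+var chatHistory = chatGPT.CreateNewChat("You are a librarian, expert about books");
+chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
+
+// Generate the assistant reply and append it to the history.
+string reply = await chatGPT.GenerateMessageAsync(chatHistory);
+chatHistory.AddAssistantMessage(reply);
+```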
diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
new file mode 100644
index 00000000..40dbd3f8
--- /dev/null
+++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
@@ -0,0 +1,27 @@
+using LLama;
+using LLama.Abstractions;
+using Microsoft.SemanticKernel.AI.TextCompletion;
+
+namespace LLamaSharp.SemanticKernel.TextCompletion;
+
+public sealed class LLamaSharpTextCompletion : ITextCompletion
+{
+    public ILLamaExecutor executor;
+
+    public LLamaSharpTextCompletion(ILLamaExecutor executor)
+    {
+        this.executor = executor;
+    }
+
+    public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    {
+        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
+    }
+
+    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    {
+        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        yield return new LLamaTextResult(result);
+    }
+}
diff --git a/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs
new file mode 100644
index 00000000..e1643481
--- /dev/null
+++ b/LLama.SemanticKernel/TextCompletion/LLamaTextResult.cs
@@ -0,0 +1,37 @@
+using Microsoft.SemanticKernel.AI.TextCompletion;
+using Microsoft.SemanticKernel.Orchestration;
+using System.Runtime.CompilerServices;
+using System.Text;
+
+namespace LLamaSharp.SemanticKernel.TextCompletion;
+
+internal sealed class LLamaTextResult : ITextStreamingResult
+{
+    private readonly IAsyncEnumerable<string> _text;
+
+    public LLamaTextResult(IAsyncEnumerable<string> text)
+    {
+        _text = text;
+        ModelResult = new(text);
+    }
+
+    public ModelResult ModelResult { get; }
+
+    public async Task<string> GetCompletionAsync(CancellationToken cancellationToken = default)
+    {
+        var sb = new StringBuilder();
+        await foreach (var token in _text)
+        {
+            sb.Append(token);
+        }
+        return sb.ToString();
+    }
+
+    public async IAsyncEnumerable<string> GetCompletionStreamingAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await foreach (string word in _text)
+        {
+            yield return word;
+        }
+    }
+}
diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs
index 5980d17c..0bb3f669 100644
--- a/LLama/LLamaEmbedder.cs
+++ b/LLama/LLamaEmbedder.cs
@@ -29,6 +29,11 @@ namespace LLama
             _ctx = weights.CreateContext(@params);
         }
 
+        public LLamaEmbedder(LLamaWeights weights, IModelParams @params)
+        {
+            _ctx = weights.CreateContext(@params);
+        }
+
         /// <summary>
         /// Get the embeddings of the text.
         /// </summary>
diff --git a/LLamaSharp.sln b/LLamaSharp.sln
index 2e00196c..2a039d41 100644
--- a/LLamaSharp.sln
+++ b/LLamaSharp.sln
@@ -11,7 +11,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp", "LLama\LLamaSh
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.WebAPI", "LLama.WebAPI\LLama.WebAPI.csproj", "{D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLama.Web", "LLama.Web\LLama.Web.csproj", "{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LLamaSharp.SemanticKernel", "LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj", "{D98F93E3-B344-4F9D-86BB-FDBF6768B587}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -83,6 +85,18 @@ Global
 		{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|Any CPU.Build.0 = Release|Any CPU
 		{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.ActiveCfg = Release|Any CPU
 		{C3531DB2-1B2B-433C-8DE6-3541E3620DB1}.Release|x64.Build.0 = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Debug|x64.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|Any CPU.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.ActiveCfg = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.GPU|x64.Build.0 = Debug|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|Any CPU.Build.0 = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.ActiveCfg = Release|Any CPU
+		{D98F93E3-B344-4F9D-86BB-FDBF6768B587}.Release|x64.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE