@@ -28,7 +28,7 @@
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="7.0.0" />
-    <PackageReference Include="Microsoft.SemanticKernel" Version="0.21.230828.2-preview" />
+    <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta1" />
   </ItemGroup>

   <ItemGroup>
@@ -1,13 +1,9 @@
-using Microsoft.SemanticKernel.Memory;
 using LLama.Common;
 using Microsoft.SemanticKernel;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using LLama.Common;
+using Microsoft.SemanticKernel.Memory;
 using LLamaSharp.SemanticKernel.TextEmbedding;
-using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel.Plugins.Memory;

 namespace LLama.Examples.NewVersion
 {
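For context: `Microsoft.SemanticKernel.AI.Embeddings` no longer exists in 1.0.0-beta1, and the memory wiring moves under `Microsoft.SemanticKernel.Plugins.Memory`. Below is a minimal sketch of what the updated example ends up wiring together; the `MemoryBuilder` fluent calls follow the beta1 API as best we can tell, the model path is a placeholder, and the embedder construction assumes LLamaSharp's 0.6-era loading methods:

```csharp
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.TextEmbedding;
using Microsoft.SemanticKernel.Memory;
using Microsoft.SemanticKernel.Plugins.Memory;

// Hypothetical local model path; EmbeddingMode is needed for embedding generation.
var parameters = new ModelParams("path/to/model.gguf") { EmbeddingMode = true };
using var weights = LLamaWeights.LoadFromFile(parameters);
var embedder = new LLamaEmbedder(weights, parameters);

// In 1.0.0-beta1, memory is built standalone via MemoryBuilder
// instead of being attached to the kernel as in the 0.x previews.
ISemanticTextMemory memory = new MemoryBuilder()
    .WithTextEmbeddingGeneration(new LLamaSharpEmbeddingGeneration(embedder))
    .WithMemoryStore(new VolatileMemoryStore())
    .Build();

await memory.SaveInformationAsync("facts", "LLamaSharp runs llama.cpp models locally.", "fact-1");
var hit = await memory.GetAsync("facts", "fact-1");
Console.WriteLine(hit?.Metadata.Text);
```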
@@ -1,10 +1,7 @@
-using System.Reflection.Metadata;
-using System.Security.Cryptography;
-using System.Text;
-using LLama.Abstractions;
+using System.Security.Cryptography;
 using LLama.Common;
+using LLamaSharp.SemanticKernel.ChatCompletion;
 using Microsoft.SemanticKernel;
-using Microsoft.SemanticKernel.AI.ChatCompletion;
 using Microsoft.SemanticKernel.AI.TextCompletion;
 using LLamaSharp.SemanticKernel.TextCompletion;
@@ -35,7 +32,8 @@ namespace LLama.Examples.NewVersion
 One line TLDR with the fewest words.";

-            var summarize = kernel.CreateSemanticFunction(prompt, maxTokens: 100);
+            ChatRequestSettings settings = new() { MaxTokens = 100 };
+            var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings);

             string text1 = @"
 1st Law of Thermodynamics - Energy cannot be created or destroyed.
@@ -47,9 +45,10 @@ One line TLDR with the fewest words.";
 2. The acceleration of an object depends on the mass of the object and the amount of force applied.
 3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite force on the first.";

-            Console.WriteLine(await summarize.InvokeAsync(text1));
+            Console.WriteLine(await kernel.RunAsync(text1, summarize));

-            Console.WriteLine(await summarize.InvokeAsync(text2));
+            Console.WriteLine(await kernel.RunAsync(text2, summarize));
         }
     }
 }
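Taken together, the example changes show the new calling pattern: the token limit moves from a `maxTokens:` argument into a `ChatRequestSettings` object, and functions run through `kernel.RunAsync` rather than `ISKFunction.InvokeAsync`. A condensed end-to-end sketch, assuming beta1's `KernelBuilder.WithAIService` registration, a placeholder model path, and 0.6-era executor construction:

```csharp
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.ChatCompletion;
using LLamaSharp.SemanticKernel.TextCompletion;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.AI.TextCompletion;

// Hypothetical model path; executor setup follows LLamaSharp's 0.6-era API.
var parameters = new ModelParams("path/to/model.gguf");
using var weights = LLamaWeights.LoadFromFile(parameters);
var executor = new StatelessExecutor(weights, parameters);

// Register LLamaSharp as the kernel's default text completion service.
var kernel = new KernelBuilder()
    .WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(executor), true)
    .Build();

// Limits now travel inside a ChatRequestSettings instance.
var settings = new ChatRequestSettings { MaxTokens = 100 };
var summarize = kernel.CreateSemanticFunction(
    "{{$INPUT}}\n\nOne line TLDR with the fewest words.",
    requestSettings: settings);

// Functions are invoked through the kernel instead of func.InvokeAsync(...).
Console.WriteLine(await kernel.RunAsync("Energy cannot be created or destroyed.", summarize));
```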
@@ -0,0 +1,54 @@
+using Microsoft.SemanticKernel.AI;
+
+namespace LLamaSharp.SemanticKernel.ChatCompletion;
+
+public class ChatRequestSettings : AIRequestSettings
+{
+    /// <summary>
+    /// Temperature controls the randomness of the completion.
+    /// The higher the temperature, the more random the completion.
+    /// </summary>
+    public double Temperature { get; set; } = 0;
+
+    /// <summary>
+    /// TopP controls the diversity of the completion.
+    /// The higher the TopP, the more diverse the completion.
+    /// </summary>
+    public double TopP { get; set; } = 0;
+
+    /// <summary>
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens
+    /// based on whether they appear in the text so far, increasing the
+    /// model's likelihood to talk about new topics.
+    /// </summary>
+    public double PresencePenalty { get; set; } = 0;
+
+    /// <summary>
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens
+    /// based on their existing frequency in the text so far, decreasing
+    /// the model's likelihood to repeat the same line verbatim.
+    /// </summary>
+    public double FrequencyPenalty { get; set; } = 0;
+
+    /// <summary>
+    /// Sequences where the completion will stop generating further tokens.
+    /// </summary>
+    public IList<string> StopSequences { get; set; } = Array.Empty<string>();
+
+    /// <summary>
+    /// How many completions to generate for each prompt. Default is 1.
+    /// Note: Because this parameter generates many completions, it can quickly consume your token quota.
+    /// Use carefully and ensure that you have reasonable settings for max_tokens and stop.
+    /// </summary>
+    public int ResultsPerPrompt { get; set; } = 1;
+
+    /// <summary>
+    /// The maximum number of tokens to generate in the completion.
+    /// </summary>
+    public int? MaxTokens { get; set; }
+
+    /// <summary>
+    /// Modify the likelihood of specified tokens appearing in the completion.
+    /// </summary>
+    public IDictionary<int, int> TokenSelectionBiases { get; set; } = new Dictionary<int, int>();
+}
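A short usage sketch for the new settings type (values illustrative, not recommendations). Judging from the extension method later in this diff, only `Temperature`, `TopP`, `PresencePenalty`, `FrequencyPenalty`, `StopSequences`, and `MaxTokens` are mapped to LLamaSharp's `InferenceParams`; `ResultsPerPrompt` and `TokenSelectionBiases` appear to exist for OpenAI-style API compatibility:

```csharp
var settings = new ChatRequestSettings
{
    MaxTokens = 256,                              // null means no limit (-1 downstream)
    Temperature = 0.7,
    TopP = 0.95,
    StopSequences = new List<string> { "User:" }  // becomes AntiPrompts on the LLamaSharp side
};
```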
@@ -1,4 +1,5 @@
 using LLama;
+using Microsoft.SemanticKernel.AI;
 using Microsoft.SemanticKernel.AI.ChatCompletion;
 using System.Runtime.CompilerServices;
@@ -14,30 +15,29 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion
     private ChatSession session;
     private ChatRequestSettings defaultRequestSettings;

-    public LLamaSharpChatCompletion(InteractiveExecutor model, ChatRequestSettings? defaultRequestSettings = default)
+    static ChatRequestSettings GetDefaultSettings()
     {
-        this.session = new ChatSession(model)
-            .WithHistoryTransform(new HistoryTransform())
-            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
-        this.defaultRequestSettings = defaultRequestSettings ??= new ChatRequestSettings()
+        return new ChatRequestSettings
         {
             MaxTokens = 256,
             Temperature = 0,
             TopP = 0,
-            StopSequences = new List<string> { }
+            StopSequences = new List<string>()
         };
     }

+    public LLamaSharpChatCompletion(InteractiveExecutor model, ChatRequestSettings? defaultRequestSettings = default)
+    {
+        this.session = new ChatSession(model)
+            .WithHistoryTransform(new HistoryTransform())
+            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
+        this.defaultRequestSettings = defaultRequestSettings ??= GetDefaultSettings();
+    }
+
     public LLamaSharpChatCompletion(ChatSession session, ChatRequestSettings? defaultRequestSettings = default)
     {
         this.session = session;
-        this.defaultRequestSettings = defaultRequestSettings ??= new ChatRequestSettings()
-        {
-            MaxTokens = 256,
-            Temperature = 0,
-            TopP = 0,
-            StopSequences = new List<string> { }
-        };
+        this.defaultRequestSettings = defaultRequestSettings ??= GetDefaultSettings();
     }

     /// <inheritdoc/>
@@ -54,21 +54,29 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion
     }

     /// <inheritdoc/>
-    public async Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
+    public Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
     {
-        requestSettings = requestSettings ?? this.defaultRequestSettings;
+        var settings = requestSettings != null
+            ? (ChatRequestSettings)requestSettings
+            : defaultRequestSettings;

-        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), settings.ToLLamaSharpInferenceParams(), cancellationToken);

-        return new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly();
+        return Task.FromResult<IReadOnlyList<IChatResult>>(new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly());
     }

     /// <inheritdoc/>
-    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
+    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+#pragma warning restore CS1998
     {
-        requestSettings = requestSettings ?? this.defaultRequestSettings;
+        var settings = requestSettings != null
+            ? (ChatRequestSettings)requestSettings
+            : defaultRequestSettings;

-        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), settings.ToLLamaSharpInferenceParams(), cancellationToken);

         yield return new LLamaSharpChatResult(result);
     }
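A hedged consumption sketch for the updated chat surface. Passing null falls back to `GetDefaultSettings()`; a non-null value must actually be a `ChatRequestSettings`, since the implementation casts rather than converts. Model setup below is a placeholder in the 0.6-era style:

```csharp
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.AI.ChatCompletion;

// Hypothetical model path; InteractiveExecutor wraps a LLamaContext in this era's API.
var parameters = new ModelParams("path/to/model.gguf");
using var weights = LLamaWeights.LoadFromFile(parameters);
using var context = weights.CreateContext(parameters);

IChatCompletion chat = new LLamaSharpChatCompletion(new InteractiveExecutor(context));
var history = chat.CreateNewChat("You are a concise assistant.");
history.AddUserMessage("Summarise the first law of thermodynamics.");

// null settings use the defaults; any non-null value must be a ChatRequestSettings.
var results = await chat.GetChatCompletionsAsync(history, requestSettings: null);
var message = await results[0].GetChatMessageAsync();
Console.WriteLine(message.Content);
```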
@@ -1,5 +1,5 @@
-using Microsoft.SemanticKernel.AI.ChatCompletion;
-using Microsoft.SemanticKernel.AI.TextCompletion;
+using LLamaSharp.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.AI.ChatCompletion;

 namespace LLamaSharp.SemanticKernel;
@@ -46,27 +46,4 @@ internal static class ExtensionMethods
             MaxTokens = requestSettings.MaxTokens ?? -1
         };
     }
-
-    /// <summary>
-    /// Convert CompleteRequestSettings to LLamaSharp InferenceParams
-    /// </summary>
-    /// <param name="requestSettings"></param>
-    /// <returns></returns>
-    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this CompleteRequestSettings requestSettings)
-    {
-        if (requestSettings is null)
-        {
-            throw new ArgumentNullException(nameof(requestSettings));
-        }
-
-        return new global::LLama.Common.InferenceParams
-        {
-            Temperature = (float)requestSettings.Temperature,
-            TopP = (float)requestSettings.TopP,
-            PresencePenalty = (float)requestSettings.PresencePenalty,
-            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
-            AntiPrompts = requestSettings.StopSequences,
-            MaxTokens = requestSettings.MaxTokens ?? -1
-        };
-    }
 }
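The removed overload existed only because the 0.x previews used a separate `CompleteRequestSettings` type for text completion. With settings unified under `AIRequestSettings` in 1.0.0-beta1, the surviving `ChatRequestSettings` overload (its tail is visible at the top of this hunk) serves both the chat and text paths. For reference, the mapping it performs yields native parameters shaped like this (values illustrative):

```csharp
using LLama.Common;

var inferenceParams = new InferenceParams
{
    Temperature = 0.7f,                          // double settings are cast to float
    TopP = 0.95f,
    PresencePenalty = 0f,
    FrequencyPenalty = 0f,
    AntiPrompts = new List<string> { "User:" },  // from StopSequences
    MaxTokens = -1                               // a null MaxTokens means unbounded
};
```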
@@ -10,7 +10,7 @@
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
-    <Version>0.5.0</Version>
+    <Version>0.6.2-beta1</Version>
     <Authors>Tim Miller</Authors>
     <Company>SciSharp STACK</Company>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
@@ -33,7 +33,7 @@
   </PropertyGroup>

   <ItemGroup>
-    <PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="0.24.230911.2-preview" />
+    <PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.0.0-beta1" />
   </ItemGroup>

   <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">
@@ -1,5 +1,6 @@
 using LLama;
-using LLama.Abstractions;
 using LLama.Abstractions;
+using LLamaSharp.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.AI;
 using Microsoft.SemanticKernel.AI.TextCompletion;

 namespace LLamaSharp.SemanticKernel.TextCompletion;
@@ -13,15 +14,17 @@ public sealed class LLamaSharpTextCompletion : ITextCompletion
         this.executor = executor;
     }

-    public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default)
     {
-        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        var settings = (ChatRequestSettings)requestSettings;
+        var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
         return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
     }

-    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default)
     {
-        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        var settings = (ChatRequestSettings)requestSettings;
+        var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
         yield return new LLamaTextResult(result);
     }
 }
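Finally, the equivalent direct use of the text-completion service under the new signatures. As on the chat side, anything other than a `ChatRequestSettings` (or null) fails the direct cast at runtime; setup is again a placeholder sketch in the 0.6-era style:

```csharp
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.ChatCompletion;
using LLamaSharp.SemanticKernel.TextCompletion;
using Microsoft.SemanticKernel.AI.TextCompletion;

// Hypothetical model path, as in the earlier sketches.
var parameters = new ModelParams("path/to/model.gguf");
using var weights = LLamaWeights.LoadFromFile(parameters);
ITextCompletion completion = new LLamaSharpTextCompletion(new StatelessExecutor(weights, parameters));

// Settings must be a ChatRequestSettings instance (or null).
var results = await completion.GetCompletionsAsync(
    "The first law of thermodynamics states",
    new ChatRequestSettings { MaxTokens = 64 });
Console.WriteLine(await results[0].GetCompletionAsync());
```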