Support SemanticKernel 1.0.0-beta1

tags/v0.6.0
Daniel Vaughan committed 2 years ago
commit f64a54c9c8
8 changed files with 106 additions and 69 deletions
  1. +1  -1   LLama.Examples/LLama.Examples.csproj
  2. +3  -7   LLama.Examples/NewVersion/SemanticKernelMemory.cs
  3. +7  -8   LLama.Examples/NewVersion/SemanticKernelPrompt.cs
  4. +54 -0   LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs
  5. +28 -20  LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
  6. +2  -25  LLama.SemanticKernel/ExtensionMethods.cs
  7. +2  -2   LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
  8. +9  -6   LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs
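The heart of the migration shows up in the example changes below: per-call tuning such as maxTokens moves into the new ChatRequestSettings class, and semantic functions are now run through the kernel instead of being invoked directly. A minimal sketch of the new calling pattern, assuming a kernel already wired up with a LLamaSharp completion service (as in SemanticKernelPrompt.cs); prompt and text are placeholder strings:

    using LLamaSharp.SemanticKernel.ChatCompletion;

    // Before: kernel.CreateSemanticFunction(prompt, maxTokens: 100)
    // After:  tuning values travel in a ChatRequestSettings object.
    ChatRequestSettings settings = new() { MaxTokens = 100 };
    var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings);

    // Before: await summarize.InvokeAsync(text)
    // After:  functions are run through the kernel.
    Console.WriteLine(await kernel.RunAsync(text, summarize));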

+1 -1  LLama.Examples/LLama.Examples.csproj

@@ -28,7 +28,7 @@

<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="7.0.0" />
<PackageReference Include="Microsoft.SemanticKernel" Version="0.21.230828.2-preview" />
<PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta1" />
</ItemGroup>

<ItemGroup>


+3 -7  LLama.Examples/NewVersion/SemanticKernelMemory.cs

@@ -1,13 +1,9 @@
using Microsoft.SemanticKernel.Memory;
using LLama.Common;
using Microsoft.SemanticKernel;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using LLama.Common;
using Microsoft.SemanticKernel.Memory;
using LLamaSharp.SemanticKernel.TextEmbedding;
using Microsoft.SemanticKernel.AI.Embeddings;
using Microsoft.SemanticKernel.Plugins.Memory;

namespace LLama.Examples.NewVersion
{


+7 -8  LLama.Examples/NewVersion/SemanticKernelPrompt.cs

@@ -1,10 +1,7 @@
using System.Reflection.Metadata;
using System.Security.Cryptography;
using System.Text;
using LLama.Abstractions;
using System.Security.Cryptography;
using LLama.Common;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.AI.ChatCompletion;
using Microsoft.SemanticKernel.AI.TextCompletion;
using LLamaSharp.SemanticKernel.TextCompletion;

@@ -35,7 +32,8 @@ namespace LLama.Examples.NewVersion

One line TLDR with the fewest words.";

var summarize = kernel.CreateSemanticFunction(prompt, maxTokens: 100);
ChatRequestSettings settings = new() {MaxTokens = 100};
var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings);

string text1 = @"
1st Law of Thermodynamics - Energy cannot be created or destroyed.
@@ -47,9 +45,10 @@ One line TLDR with the fewest words.";
2. The acceleration of an object depends on the mass of the object and the amount of force applied.
3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first.";

Console.WriteLine(await summarize.InvokeAsync(text1));
Console.WriteLine(await kernel.RunAsync(text1, summarize));

Console.WriteLine(await summarize.InvokeAsync(text2));
Console.WriteLine(await kernel.RunAsync(text2, summarize));
}
}
}

+54 -0  LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs

@@ -0,0 +1,54 @@
using Microsoft.SemanticKernel.AI;

namespace LLamaSharp.SemanticKernel.ChatCompletion;

public class ChatRequestSettings : AIRequestSettings
{
/// <summary>
/// Temperature controls the randomness of the completion.
/// The higher the temperature, the more random the completion.
/// </summary>
public double Temperature { get; set; } = 0;

/// <summary>
/// TopP controls the diversity of the completion.
/// The higher the TopP, the more diverse the completion.
/// </summary>
public double TopP { get; set; } = 0;

/// <summary>
/// Number between -2.0 and 2.0. Positive values penalize new tokens
/// based on whether they appear in the text so far, increasing the
/// model's likelihood to talk about new topics.
/// </summary>
public double PresencePenalty { get; set; } = 0;

/// <summary>
/// Number between -2.0 and 2.0. Positive values penalize new tokens
/// based on their existing frequency in the text so far, decreasing
/// the model's likelihood to repeat the same line verbatim.
/// </summary>
public double FrequencyPenalty { get; set; } = 0;

/// <summary>
/// Sequences where the completion will stop generating further tokens.
/// </summary>
public IList<string> StopSequences { get; set; } = Array.Empty<string>();

/// <summary>
/// How many completions to generate for each prompt. Default is 1.
/// Note: Because this parameter generates many completions, it can quickly consume your token quota.
/// Use carefully and ensure that you have reasonable settings for max_tokens and stop.
/// </summary>
public int ResultsPerPrompt { get; set; } = 1;

/// <summary>
/// The maximum number of tokens to generate in the completion.
/// </summary>
public int? MaxTokens { get; set; }

/// <summary>
/// Modify the likelihood of specified tokens appearing in the completion.
/// </summary>
public IDictionary<int, int> TokenSelectionBiases { get; set; } = new Dictionary<int, int>();
}
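A brief usage sketch for the new settings class, assuming an InteractiveExecutor named executor has been created elsewhere (model loading is outside the scope of this commit); the values mirror GetDefaultSettings() in LLamaSharpChatCompletion below:

    using LLamaSharp.SemanticKernel.ChatCompletion;

    // Explicit defaults; pass null to the constructor to get the same values.
    var settings = new ChatRequestSettings
    {
        MaxTokens = 256,
        Temperature = 0,
        TopP = 0,
        StopSequences = new List<string>()
    };
    var chat = new LLamaSharpChatCompletion(executor, settings);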

+28 -20  LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs

@@ -1,4 +1,5 @@
using LLama;
using Microsoft.SemanticKernel.AI;
using Microsoft.SemanticKernel.AI.ChatCompletion;
using System.Runtime.CompilerServices;

@@ -14,30 +15,29 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion
private ChatSession session;
private ChatRequestSettings defaultRequestSettings;

public LLamaSharpChatCompletion(InteractiveExecutor model, ChatRequestSettings? defaultRequestSettings = default)
static ChatRequestSettings GetDefaultSettings()
{
this.session = new ChatSession(model)
.WithHistoryTransform(new HistoryTransform())
.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
this.defaultRequestSettings = defaultRequestSettings ??= new ChatRequestSettings()
return new ChatRequestSettings
{
MaxTokens = 256,
Temperature = 0,
TopP = 0,
StopSequences = new List<string> { }
StopSequences = new List<string>()
};
}

public LLamaSharpChatCompletion(InteractiveExecutor model, ChatRequestSettings? defaultRequestSettings = default)
{
this.session = new ChatSession(model)
.WithHistoryTransform(new HistoryTransform())
.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
this.defaultRequestSettings = defaultRequestSettings ??= GetDefaultSettings();
}

public LLamaSharpChatCompletion(ChatSession session, ChatRequestSettings? defaultRequestSettings = default)
{
this.session = session;
this.defaultRequestSettings = defaultRequestSettings ??= new ChatRequestSettings()
{
MaxTokens = 256,
Temperature = 0,
TopP = 0,
StopSequences = new List<string> { }
};
this.defaultRequestSettings = defaultRequestSettings ??= GetDefaultSettings();
}

/// <inheritdoc/>
@@ -54,21 +54,29 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion
}

/// <inheritdoc/>
public async Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
public Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
{
requestSettings = requestSettings ?? this.defaultRequestSettings;
var settings = requestSettings != null
? (ChatRequestSettings)requestSettings
: defaultRequestSettings;

var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
// This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), settings.ToLLamaSharpInferenceParams(), cancellationToken);

return new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly();
return Task.FromResult<IReadOnlyList<IChatResult>>(new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly());
}

/// <inheritdoc/>
public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
#pragma warning restore CS1998
{
requestSettings = requestSettings ?? this.defaultRequestSettings;
var settings = requestSettings != null
? (ChatRequestSettings)requestSettings
: defaultRequestSettings;

var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
// This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), settings.ToLLamaSharpInferenceParams(), cancellationToken);

yield return new LLamaSharpChatResult(result);
}
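A hedged sketch of driving the updated interface, assuming the CreateNewChat, AddUserMessage, and GetChatMessageAsync members of Semantic Kernel 1.0.0-beta1's chat API; chat is the LLamaSharpChatCompletion instance from above:

    var history = chat.CreateNewChat("You are a helpful assistant.");
    history.AddUserMessage("Summarize the laws of thermodynamics.");

    // requestSettings: null falls back to defaultRequestSettings from the constructor.
    var results = await chat.GetChatCompletionsAsync(history, requestSettings: null);
    var message = await results[0].GetChatMessageAsync();
    Console.WriteLine(message.Content);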


+2 -25  LLama.SemanticKernel/ExtensionMethods.cs

@@ -1,5 +1,5 @@
using Microsoft.SemanticKernel.AI.ChatCompletion;
using Microsoft.SemanticKernel.AI.TextCompletion;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.AI.ChatCompletion;

namespace LLamaSharp.SemanticKernel;

@@ -46,27 +46,4 @@ internal static class ExtensionMethods
MaxTokens = requestSettings.MaxTokens ?? -1
};
}

/// <summary>
/// Convert CompleteRequestSettings to LLamaSharp InferenceParams
/// </summary>
/// <param name="requestSettings"></param>
/// <returns></returns>
internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this CompleteRequestSettings requestSettings)
{
if (requestSettings is null)
{
throw new ArgumentNullException(nameof(requestSettings));
}

return new global::LLama.Common.InferenceParams
{
Temperature = (float)requestSettings.Temperature,
TopP = (float)requestSettings.TopP,
PresencePenalty = (float)requestSettings.PresencePenalty,
FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
AntiPrompts = requestSettings.StopSequences,
MaxTokens = requestSettings.MaxTokens ?? -1
};
}
}

+2 -2  LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj

@@ -10,7 +10,7 @@
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>

<Version>0.5.0</Version>
<Version>0.6.2-beta1</Version>
<Authors>Tim Miller</Authors>
<Company>SciSharp STACK</Company>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
@@ -33,7 +33,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="0.24.230911.2-preview" />
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.0.0-beta1" />
</ItemGroup>

<ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">


+9 -6  LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs

@@ -1,5 +1,6 @@
using LLama;
using LLama.Abstractions;
using LLama.Abstractions;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.AI;
using Microsoft.SemanticKernel.AI.TextCompletion;

namespace LLamaSharp.SemanticKernel.TextCompletion;
@@ -13,15 +14,17 @@ public sealed class LLamaSharpTextCompletion : ITextCompletion
this.executor = executor;
}

public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default)
{
var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
var settings = (ChatRequestSettings)requestSettings;
var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
}

public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default)
{
var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
var settings = (ChatRequestSettings)requestSettings;
var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
yield return new LLamaTextResult(result);
}
}
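Note that both methods cast the incoming AIRequestSettings to ChatRequestSettings, so callers should pass that concrete type; other AIRequestSettings subtypes would fail the cast. A minimal usage sketch under the same executor assumption as above, where GetCompletionAsync is the beta1 ITextResult accessor:

    var textCompletion = new LLamaSharpTextCompletion(executor);
    var settings = new ChatRequestSettings { MaxTokens = 64 };

    var completions = await textCompletion.GetCompletionsAsync("The three laws of motion are", settings);
    Console.WriteLine(await completions[0].GetCompletionAsync());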
