
Support SemanticKernel 1.0.0-beta1

tags/v0.6.0
Daniel Vaughan committed 2 years ago · commit f64a54c9c8
8 changed files with 106 additions and 69 deletions

1. LLama.Examples/LLama.Examples.csproj (+1, -1)
2. LLama.Examples/NewVersion/SemanticKernelMemory.cs (+3, -7)
3. LLama.Examples/NewVersion/SemanticKernelPrompt.cs (+7, -8)
4. LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs (+54, -0)
5. LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs (+28, -20)
6. LLama.SemanticKernel/ExtensionMethods.cs (+2, -25)
7. LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj (+2, -2)
8. LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs (+9, -6)

LLama.Examples/LLama.Examples.csproj (+1, -1)

@@ -28,7 +28,7 @@
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="7.0.0" />
-    <PackageReference Include="Microsoft.SemanticKernel" Version="0.21.230828.2-preview" />
+    <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta1" />
   </ItemGroup>

   <ItemGroup>

LLama.Examples/NewVersion/SemanticKernelMemory.cs (+3, -7)

@@ -1,13 +1,9 @@
-using Microsoft.SemanticKernel.Memory;
-using LLama.Common;
 using Microsoft.SemanticKernel;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using LLama.Common;
+using Microsoft.SemanticKernel.Memory;
 using LLamaSharp.SemanticKernel.TextEmbedding;
 using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel.Plugins.Memory;

 namespace LLama.Examples.NewVersion
 {

LLama.Examples/NewVersion/SemanticKernelPrompt.cs (+7, -8)

@@ -1,10 +1,7 @@
-using System.Reflection.Metadata;
-using System.Security.Cryptography;
-using System.Text;
-using LLama.Abstractions;
+using System.Security.Cryptography;
 using LLama.Common;
+using LLamaSharp.SemanticKernel.ChatCompletion;
 using Microsoft.SemanticKernel;
-using Microsoft.SemanticKernel.AI.ChatCompletion;
 using Microsoft.SemanticKernel.AI.TextCompletion;
 using LLamaSharp.SemanticKernel.TextCompletion;

@@ -35,7 +32,8 @@ namespace LLama.Examples.NewVersion

 One line TLDR with the fewest words.";

-        var summarize = kernel.CreateSemanticFunction(prompt, maxTokens: 100);
+        ChatRequestSettings settings = new() {MaxTokens = 100};
+        var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings);

         string text1 = @"
 1st Law of Thermodynamics - Energy cannot be created or destroyed.
@@ -47,9 +45,10 @@ One line TLDR with the fewest words.";
 2. The acceleration of an object depends on the mass of the object and the amount of force applied.
 3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first.";

-        Console.WriteLine(await summarize.InvokeAsync(text1));
+        Console.WriteLine(await kernel.RunAsync(text1, summarize));

-        Console.WriteLine(await summarize.InvokeAsync(text2));
+        Console.WriteLine(await kernel.RunAsync(text2, summarize));
         }
     }
 }
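
Migration note: two API changes meet in this file. Per-function limits such as maxTokens now travel inside a request-settings object, and semantic functions are invoked through the kernel rather than directly. A minimal sketch of the new calling pattern, assuming `kernel` is already wired up with the LLamaSharp connectors and `prompt`/`someText` are strings in scope (names here are illustrative, not from the diff):

    using LLamaSharp.SemanticKernel.ChatCompletion;
    using Microsoft.SemanticKernel;

    // Limits now live in a ChatRequestSettings instance instead of
    // named arguments such as maxTokens.
    ChatRequestSettings settings = new() { MaxTokens = 100 };
    var summarize = kernel.CreateSemanticFunction(prompt, requestSettings: settings);

    // Functions are run through the kernel, not via InvokeAsync on the function.
    Console.WriteLine(await kernel.RunAsync(someText, summarize));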

LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs (+54, -0)

@@ -0,0 +1,54 @@
+using Microsoft.SemanticKernel.AI;
+
+namespace LLamaSharp.SemanticKernel.ChatCompletion;
+
+public class ChatRequestSettings : AIRequestSettings
+{
+    /// <summary>
+    /// Temperature controls the randomness of the completion.
+    /// The higher the temperature, the more random the completion.
+    /// </summary>
+    public double Temperature { get; set; } = 0;
+
+    /// <summary>
+    /// TopP controls the diversity of the completion.
+    /// The higher the TopP, the more diverse the completion.
+    /// </summary>
+    public double TopP { get; set; } = 0;
+
+    /// <summary>
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens
+    /// based on whether they appear in the text so far, increasing the
+    /// model's likelihood to talk about new topics.
+    /// </summary>
+    public double PresencePenalty { get; set; } = 0;
+
+    /// <summary>
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens
+    /// based on their existing frequency in the text so far, decreasing
+    /// the model's likelihood to repeat the same line verbatim.
+    /// </summary>
+    public double FrequencyPenalty { get; set; } = 0;
+
+    /// <summary>
+    /// Sequences where the completion will stop generating further tokens.
+    /// </summary>
+    public IList<string> StopSequences { get; set; } = Array.Empty<string>();
+
+    /// <summary>
+    /// How many completions to generate for each prompt. Default is 1.
+    /// Note: Because this parameter generates many completions, it can quickly consume your token quota.
+    /// Use carefully and ensure that you have reasonable settings for max_tokens and stop.
+    /// </summary>
+    public int ResultsPerPrompt { get; set; } = 1;
+
+    /// <summary>
+    /// The maximum number of tokens to generate in the completion.
+    /// </summary>
+    public int? MaxTokens { get; set; }
+
+    /// <summary>
+    /// Modify the likelihood of specified tokens appearing in the completion.
+    /// </summary>
+    public IDictionary<int, int> TokenSelectionBiases { get; set; } = new Dictionary<int, int>();
+}
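
Because ChatRequestSettings derives from Semantic Kernel's AIRequestSettings, it can be passed anywhere SK accepts request settings. A minimal construction sketch (the values are illustrative, not recommendations from this commit):

    using LLamaSharp.SemanticKernel.ChatCompletion;

    var settings = new ChatRequestSettings
    {
        MaxTokens = 256,                              // cap the completion length
        Temperature = 0.7,                            // > 0 for more varied output
        StopSequences = new List<string> { "User:" }  // stop before a new user turn
    };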

LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs (+28, -20)

@@ -1,4 +1,5 @@
 using LLama;
+using Microsoft.SemanticKernel.AI;
 using Microsoft.SemanticKernel.AI.ChatCompletion;
 using System.Runtime.CompilerServices;

@@ -14,30 +15,29 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion
     private ChatSession session;
     private ChatRequestSettings defaultRequestSettings;

-    public LLamaSharpChatCompletion(InteractiveExecutor model, ChatRequestSettings? defaultRequestSettings = default)
+    static ChatRequestSettings GetDefaultSettings()
     {
-        this.session = new ChatSession(model)
-            .WithHistoryTransform(new HistoryTransform())
-            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
-        this.defaultRequestSettings = defaultRequestSettings ??= new ChatRequestSettings()
+        return new ChatRequestSettings
         {
             MaxTokens = 256,
             Temperature = 0,
             TopP = 0,
-            StopSequences = new List<string> { }
+            StopSequences = new List<string>()
         };
     }

+    public LLamaSharpChatCompletion(InteractiveExecutor model, ChatRequestSettings? defaultRequestSettings = default)
+    {
+        this.session = new ChatSession(model)
+            .WithHistoryTransform(new HistoryTransform())
+            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
+        this.defaultRequestSettings = defaultRequestSettings ??= GetDefaultSettings();
+    }
+
     public LLamaSharpChatCompletion(ChatSession session, ChatRequestSettings? defaultRequestSettings = default)
     {
         this.session = session;
-        this.defaultRequestSettings = defaultRequestSettings ??= new ChatRequestSettings()
-        {
-            MaxTokens = 256,
-            Temperature = 0,
-            TopP = 0,
-            StopSequences = new List<string> { }
-        };
+        this.defaultRequestSettings = defaultRequestSettings ??= GetDefaultSettings();
     }

     /// <inheritdoc/>
@@ -54,21 +54,29 @@ public sealed class LLamaSharpChatCompletion : IChatCompletion
     }

     /// <inheritdoc/>
-    public async Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
+    public Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
     {
-        requestSettings = requestSettings ?? this.defaultRequestSettings;
+        var settings = requestSettings != null
+            ? (ChatRequestSettings)requestSettings
+            : defaultRequestSettings;

-        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), settings.ToLLamaSharpInferenceParams(), cancellationToken);

-        return new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly();
+        return Task.FromResult<IReadOnlyList<IChatResult>>(new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly());
     }

     /// <inheritdoc/>
-    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
+    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+#pragma warning restore CS1998
     {
-        requestSettings = requestSettings ?? this.defaultRequestSettings;
+        var settings = requestSettings != null
+            ? (ChatRequestSettings)requestSettings
+            : defaultRequestSettings;

-        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
+        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), settings.ToLLamaSharpInferenceParams(), cancellationToken);

         yield return new LLamaSharpChatResult(result);
     }
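
One behavioral consequence of widening the parameter to AIRequestSettings? is worth noting: a non-null argument is converted with a hard cast, so only ChatRequestSettings (or null, which selects the instance defaults) is safe to pass. A hedged illustration, assuming `completion` is an instance of this class and `chat` a populated ChatHistory:

    // Null falls back to the instance's default settings.
    var results = await completion.GetChatCompletionsAsync(chat, null);

    // The expected concrete type works.
    results = await completion.GetChatCompletionsAsync(
        chat, new ChatRequestSettings { MaxTokens = 64 });

    // Any other AIRequestSettings subtype would throw an
    // InvalidCastException at runtime.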


LLama.SemanticKernel/ExtensionMethods.cs (+2, -25)

@@ -1,5 +1,5 @@
-using Microsoft.SemanticKernel.AI.ChatCompletion;
-using Microsoft.SemanticKernel.AI.TextCompletion;
+using LLamaSharp.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.AI.ChatCompletion;

 namespace LLamaSharp.SemanticKernel;

@@ -46,27 +46,4 @@ internal static class ExtensionMethods
             MaxTokens = requestSettings.MaxTokens ?? -1
         };
     }
-
-    /// <summary>
-    /// Convert CompleteRequestSettings to LLamaSharp InferenceParams
-    /// </summary>
-    /// <param name="requestSettings"></param>
-    /// <returns></returns>
-    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this CompleteRequestSettings requestSettings)
-    {
-        if (requestSettings is null)
-        {
-            throw new ArgumentNullException(nameof(requestSettings));
-        }
-
-        return new global::LLama.Common.InferenceParams
-        {
-            Temperature = (float)requestSettings.Temperature,
-            TopP = (float)requestSettings.TopP,
-            PresencePenalty = (float)requestSettings.PresencePenalty,
-            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
-            AntiPrompts = requestSettings.StopSequences,
-            MaxTokens = requestSettings.MaxTokens ?? -1
-        };
-    }
 }
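
The surviving overload targets ChatRequestSettings; only its tail is visible in the hunk, but it evidently performs the same field-for-field mapping onto LLamaSharp's InferenceParams as the deleted CompleteRequestSettings overload. Reconstructed for reference (the method head is inferred from the removed twin, not shown in the diff):

    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this ChatRequestSettings requestSettings)
    {
        if (requestSettings is null)
        {
            throw new ArgumentNullException(nameof(requestSettings));
        }

        return new global::LLama.Common.InferenceParams
        {
            Temperature = (float)requestSettings.Temperature,
            TopP = (float)requestSettings.TopP,
            PresencePenalty = (float)requestSettings.PresencePenalty,
            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
            AntiPrompts = requestSettings.StopSequences,  // stop sequences become anti-prompts
            MaxTokens = requestSettings.MaxTokens ?? -1   // -1 signals no fixed token cap
        };
    }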

LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj (+2, -2)

@@ -10,7 +10,7 @@
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>

-    <Version>0.5.0</Version>
+    <Version>0.6.2-beta1</Version>
     <Authors>Tim Miller</Authors>
     <Company>SciSharp STACK</Company>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>

@@ -33,7 +33,7 @@
   </PropertyGroup>

   <ItemGroup>
-    <PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="0.24.230911.2-preview" />
+    <PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.0.0-beta1" />
   </ItemGroup>

   <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">

LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs (+9, -6)

@@ -1,5 +1,6 @@
-using LLama;
-using LLama.Abstractions;
+using LLama.Abstractions;
+using LLamaSharp.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel.AI;
 using Microsoft.SemanticKernel.AI.TextCompletion;

 namespace LLamaSharp.SemanticKernel.TextCompletion;

@@ -13,15 +14,17 @@ public sealed class LLamaSharpTextCompletion : ITextCompletion
         this.executor = executor;
     }

-    public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    public async Task<IReadOnlyList<ITextResult>> GetCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default)
     {
-        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        var settings = (ChatRequestSettings)requestSettings;
+        var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
         return await Task.FromResult(new List<ITextResult> { new LLamaTextResult(result) }.AsReadOnly()).ConfigureAwait(false);
     }

-    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, CompleteRequestSettings requestSettings, CancellationToken cancellationToken = default)
+    public async IAsyncEnumerable<ITextStreamingResult> GetStreamingCompletionsAsync(string text, AIRequestSettings? requestSettings, CancellationToken cancellationToken = default)
     {
-        var result = executor.InferAsync(text, requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);
+        var settings = (ChatRequestSettings)requestSettings;
+        var result = executor.InferAsync(text, settings?.ToLLamaSharpInferenceParams(), cancellationToken);
         yield return new LLamaTextResult(result);
     }
 }
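
The cast behaves slightly differently here than in the chat service: casting a null reference succeeds, so a null requestSettings flows through and the null-conditional call hands null inference parameters to InferAsync, while a non-null argument of any other AIRequestSettings subtype still throws. A minimal usage sketch under the beta1 API, assuming `executor` is an already-constructed LLamaSharp ILLamaExecutor:

    using LLamaSharp.SemanticKernel.ChatCompletion;
    using LLamaSharp.SemanticKernel.TextCompletion;

    var completion = new LLamaSharpTextCompletion(executor);

    // Pass the concrete settings type (or null for default inference parameters).
    var results = await completion.GetCompletionsAsync(
        "The capital of France is",
        new ChatRequestSettings { MaxTokens = 16 });

    Console.WriteLine(await results[0].GetCompletionAsync());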
