using LLama;
using Microsoft.SemanticKernel.AI;
using Microsoft.SemanticKernel.AI.ChatCompletion;
using System.Runtime.CompilerServices;

namespace LLamaSharp.SemanticKernel.ChatCompletion;

/// <summary>
/// LLamaSharp implementation of Semantic Kernel's <see cref="IChatCompletion"/>,
/// backed by a <see cref="ChatSession"/>.
/// </summary>
public sealed class LLamaSharpChatCompletion : IChatCompletion
{
    // Role markers; the output transform strips these so they never leak into results.
    private const string UserRole = "user:";
    private const string AssistantRole = "assistant:";

    // Only assigned in constructors.
    private readonly ChatSession session;
    private readonly ChatRequestSettings defaultRequestSettings;

    /// <summary>
    /// Conservative defaults used when the caller supplies no request settings.
    /// </summary>
    private static ChatRequestSettings GetDefaultSettings()
    {
        return new ChatRequestSettings
        {
            MaxTokens = 256,
            Temperature = 0,
            TopP = 0,
            StopSequences = new List<string>()
        };
    }

    /// <summary>
    /// Wraps an <see cref="InteractiveExecutor"/> in a new <see cref="ChatSession"/>
    /// with history and keyword-stripping output transforms.
    /// </summary>
    /// <param name="model">Executor driving inference.</param>
    /// <param name="defaultRequestSettings">Settings used when a call passes none; defaults via <see cref="GetDefaultSettings"/>.</param>
    public LLamaSharpChatCompletion(InteractiveExecutor model, ChatRequestSettings? defaultRequestSettings = default)
    {
        this.session = new ChatSession(model)
            .WithHistoryTransform(new HistoryTransform())
            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
        // `??` (not `??=`): no need to mutate the parameter, only the field matters.
        this.defaultRequestSettings = defaultRequestSettings ?? GetDefaultSettings();
    }

    /// <summary>
    /// Uses a caller-provided, pre-configured <see cref="ChatSession"/> as-is
    /// (no transforms are added here).
    /// </summary>
    /// <param name="session">Session to delegate chat calls to.</param>
    /// <param name="defaultRequestSettings">Settings used when a call passes none; defaults via <see cref="GetDefaultSettings"/>.</param>
    public LLamaSharpChatCompletion(ChatSession session, ChatRequestSettings? defaultRequestSettings = default)
    {
        this.session = session;
        this.defaultRequestSettings = defaultRequestSettings ?? GetDefaultSettings();
    }

    /// <inheritdoc/>
    public ChatHistory CreateNewChat(string? instructions = "")
    {
        var history = new ChatHistory();

        // IsNullOrEmpty already covers the null case; no separate null check needed.
        if (!string.IsNullOrEmpty(instructions))
        {
            history.AddSystemMessage(instructions);
        }

        return history;
    }

    /// <inheritdoc/>
    public Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
    {
        // NOTE(review): the direct cast throws InvalidCastException if a caller passes
        // an AIRequestSettings subtype other than ChatRequestSettings — consider a
        // conversion helper if one exists in the project. TODO confirm.
        var settings = requestSettings != null
            ? (ChatRequestSettings)requestSettings
            : this.defaultRequestSettings;

        // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), settings.ToLLamaSharpInferenceParams(), cancellationToken);

        return Task.FromResult<IReadOnlyList<IChatResult>>(new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly());
    }

    /// <inheritdoc/>
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
#pragma warning restore CS1998
    {
        // NOTE(review): same unguarded cast as GetChatCompletionsAsync — see note there.
        var settings = requestSettings != null
            ? (ChatRequestSettings)requestSettings
            : this.defaultRequestSettings;

        // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), settings.ToLLamaSharpInferenceParams(), cancellationToken);

        yield return new LLamaSharpChatResult(result);
    }
}