using LLama;
using LLama.Abstractions;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Services;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using static LLama.InteractiveExecutor;
using static LLama.LLamaTransforms;

namespace LLamaSharp.SemanticKernel.ChatCompletion;

/// <summary>
/// LLamaSharp ChatCompletion
/// </summary>
public sealed class LLamaSharpChatCompletion : IChatCompletionService
{
    private readonly ILLamaExecutor _model;
    private LLamaSharpPromptExecutionSettings defaultRequestSettings;
    private readonly IHistoryTransform historyTransform;
    private readonly ITextStreamTransform outputTransform;

    private readonly Dictionary<string, object?> _attributes = new();
    private readonly bool _isStatefulExecutor;

    public IReadOnlyDictionary<string, object?> Attributes => this._attributes;

    static LLamaSharpPromptExecutionSettings GetDefaultSettings()
    {
        return new LLamaSharpPromptExecutionSettings
        {
            MaxTokens = 256,
            Temperature = 0,
            TopP = 0,
            StopSequences = new List<string>()
        };
    }

    public LLamaSharpChatCompletion(ILLamaExecutor model,
        LLamaSharpPromptExecutionSettings? defaultRequestSettings = default,
        IHistoryTransform? historyTransform = null,
        ITextStreamTransform? outputTransform = null)
    {
        this._model = model;
        this._isStatefulExecutor = this._model is StatefulExecutorBase;
        this.defaultRequestSettings = defaultRequestSettings ?? GetDefaultSettings();
        this.historyTransform = historyTransform ?? new HistoryTransform();
        this.outputTransform = outputTransform ?? new KeywordTextOutputStreamTransform(new[]
        {
            $"{LLama.Common.AuthorRole.User}:",
            $"{LLama.Common.AuthorRole.Assistant}:",
            $"{LLama.Common.AuthorRole.System}:"
        });
    }

    /// <summary>
    /// Create a new chat history, optionally seeded with a system message.
    /// </summary>
    public ChatHistory CreateNewChat(string? instructions = "")
    {
        var history = new ChatHistory();

        if (instructions != null && !string.IsNullOrEmpty(instructions))
        {
            history.AddSystemMessage(instructions);
        }

        return history;
    }

    /// <inheritdoc/>
    public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
    {
        var settings = executionSettings != null
            ? LLamaSharpPromptExecutionSettings.FromRequestSettings(executionSettings)
            : defaultRequestSettings;

        string prompt = this._getFormattedPrompt(chatHistory);
        var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);

        var output = outputTransform.TransformAsync(result);

        var sb = new StringBuilder();
        await foreach (var token in output)
        {
            sb.Append(token);
        }

        return new List<ChatMessageContent> { new(AuthorRole.Assistant, sb.ToString()) }.AsReadOnly();
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        var settings = executionSettings != null
            ? LLamaSharpPromptExecutionSettings.FromRequestSettings(executionSettings)
            : defaultRequestSettings;

        string prompt = this._getFormattedPrompt(chatHistory);
        var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);

        var output = outputTransform.TransformAsync(result);

        await foreach (var token in output)
        {
            yield return new StreamingChatMessageContent(AuthorRole.Assistant, token);
        }
    }

    /// <summary>
    /// Return either the entire formatted chatHistory or just the most recent message based on
    /// whether the model extends StatefulExecutorBase or not.
    /// </summary>
    /// <param name="chatHistory">The chat history to format.</param>
    /// <returns>The formatted prompt</returns>
    private string _getFormattedPrompt(ChatHistory chatHistory)
    {
        string prompt;
        if (this._isStatefulExecutor)
        {
            // A stateful executor keeps the conversation in its own context, so after the first
            // (prompt) run only the newest user message needs to be sent.
            InteractiveExecutorState state = (InteractiveExecutorState)((StatefulExecutorBase)this._model).GetStateData();
            if (state.IsPromptRun)
            {
                prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory());
            }
            else
            {
                ChatHistory tempHistory = new();
                tempHistory.AddUserMessage(chatHistory.Last().Content);
                prompt = historyTransform.HistoryToText(tempHistory.ToLLamaSharpChatHistory());
            }
        }
        else
        {
            // Stateless executors receive the full formatted history on every call.
            prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory());
        }

        return prompt;
    }
}
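
// Usage sketch (illustrative only, not part of the library surface): wires the service above
// around a LLamaSharp InteractiveExecutor and runs a single non-streaming completion. The model
// path, context size, and prompts are hypothetical placeholders; adjust them for your setup.
internal static class LLamaSharpChatCompletionExample
{
    public static async Task RunAsync()
    {
        // Load a local GGUF model and create a stateful interactive executor.
        var parameters = new LLama.Common.ModelParams("path/to/model.gguf") { ContextSize = 4096 };
        using var weights = LLamaWeights.LoadFromFile(parameters);
        using var context = weights.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        // Wrap the executor in the Semantic Kernel chat completion service.
        var chat = new LLamaSharpChatCompletion(executor);
        var history = chat.CreateNewChat("You are a helpful assistant.");
        history.AddUserMessage("Hello!");

        // Non-streaming request; GetStreamingChatMessageContentsAsync yields tokens as they arrive.
        var reply = await chat.GetChatMessageContentsAsync(history);
        Console.WriteLine(reply[0].Content);
    }
}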