
LLamaSharpChatCompletion.cs

using LLama;
using LLama.Abstractions;
using Microsoft.SemanticKernel.AI;
using Microsoft.SemanticKernel.AI.ChatCompletion;
using System.Runtime.CompilerServices;
using static LLama.LLamaTransforms;

namespace LLamaSharp.SemanticKernel.ChatCompletion;

/// <summary>
/// LLamaSharp ChatCompletion
/// </summary>
public sealed class LLamaSharpChatCompletion : IChatCompletion
{
    private readonly StatelessExecutor _model;
    private ChatRequestSettings defaultRequestSettings;
    private readonly IHistoryTransform historyTransform;
    private readonly ITextStreamTransform outputTransform;

    private readonly Dictionary<string, string> _attributes = new();

    public IReadOnlyDictionary<string, string> Attributes => this._attributes;

    /// <summary>
    /// Default settings applied when the caller does not supply request settings.
    /// </summary>
    static ChatRequestSettings GetDefaultSettings()
    {
        return new ChatRequestSettings
        {
            MaxTokens = 256,
            Temperature = 0,
            TopP = 0,
            StopSequences = new List<string>()
        };
    }

    /// <summary>
    /// Creates a chat completion service backed by the given stateless executor.
    /// The output transform defaults to cutting generation at the next role prefix (e.g. "User:").
    /// </summary>
    public LLamaSharpChatCompletion(StatelessExecutor model,
        ChatRequestSettings? defaultRequestSettings = default,
        IHistoryTransform? historyTransform = null,
        ITextStreamTransform? outputTransform = null)
    {
        this._model = model;
        this.defaultRequestSettings = defaultRequestSettings ?? GetDefaultSettings();
        this.historyTransform = historyTransform ?? new HistoryTransform();
        this.outputTransform = outputTransform ?? new KeywordTextOutputStreamTransform(new[]
        {
            $"{LLama.Common.AuthorRole.User}:",
            $"{LLama.Common.AuthorRole.Assistant}:",
            $"{LLama.Common.AuthorRole.System}:"
        });
    }

    /// <inheritdoc/>
    public ChatHistory CreateNewChat(string? instructions = "")
    {
        var history = new ChatHistory();

        if (!string.IsNullOrEmpty(instructions))
        {
            history.AddSystemMessage(instructions);
        }

        return history;
    }

    /// <inheritdoc/>
    public Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
    {
        var settings = requestSettings != null
            ? ChatRequestSettings.FromRequestSettings(requestSettings)
            : defaultRequestSettings;

        var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory());
        var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);

        return Task.FromResult<IReadOnlyList<IChatResult>>(new List<IChatResult> { new LLamaSharpChatResult(outputTransform.TransformAsync(result)) }.AsReadOnly());
    }

    /// <inheritdoc/>
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously.
    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, AIRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
#pragma warning restore CS1998
    {
        var settings = requestSettings != null
            ? ChatRequestSettings.FromRequestSettings(requestSettings)
            : defaultRequestSettings;

        var prompt = historyTransform.HistoryToText(chat.ToLLamaSharpChatHistory());

        // This call is not awaited because LLamaSharpChatResult accepts an IAsyncEnumerable.
        var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);

        yield return new LLamaSharpChatResult(outputTransform.TransformAsync(result));
    }
}
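
A minimal usage sketch of this adapter, assuming a GGUF model at the hypothetical path "model.gguf", the StatelessExecutor(LLamaWeights, ModelParams) constructor from the LLamaSharp release this file targets, and the pre-1.0 Semantic Kernel IChatResult.GetChatMessageAsync API; not part of the file above.

// Usage sketch (assumptions: "model.gguf" is a placeholder path; constructor and
// result APIs match the LLamaSharp / Semantic Kernel versions this adapter targets).
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel.ChatCompletion;

var parameters = new ModelParams("model.gguf");            // hypothetical model path
using var weights = LLamaWeights.LoadFromFile(parameters); // load the GGUF weights once
var executor = new StatelessExecutor(weights, parameters); // no state kept between calls

var chatCompletion = new LLamaSharpChatCompletion(executor);
var chat = chatCompletion.CreateNewChat("You are a helpful assistant.");
chat.AddUserMessage("Hello!");

// Non-streaming path: a single result whose message is materialized when awaited.
var results = await chatCompletion.GetChatCompletionsAsync(chat);
var message = await results[0].GetChatMessageAsync();
Console.WriteLine(message.Content);

Because StatelessExecutor keeps no conversation state, the full chat history is re-serialized into a prompt by the history transform on every call; the default KeywordTextOutputStreamTransform then stops the reply once the model starts emitting the next role prefix.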