You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

LLamaSharpChatCompletion.cs 4.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. using LLama;
  2. using LLama.Abstractions;
  3. using Microsoft.SemanticKernel;
  4. using Microsoft.SemanticKernel.ChatCompletion;
  5. using Microsoft.SemanticKernel.Services;
  6. using System;
  7. using System.IO;
  8. using System.Runtime.CompilerServices;
  9. using System.Text;
  10. using static LLama.LLamaTransforms;
  11. namespace LLamaSharp.SemanticKernel.ChatCompletion;
  12. /// <summary>
  13. /// LLamaSharp ChatCompletion
  14. /// </summary>
  15. public sealed class LLamaSharpChatCompletion : IChatCompletionService
  16. {
  17. private readonly StatelessExecutor _model;
  18. private ChatRequestSettings defaultRequestSettings;
  19. private readonly IHistoryTransform historyTransform;
  20. private readonly ITextStreamTransform outputTransform;
  21. private readonly Dictionary<string, object?> _attributes = new();
  22. public IReadOnlyDictionary<string, object?> Attributes => this._attributes;
  23. static ChatRequestSettings GetDefaultSettings()
  24. {
  25. return new ChatRequestSettings
  26. {
  27. MaxTokens = 256,
  28. Temperature = 0,
  29. TopP = 0,
  30. StopSequences = new List<string>()
  31. };
  32. }
  33. public LLamaSharpChatCompletion(StatelessExecutor model,
  34. ChatRequestSettings? defaultRequestSettings = default,
  35. IHistoryTransform? historyTransform = null,
  36. ITextStreamTransform? outputTransform = null)
  37. {
  38. this._model = model;
  39. this.defaultRequestSettings = defaultRequestSettings ?? GetDefaultSettings();
  40. this.historyTransform = historyTransform ?? new HistoryTransform();
  41. this.outputTransform = outputTransform ?? new KeywordTextOutputStreamTransform(new[] { $"{LLama.Common.AuthorRole.User}:",
  42. $"{LLama.Common.AuthorRole.Assistant}:",
  43. $"{LLama.Common.AuthorRole.System}:"});
  44. }
  45. public ChatHistory CreateNewChat(string? instructions = "")
  46. {
  47. var history = new ChatHistory();
  48. if (instructions != null && !string.IsNullOrEmpty(instructions))
  49. {
  50. history.AddSystemMessage(instructions);
  51. }
  52. return history;
  53. }
  54. /// <inheritdoc/>
  55. public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
  56. {
  57. var settings = executionSettings != null
  58. ? ChatRequestSettings.FromRequestSettings(executionSettings)
  59. : defaultRequestSettings;
  60. var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory());
  61. var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);
  62. var output = outputTransform.TransformAsync(result);
  63. var sb = new StringBuilder();
  64. await foreach (var token in output)
  65. {
  66. sb.Append(token);
  67. }
  68. return new List<ChatMessageContent> { new(AuthorRole.Assistant, sb.ToString()) }.AsReadOnly();
  69. }
  70. /// <inheritdoc/>
  71. public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
  72. {
  73. var settings = executionSettings != null
  74. ? ChatRequestSettings.FromRequestSettings(executionSettings)
  75. : defaultRequestSettings;
  76. var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory());
  77. var result = _model.InferAsync(prompt, settings.ToLLamaSharpInferenceParams(), cancellationToken);
  78. var output = outputTransform.TransformAsync(result);
  79. await foreach (var token in output)
  80. {
  81. yield return new StreamingChatMessageContent(AuthorRole.Assistant, token);
  82. }
  83. }
  84. }