You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

LlamaSharpTextGenerator.cs 5.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using LLama;
using LLama.Abstractions;
using LLama.Common;
using LLama.Native;
using Microsoft.KernelMemory.AI;
  11. namespace LLamaSharp.KernelMemory
  12. {
  13. /// <summary>
  14. /// Provides text generation for LLamaSharp.
  15. /// </summary>
  16. public class LlamaSharpTextGenerator : ITextGenerator, IDisposable
  17. {
  18. private readonly LLamaWeights _weights;
  19. private readonly StatelessExecutor _executor;
  20. private readonly LLamaContext _context;
  21. private readonly InferenceParams? _defaultInferenceParams;
  22. private bool _ownsContext = false;
  23. private bool _ownsWeights = false;
  24. public int MaxTokenTotal { get; }
  25. /// <summary>
  26. /// Initializes a new instance of the <see cref="LlamaSharpTextGenerator"/> class.
  27. /// </summary>
  28. /// <param name="config">The configuration for LLamaSharp.</param>
  29. public LlamaSharpTextGenerator(LLamaSharpConfig config)
  30. {
  31. var parameters = new ModelParams(config.ModelPath)
  32. {
  33. ContextSize = config?.ContextSize ?? 2048,
  34. Seed = config?.Seed ?? 0,
  35. GpuLayerCount = config?.GpuLayerCount ?? 20,
  36. MainGpu = config?.MainGpu ?? 0,
  37. SplitMode = config?.SplitMode ?? GPUSplitMode.None
  38. };
  39. _weights = LLamaWeights.LoadFromFile(parameters);
  40. _context = _weights.CreateContext(parameters);
  41. _executor = new StatelessExecutor(_weights, parameters);
  42. _defaultInferenceParams = config?.DefaultInferenceParams;
  43. _ownsWeights = _ownsContext = true;
  44. MaxTokenTotal = (int)parameters.ContextSize;
  45. }
  46. /// <summary>
  47. /// Initializes a new instance of the <see cref="LlamaSharpTextGenerator"/> class from reused weights, context and executor.
  48. /// If executor is not specified, then a StatelessExecutor will be created with `context.Params`. So far only `StatelessExecutor` is expected.
  49. /// </summary>
  50. /// <param name="weights">A LLamaWeights object.</param>
  51. /// <param name="context">A LLamaContext object.</param>
  52. /// <param name="executor">An executor. Currently only StatelessExecutor is expected.</param>
  53. public LlamaSharpTextGenerator(LLamaWeights weights, LLamaContext context, StatelessExecutor? executor = null, InferenceParams? inferenceParams = null)
  54. {
  55. _weights = weights;
  56. _context = context;
  57. _executor = executor ?? new StatelessExecutor(_weights, _context.Params);
  58. _defaultInferenceParams = inferenceParams;
  59. MaxTokenTotal = (int)_context.Params.ContextSize;
  60. }
  61. /// <inheritdoc/>
  62. public void Dispose()
  63. {
  64. if (_ownsWeights)
  65. {
  66. _weights?.Dispose();
  67. }
  68. if (_ownsContext)
  69. {
  70. _context.Dispose();
  71. }
  72. }
  73. /// <inheritdoc/>
  74. public IAsyncEnumerable<string> GenerateTextAsync(string prompt, TextGenerationOptions options, CancellationToken cancellationToken = default)
  75. {
  76. return _executor.InferAsync(prompt, OptionsToParams(options, this._defaultInferenceParams), cancellationToken: cancellationToken);
  77. }
  78. private static InferenceParams OptionsToParams(TextGenerationOptions options, InferenceParams? defaultParams)
  79. {
  80. if (defaultParams != null)
  81. {
  82. return defaultParams with
  83. {
  84. AntiPrompts = defaultParams.AntiPrompts.Concat(options.StopSequences).ToList().AsReadOnly(),
  85. Temperature = options.Temperature == defaultParams.Temperature ? defaultParams.Temperature : (float)options.Temperature,
  86. MaxTokens = options.MaxTokens ?? defaultParams.MaxTokens,
  87. FrequencyPenalty = options.FrequencyPenalty == defaultParams.FrequencyPenalty ? defaultParams.FrequencyPenalty : (float)options.FrequencyPenalty,
  88. PresencePenalty = options.PresencePenalty == defaultParams.PresencePenalty ? defaultParams.PresencePenalty : (float)options.PresencePenalty,
  89. TopP = options.TopP == defaultParams.TopP ? defaultParams.TopP : (float)options.TopP
  90. };
  91. }
  92. else
  93. {
  94. return new InferenceParams()
  95. {
  96. AntiPrompts = options.StopSequences.ToList().AsReadOnly(),
  97. Temperature = (float)options.Temperature,
  98. MaxTokens = options.MaxTokens ?? 1024,
  99. FrequencyPenalty = (float)options.FrequencyPenalty,
  100. PresencePenalty = (float)options.PresencePenalty,
  101. TopP = (float)options.TopP,
  102. };
  103. }
  104. }
  105. /// <inheritdoc/>
  106. public int CountTokens(string text) => _context.Tokenize(text).Length;
  107. }
  108. }