
Prefill.cs 4.2 kB

#pragma warning disable CS8618

using System.Text;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Engines;
using BenchmarkDotNet.Jobs;
using LLama.Abstractions;
using LLama.Common;

namespace LLama.Benchmark.LLamaExecutorBenchmark
{
#if WINDOWS
    [BenchmarkDotNet.Diagnostics.Windows.Configs.NativeMemoryProfiler]
#endif
    [BenchmarkCategory("Executor", "LLama")]
    [SimpleJob(RunStrategy.Monitoring, runtimeMoniker: RuntimeMoniker.Net80)]
    [MemoryDiagnoser]
    [MinIterationCount(1)]
    [MaxIterationCount(16)]
    [RPlotExporter]
    public class PrefillBenchmark
    {
        /// <summary>
        /// (prompt length, context length)
        /// </summary>
        public IEnumerable<(int, uint)> PromptAndContextLengths => new (int, uint)[]
        {
            (512, 2048),
            (2024, 2048)
        };

        /// <summary>
        /// (model path, gpu layer count)
        /// </summary>
        public IEnumerable<(string, int)> ModelAndGpuLayerCounts => new (string, int)[]
        // TODO: specify the native library to load here to test cpu case better.
        {
            (Path.Combine(Constants.ModelDir, Constants.Generative7BModelPath), 0),
            (Path.Combine(Constants.ModelDir, Constants.Generative7BModelPath), 10),
            (Path.Combine(Constants.ModelDir, Constants.Generative7BModelPath), 20)
        };

        /// <summary>
        /// Executor implementations to benchmark.
        /// </summary>
        public IEnumerable<ExecutorType> ExecutorTypes => new ExecutorType[]
        {
            ExecutorType.Interactive,
            ExecutorType.Stateless
        };

        [ParamsSource(nameof(PromptAndContextLengths))]
        public (int, uint) PromptAndContextLength { get; set; }

        [ParamsSource(nameof(ModelAndGpuLayerCounts))]
        public (string, int) ModelAndGpuLayerCount { get; set; }

        [ParamsSource(nameof(ExecutorTypes))]
        public ExecutorType ExecutorType { get; set; }

        /// <summary>
        /// Params used to create a model.
        /// </summary>
        public ModelParams ModelParams { get; set; }

        /// <summary>
        /// Params used in inference.
        /// </summary>
        public InferenceParams InferenceParams { get; set; }

        /// <summary>
        /// Prompt used to run text generation.
        /// </summary>
        public string Prompt { get; set; }

        /// <summary>
        /// Executor under test, created in <see cref="InitializeParamsAndModel"/>.
        /// </summary>
        public ILLamaExecutor Executor { get; set; }

        private void InitializeParamsAndModel()
        {
            ModelParams = new ModelParams(ModelAndGpuLayerCount.Item1)
            {
                ContextSize = PromptAndContextLength.Item2,
                GpuLayerCount = ModelAndGpuLayerCount.Item2
            };
            Prompt = File.ReadAllText(Constants.TextCompletionPromptsFilePath).Substring(0, PromptAndContextLength.Item1);
            InferenceParams = new InferenceParams()
            {
                Temperature = 0.6f,
                MaxTokens = 1 // Only prefill, no generation here.
            };

            LLamaWeights weights = LLamaWeights.LoadFromFile(ModelParams);
            LLamaContext context = weights.CreateContext(ModelParams);
            Executor = ExecutorType switch
            {
                ExecutorType.Interactive => new InteractiveExecutor(context),
                ExecutorType.Instruct => new InstructExecutor(context),
                ExecutorType.Stateless => new StatelessExecutor(weights, ModelParams),
                _ => throw new NotSupportedException()
            };
        }

        [GlobalSetup(Targets = [nameof(Basic)])]
        public void GlobalSetup()
        {
            InitializeParamsAndModel();
        }

        [IterationCleanup(Targets = [nameof(Basic)])]
        public void GlobalCleanup()
        {
            if (ExecutorType != ExecutorType.Stateless) // The stateless executor always disposes its `Context` property.
            {
                Executor.Context.NativeHandle.KvCacheClear();
            }
        }

        [Benchmark]
        public async Task<string> Basic()
        {
            StringBuilder sb = new();
            // Consume the async stream so the prefill (and the single generated token) completes.
            await foreach (var text in Executor.InferAsync(Prompt, InferenceParams))
            {
                sb.Append(text);
            }
            return sb.ToString();
        }
    }
}
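
A minimal console entry point along these lines can run the benchmark with BenchmarkDotNet; the `Program` class below is a sketch for illustration, not necessarily how the LLamaSharp benchmark project wires up its own entry point.

    using BenchmarkDotNet.Running;
    using LLama.Benchmark.LLamaExecutorBenchmark;

    internal class Program
    {
        private static void Main()
        {
            // BenchmarkDotNet expands the [ParamsSource] properties into the full
            // parameter matrix (prompt/context length x model path/GPU layers x
            // executor type) and runs the Basic benchmark for each combination.
            BenchmarkRunner.Run<PrefillBenchmark>();
        }
    }

Run it in Release configuration (for example `dotnet run -c Release`), since BenchmarkDotNet reports a validation error when asked to benchmark a non-optimized Debug build.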