
BatchedExecutorFork.cs

using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;
using Spectre.Console;

namespace LLama.Examples.Examples;

/// <summary>
/// This demonstrates generating multiple replies to the same prompt, with a shared cache
/// </summary>
public class BatchedExecutorFork
{
    // Fork every active conversation each time this many tokens have been generated
    private const int n_split = 16;

    // Total number of tokens to generate for each conversation
    private const int n_len = 72;
    public static async Task Run()
    {
        string modelPath = UserSettings.GetModelPath();
        var parameters = new ModelParams(modelPath);
        using var model = LLamaWeights.LoadFromFile(parameters);

        var prompt = AnsiConsole.Ask("Prompt (or ENTER for default):", "Not many people know that");

        // Create an executor that can evaluate a batch of conversations together
        using var executor = new BatchedExecutor(model, parameters);

        // Print some info
        var name = executor.Model.Metadata.GetValueOrDefault("general.name", "unknown model name");
        Console.WriteLine($"Created executor with model: {name}");

        // Evaluate the initial prompt to create one conversation
        using var start = executor.Create();
        start.Prompt(prompt);
        await executor.Infer();

        // Create the root node of the tree
        var root = new Node(start);

        await AnsiConsole
            .Progress()
            .StartAsync(async progress =>
            {
                var reporter = progress.AddTask("Running Inference (1)", maxValue: n_len);

                // Run inference loop
                for (var i = 0; i < n_len; i++)
                {
                    // The initial prompt was already evaluated before the loop started
                    if (i != 0)
                        await executor.Infer();

                    // Occasionally fork all the active conversations
                    if (i != 0 && i % n_split == 0)
                        root.Split();

                    // Sample all active conversations
                    root.Sample();

                    // Update progress bar
                    reporter.Increment(1);
                    reporter.Description($"Running Inference ({root.ActiveConversationCount})");
                }

                // Display results
                var display = new Tree(prompt);
                root.Display(display);
                AnsiConsole.Write(display);
            });
    }
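
    /// <summary>
    /// A node in a binary tree of conversations. A leaf node owns one live conversation;
    /// after a fork the node disposes its conversation and instead holds two children,
    /// which share the cache state accumulated so far.
    /// </summary>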
    private class Node
    {
        private readonly StreamingTokenDecoder _decoder;
        private readonly DefaultSamplingPipeline _sampler;

        private Conversation? _conversation;
        private Node? _left;
        private Node? _right;

        public int ActiveConversationCount => _conversation != null ? 1 : _left!.ActiveConversationCount + _right!.ActiveConversationCount;

        public Node(Conversation conversation)
        {
            _sampler = new DefaultSamplingPipeline();
            _conversation = conversation;
            _decoder = new StreamingTokenDecoder(conversation.Executor.Context);
        }
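
        /// <summary>
        /// Sample one token for every active conversation in this subtree.
        /// </summary>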
        public void Sample()
        {
            // Interior nodes have no conversation of their own, so recurse into the children
            if (_conversation == null)
            {
                _left?.Sample();
                _right?.Sample();
                return;
            }
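
            // Skip conversations which are still waiting for inference results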
            if (_conversation.RequiresInference)
                return;

            // Sample one token
            var ctx = _conversation.Executor.Context.NativeHandle;
            var token = _sampler.Sample(ctx, _conversation.Sample(), Array.Empty<LLamaToken>());
            _sampler.Accept(ctx, token);
            _decoder.Add(token);

            // Prompt the conversation with this token, to continue generating from there
            _conversation.Prompt(token);
        }
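
        /// <summary>
        /// Fork every active conversation in this subtree into two, converting each
        /// leaf into an interior node with two children.
        /// </summary>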
        public void Split()
        {
            if (_conversation != null)
            {
                _left = new Node(_conversation.Fork());
                _right = new Node(_conversation.Fork());

                _conversation.Dispose();
                _conversation = null;
            }
            else
            {
                _left?.Split();
                _right?.Split();
            }
        }
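
        /// <summary>
        /// Render this subtree into a Spectre.Console tree, decoding the tokens sampled
        /// at this node and using a different color for each depth.
        /// </summary>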
        public void Display<T>(T tree, int depth = 0)
            where T : IHasTreeNodes
        {
            var colors = new[] { "red", "green", "blue", "yellow", "white" };
            var color = colors[depth % colors.Length];

            var message = Markup.Escape(_decoder.Read().ReplaceLineEndings(""));

            var n = tree.AddNode($"[{color}]{message}[/]");

            _left?.Display(n, depth + 1);
            _right?.Display(n, depth + 1);
        }
    }
}