You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

BatchedExecutorFork.cs 4.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. using LLama.Batched;
  2. using LLama.Common;
  3. using LLama.Native;
  4. using LLama.Sampling;
  5. using Spectre.Console;
  6. namespace LLama.Examples.Examples;
  7. /// <summary>
  8. /// This demonstrates generating multiple replies to the same prompt, with a shared cache
  9. /// </summary>
  10. public class BatchedExecutorFork
  11. {
  12. private const int n_split = 16;
  13. private const int n_len = 72;
  14. public static async Task Run()
  15. {
  16. string modelPath = UserSettings.GetModelPath();
  17. var parameters = new ModelParams(modelPath);
  18. using var model = LLamaWeights.LoadFromFile(parameters);
  19. var prompt = AnsiConsole.Ask("Prompt (or ENTER for default):", "Not many people know that");
  20. // Create an executor that can evaluate a batch of conversations together
  21. using var executor = new BatchedExecutor(model, parameters);
  22. // Print some info
  23. var name = executor.Model.Metadata.GetValueOrDefault("general.name", "unknown model name");
  24. Console.WriteLine($"Created executor with model: {name}");
  25. // Evaluate the initial prompt to create one conversation
  26. using var start = executor.Prompt(prompt);
  27. await executor.Infer();
  28. // Create the root node of the tree
  29. var root = new Node(start);
  30. await AnsiConsole
  31. .Progress()
  32. .StartAsync(async progress =>
  33. {
  34. var reporter = progress.AddTask("Running Inference (1)", maxValue: n_len);
  35. // Run inference loop
  36. for (var i = 0; i < n_len; i++)
  37. {
  38. if (i != 0)
  39. await executor.Infer();
  40. // Occasionally fork all the active conversations
  41. if (i != 0 && i % n_split == 0)
  42. root.Split();
  43. // Sample all active conversations
  44. root.Sample();
  45. // Update progress bar
  46. reporter.Increment(1);
  47. reporter.Description($"Running Inference ({root.ActiveConversationCount})");
  48. }
  49. // Display results
  50. var display = new Tree(prompt);
  51. root.Display(display);
  52. AnsiConsole.Write(display);
  53. });
  54. }
  55. private class Node
  56. {
  57. private readonly StreamingTokenDecoder _decoder;
  58. private readonly DefaultSamplingPipeline _sampler;
  59. private Conversation? _conversation;
  60. private Node? _left;
  61. private Node? _right;
  62. public int ActiveConversationCount => _conversation != null ? 1 : _left!.ActiveConversationCount + _right!.ActiveConversationCount;
  63. public Node(Conversation conversation)
  64. {
  65. _sampler = new DefaultSamplingPipeline();
  66. _conversation = conversation;
  67. _decoder = new StreamingTokenDecoder(conversation.Executor.Context);
  68. }
  69. public void Sample()
  70. {
  71. if (_conversation == null)
  72. {
  73. _left?.Sample();
  74. _right?.Sample();
  75. return;
  76. }
  77. if (_conversation.RequiresInference)
  78. return;
  79. // Sample one token
  80. var ctx = _conversation.Executor.Context.NativeHandle;
  81. var token = _sampler.Sample(ctx, _conversation.Sample(), Array.Empty<LLamaToken>());
  82. _sampler.Accept(ctx, token);
  83. _decoder.Add(token);
  84. // Prompt the conversation with this token, to continue generating from there
  85. _conversation.Prompt(token);
  86. }
  87. public void Split()
  88. {
  89. if (_conversation != null)
  90. {
  91. _left = new Node(_conversation.Fork());
  92. _right = new Node(_conversation.Fork());
  93. _conversation.Dispose();
  94. _conversation = null;
  95. }
  96. else
  97. {
  98. _left?.Split();
  99. _right?.Split();
  100. }
  101. }
  102. public void Display<T>(T tree, int depth = 0)
  103. where T : IHasTreeNodes
  104. {
  105. var colors = new[] { "red", "green", "blue", "yellow", "white" };
  106. var color = colors[depth % colors.Length];
  107. var message = _decoder.Read().ReplaceLineEndings("");
  108. var n = tree.AddNode($"[{color}]{message}[/]");
  109. _left?.Display(n, depth + 1);
  110. _right?.Display(n, depth + 1);
  111. }
  112. }
  113. }