
BatchedExecutorFork.cs 4.3 kB

using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;

namespace LLama.Examples.Examples;

/// <summary>
/// This demonstrates generating multiple replies to the same prompt, with a shared cache
/// </summary>
public class BatchedExecutorFork
{
    private const int n_split = 16;
    private const int n_len = 64;

    public static async Task Run()
    {
        Console.Write("Please input your model path: ");
        var modelPath = Console.ReadLine();

        var parameters = new ModelParams(modelPath);
        using var model = LLamaWeights.LoadFromFile(parameters);

        Console.WriteLine("Prompt (leave blank to select automatically):");
        var prompt = Console.ReadLine();
        if (string.IsNullOrWhiteSpace(prompt))
            prompt = "Not many people know that";

        // Create an executor that can evaluate a batch of conversations together
        var executor = new BatchedExecutor(model, parameters);

        // Print some info
        var name = executor.Model.Metadata.GetValueOrDefault("general.name", "unknown model name");
        Console.WriteLine($"Created executor with model: {name}");

        // Evaluate the initial prompt to create one conversation
        var start = executor.Prompt(prompt);
        await executor.Infer();

        // Create the root node of the tree
        var root = new Node(start);

        // Run inference loop
        for (var i = 0; i < n_len; i++)
        {
            if (i != 0)
                await executor.Infer();

            // Occasionally fork all the active conversations
            if (i != 0 && i % n_split == 0)
                root.Split();

            // Sample all active conversations
            root.Sample();
        }

        Console.WriteLine($"{prompt}...");
        root.Print(1);

        Console.WriteLine("Press any key to exit demo");
        Console.ReadKey(true);
    }

    class Node
    {
        private readonly StreamingTokenDecoder _decoder;
        private readonly DefaultSamplingPipeline _sampler;

        private Conversation? _conversation;

        private Node? _left;
        private Node? _right;

        public int ActiveConversationCount => _conversation != null ? 1 : _left!.ActiveConversationCount + _right!.ActiveConversationCount;

        public Node(Conversation conversation)
        {
            _sampler = new DefaultSamplingPipeline();
            _conversation = conversation;
            _decoder = new StreamingTokenDecoder(conversation.Executor.Context);
        }

        public void Sample()
        {
            // Inner nodes have no conversation of their own; recurse into both children
            if (_conversation == null)
            {
                _left?.Sample();
                _right?.Sample();
                return;
            }

            // Skip conversations which still need to be evaluated by the executor
            if (_conversation.RequiresInference)
                return;

            // Sample one token
            var ctx = _conversation.Executor.Context.NativeHandle;
            var logitsCopy = _conversation.Sample().ToArray();
            var token = _sampler.Sample(ctx, logitsCopy, Array.Empty<LLamaToken>());
            _sampler.Accept(ctx, token);
            _decoder.Add(token);

            // Prompt the conversation with this token, to continue generating from there
            _conversation.Prompt(token);
        }

        public void Split()
        {
            if (_conversation != null)
            {
                // Fork this conversation into two children (sharing the cache so far), then stop generating on this node
                _left = new Node(_conversation.Fork());
                _right = new Node(_conversation.Fork());
                _conversation.Dispose();
                _conversation = null;
            }
            else
            {
                _left?.Split();
                _right?.Split();
            }
        }

        public void Print(int indentation)
        {
            var colors = new[] { ConsoleColor.Red, ConsoleColor.Green, ConsoleColor.Blue, ConsoleColor.Yellow, ConsoleColor.White };
            Console.ForegroundColor = colors[indentation % colors.Length];

            var message = _decoder.Read().ReplaceLineEndings("");

            var prefix = new string(' ', indentation * 3);
            var suffix = _conversation == null ? "..." : "";
            Console.WriteLine($"{prefix}...{message}{suffix}");

            _left?.Print(indentation + 2);
            _right?.Print(indentation + 2);
        }
    }
}