You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

SpeechChat.cs 3.6 kB

1 year ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. using LLama.Common;
  2. using static LLama.Examples.Examples.SpeechTranscription;
  3. namespace LLama.Examples.Examples
  4. {
  5. public class SpeechChat
  6. {
  /// <summary>
  /// Entry point of the speech-chat example. Lets the user pick an audio model, loads the
  /// transcription server, then attaches a <see cref="LlamaServer"/> to it and waits until
  /// <c>ConsoleStyleHelpers.WaitUntilExit</c> completes.
  /// </summary>
  /// <returns>A task that completes when the example has finished (or immediately when no audio model was selected).</returns>
  public static async Task Run()
  {
      // Nothing to do when the selection helper does not hand back a model string.
      if (ConsoleStyleHelpers.SelectAudioModel() is not string model) { return; }

      // The callback lets LoadPrint observe when loading is done; it presumably drives a console
      // progress indicator until the flag flips (NOTE(review): confirm against ConsoleStyleHelpers).
      bool loadFinished = false;
      var loading = ConsoleStyleHelpers.LoadPrint("Loading transcription model...", () => loadFinished);

      using var audioServer = new AudioServer(model);
      loadFinished = true;

      // Await rather than block with Wait(): this method is async, and blocking on a task inside
      // async code ties up the calling thread and can deadlock under a synchronization context.
      await loading;

      Console.WriteLine("Audio model loaded. Insert path for language model.");

      // Declared after audioServer, so it is disposed first when the method exits.
      using var llamaServer = new LlamaServer(audioServer);
      await ConsoleStyleHelpers.WaitUntilExit();
  }
  /// <summary>
  /// Audio-service user that feeds transcribed speech to a LLama model. Every transcription is
  /// appended to a running prompt, the model's streamed reply is echoed to the console, and a
  /// spoken "stop" interrupts a reply that is still being generated.
  /// </summary>
  class LlamaServer : IAudioServiceUser, IDisposable
  {
      /// <summary>Word that, when heard while the model is replying, interrupts the reply.</summary>
      const string StopKeyword = "stop";

      readonly AudioServer audioServer;
      readonly LLamaWeights model;
      readonly LLamaContext context;
      readonly InteractiveExecutor executor;

      /// <summary>Everything said so far: user messages plus all model output (including stop markers).</summary>
      string fullPrompt = "";

      /// <summary>True while <see cref="SendMessage"/> is streaming a reply.</summary>
      bool isModelResponding;

      /// <summary>Set to request that the reply currently being streamed ends after the next token.</summary>
      bool canceled;

      /// <summary>True once <see cref="IDisposable.Dispose"/> has run; later transcriptions are ignored.</summary>
      bool disposed;

      /// <summary>
      /// Loads the language model from the path returned by <c>UserSettings.GetModelPath()</c>,
      /// creates a context and an interactive executor for it, and registers this instance with
      /// the given audio server.
      /// </summary>
      /// <param name="server">Audio server whose transcriptions this instance consumes.</param>
      public LlamaServer(AudioServer server)
      {
          var parameters = new ModelParams(UserSettings.GetModelPath()) { ContextSize = 1024, Seed = 1337, GpuLayerCount = 99 };
          model = LLamaWeights.LoadFromFile(parameters);
          context = model.CreateContext(parameters);
          executor = new InteractiveExecutor(context);

          // Register last, so the audio server only ever sees a fully constructed instance.
          (audioServer = server).ServiceUsers.Add(this);
      }

      /// <summary>Returns whether the transcription contains the stop keyword (case-insensitive).</summary>
      static bool IsStopCommand(string transcription) =>
          transcription.Contains(StopKeyword, StringComparison.CurrentCultureIgnoreCase);

      // Whisper struggles with single words and very short phrases without context, so it actually
      // works better to say something like "Ok, Stop!" than a bare "Stop".
      /// <summary>
      /// Reports whether this instance wants the transcription: always while idle, and only when it
      /// contains the stop keyword while a reply is being generated. Never after disposal.
      /// </summary>
      bool IAudioServiceUser.IsOfInterest(string AudioTranscription) =>
          !disposed && (!isModelResponding || IsStopCommand(AudioTranscription));

      /// <summary>
      /// Handles a transcription: requests cancellation when the stop keyword is heard during a
      /// reply, otherwise (when idle) starts a new reply in the background.
      /// </summary>
      void IAudioServiceUser.ProcessText(string AudioTranscription)
      {
          // Run() disposes this instance before the audio server, so transcriptions may still
          // arrive afterwards; they must not reach the released model.
          if (disposed) { return; }

          if (isModelResponding)
          {
              if (IsStopCommand(AudioTranscription)) { canceled = true; }
          }
          else
          {
              // Fire-and-forget: SendMessage reports its own failures because nobody awaits it.
              _ = SendMessage(AudioTranscription);
          }
      }

      /// <summary>
      /// Appends the message to the prompt, streams the model's reply to the console and into the
      /// prompt, and restores the idle state afterwards, even when inference fails.
      /// </summary>
      /// <param name="newMessage">Transcribed user speech to send to the model.</param>
      async Task SendMessage(string newMessage)
      {
          // While a response is queried, we want to detect short phrases/commands like 'stop',
          // so we lower the minimum speech detection time.
          audioServer.detectionSettings = (1, 1);
          isModelResponding = true;

          try
          {
              AddToPrompt($"\n{newMessage}\n", ConsoleColor.Blue);

              // NOTE(review): the whole accumulated transcript is passed on every turn. Confirm
              // whether the stateful InteractiveExecutor expects only the newly added text.
              await foreach (var token in executor.InferAsync(fullPrompt))
              {
                  AddToPrompt(token, ConsoleColor.Yellow);
                  if (canceled) { AddToPrompt("[...stopped]", ConsoleColor.Red); break; }
              }
          }
          catch (Exception ex)
          {
              // This task is never awaited, so an exception escaping here would go unnoticed.
              // It is printed directly rather than via AddToPrompt so it stays out of the prompt.
              Console.ForegroundColor = ConsoleColor.Red;
              Console.WriteLine($"\n[inference failed: {ex.Message}]");
              Console.ForegroundColor = ConsoleColor.White;
          }
          finally
          {
              // Always return to the idle state; otherwise one failure would leave the server
              // "responding" forever and every later transcription would be ignored.
              audioServer.detectionSettings = (2, 3); // Back to the default detection settings to avoid false positives.
              (isModelResponding, canceled) = (false, false);
          }
      }

      /// <summary>Appends text to the running prompt and writes it to the console in the given color.</summary>
      /// <param name="msg">Text to append and print.</param>
      /// <param name="color">Console color used for this text; the color is set to white afterwards.</param>
      void AddToPrompt(string msg, ConsoleColor color = ConsoleColor.Yellow)
      {
          fullPrompt += msg;
          Console.ForegroundColor = color;
          Console.Write(msg);
          Console.ForegroundColor = ConsoleColor.White;
      }

      /// <summary>
      /// Stops accepting transcriptions and releases the context and the model. Safe to call more than once.
      /// </summary>
      void IDisposable.Dispose()
      {
          if (disposed) { return; }
          disposed = true;

          // Ask a reply that is still streaming to end at its next token.
          // NOTE(review): this does not wait for the reply to finish before releasing the context.
          canceled = true;

          // Release the context before the model it was created from.
          context.Dispose();
          model.Dispose();
      }
  }
  69. }
  70. }