You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

Grammar.cs 5.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  4. using LLama.Exceptions;
  5. using LLama.Native;
  6. namespace LLama.Grammars
  7. {
  /// <summary>
  /// A grammar is a set of <see cref="GrammarRule"/>s for deciding which characters are valid next. Can be used to constrain
  /// output to certain formats - e.g. force the model to output JSON
  /// </summary>
  public sealed class Grammar
  {
      /// <summary>
      /// Index of the initial rule to start from
      /// </summary>
      /// <remarks>
      /// The value is range-checked against <see cref="Rules"/> only in the constructor; the setter does not re-validate it.
      /// </remarks>
      public ulong StartRuleIndex { get; set; }

      /// <summary>
      /// The rules which make up this grammar
      /// </summary>
      public IReadOnlyList<GrammarRule> Rules { get; }

      /// <summary>
      /// Create a new grammar from a set of rules
      /// </summary>
      /// <param name="rules">The rules which make up this grammar</param>
      /// <param name="startRuleIndex">Index of the initial rule to start from</param>
      /// <exception cref="ArgumentNullException">Thrown if <paramref name="rules"/> is null</exception>
      /// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="startRuleIndex"/> is not a valid index into <paramref name="rules"/></exception>
      public Grammar(IReadOnlyList<GrammarRule> rules, ulong startRuleIndex)
      {
          // Fail with a descriptive exception instead of a NullReferenceException on `rules.Count`.
          if (rules == null)
          {
              throw new ArgumentNullException(nameof(rules));
          }

          // Count is never negative, so widening it to ulong is lossless. An empty rule list rejects every index.
          if (startRuleIndex >= (ulong)rules.Count)
          {
              throw new ArgumentOutOfRangeException(nameof(startRuleIndex), "startRule must be less than the number of rules");
          }

          StartRuleIndex = startRuleIndex;
          Rules = rules;
      }

      /// <summary>
      /// Create a `SafeLLamaGrammarHandle` instance to use for parsing
      /// </summary>
      /// <returns>The handle returned by <see cref="SafeLLamaGrammarHandle.Create"/> for <see cref="Rules"/> and <see cref="StartRuleIndex"/></returns>
      public SafeLLamaGrammarHandle CreateInstance()
      {
          return SafeLLamaGrammarHandle.Create(Rules, StartRuleIndex);
      }

      /// <summary>
      /// Parse a string of <a href="https://github.com/ggerganov/llama.cpp/tree/master/grammars">GGML BNF</a> into a Grammar
      /// </summary>
      /// <param name="gbnf">The string to parse</param>
      /// <param name="startRule">Name of the start rule of this grammar</param>
      /// <exception cref="GrammarFormatException">Thrown if input is malformed</exception>
      /// <returns>A Grammar which can be converted into a SafeLLamaGrammarHandle for sampling</returns>
      public static Grammar Parse(string gbnf, string startRule)
      {
          var parser = new GBNFGrammarParser();
          return parser.Parse(gbnf, startRule);
      }

      /// <summary>
      /// Render every rule of this grammar as text, one line per rule in the form <c>name ::= elements</c>.
      /// </summary>
      /// <returns>The textual form of all rules, in the order they appear in <see cref="Rules"/></returns>
      public override string ToString()
      {
          var builder = new StringBuilder();
          PrintGrammar(builder);
          return builder.ToString();
      }

      /// <summary>
      /// Append the textual form of every rule to <paramref name="output"/>.
      /// </summary>
      private void PrintGrammar(StringBuilder output)
      {
          for (var i = 0; i < Rules.Count; i++)
          {
              PrintRule(output, Rules[i]);
          }
      }

      /// <summary>
      /// Append a single rule to <paramref name="output"/> as <c>name ::= elements</c>, followed by a line break.
      /// </summary>
      private void PrintRule(StringBuilder output, GrammarRule rule)
      {
          output.Append($"{rule.Name} ::= ");

          // The final element is deliberately not visited (it is expected to be the END terminator),
          // which also guarantees that the `i + 1` lookahead below is always in range.
          for (int i = 0, end = rule.Elements.Count - 1; i < end; i++)
          {
              var elem = rule.Elements[i];
              switch (elem.Type)
              {
                  // GrammarRule has already verified that END is not being misused, no need to check again
                  case LLamaGrammarElementType.END:
                      break;

                  case LLamaGrammarElementType.ALT:
                      output.Append("| ");
                      break;

                  case LLamaGrammarElementType.RULE_REF:
                      // Rule references are not validated by this class, and ToString must not throw,
                      // so a dangling reference is rendered as a placeholder instead of indexing out of range.
                      if (elem.Value < (uint)Rules.Count)
                      {
                          output.Append($"{Rules[(int)elem.Value].Name} ");
                      }
                      else
                      {
                          output.Append($"<invalid rule ref {elem.Value}> ");
                      }
                      break;

                  case LLamaGrammarElementType.CHAR:
                      output.Append('[');
                      PrintGrammarChar(output, elem.Value);
                      break;

                  case LLamaGrammarElementType.CHAR_NOT:
                      output.Append("[^");
                      PrintGrammarChar(output, elem.Value);
                      break;

                  case LLamaGrammarElementType.CHAR_RNG_UPPER:
                      output.Append('-');
                      PrintGrammarChar(output, elem.Value);
                      break;

                  case LLamaGrammarElementType.CHAR_ALT:
                      PrintGrammarChar(output, elem.Value);
                      break;
              }

              // A character class stays open while the next element continues it (another alternative
              // or the upper bound of a range); anything else closes the bracket.
              if (elem.IsCharElement())
              {
                  switch (rule.Elements[i + 1].Type)
                  {
                      case LLamaGrammarElementType.CHAR_ALT:
                      case LLamaGrammarElementType.CHAR_RNG_UPPER:
                          break;

                      default:
                          output.Append("] ");
                          break;
                  }
              }
          }

          output.AppendLine();
      }

      /// <summary>
      /// Append a single code point to <paramref name="output"/>: printable ASCII is written as-is,
      /// everything else is written as <c>&lt;U+XXXX&gt;</c>.
      /// </summary>
      /// <remarks>
      /// Characters are not escaped (e.g. a literal ']' is written verbatim), so the output is intended
      /// for display rather than for re-parsing.
      /// </remarks>
      private static void PrintGrammarChar(StringBuilder output, uint c)
      {
          // Printable ASCII ends at 0x7E ('~'); 0x7F is the DEL control character and must be escaped.
          if (c >= 0x20 && c <= 0x7E)
          {
              output.Append((char)c);
          }
          else
          {
              // cop out of encoding UTF-8
              output.Append($"<U+{c:X4}>");
          }
      }
  }
  128. }