You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

Grammar.cs 5.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  4. using LLama.Exceptions;
  5. using LLama.Native;
  6. namespace LLama.Grammars
  7. {
  8. /// <summary>
  9. /// A grammar is a set of <see cref="GrammarRule"/>s for deciding which characters are valid next. Can be used to constrain
  10. /// output to certain formats - e.g. force the model to output JSON
  11. /// </summary>
  12. public sealed class Grammar
  13. {
  14. /// <summary>
  15. /// Index of the initial rule to start from
  16. /// </summary>
  17. public ulong StartRuleIndex { get; set; }
  18. /// <summary>
  19. /// The rules which make up this grammar
  20. /// </summary>
  21. public IReadOnlyList<GrammarRule> Rules { get; }
  22. /// <summary>
  23. /// Create a new grammar from a set of rules
  24. /// </summary>
  25. /// <param name="rules">The rules which make up this grammar</param>
  26. /// <param name="startRuleIndex">Index of the initial rule to start from</param>
  27. /// <exception cref="ArgumentOutOfRangeException"></exception>
  28. public Grammar(IReadOnlyList<GrammarRule> rules, ulong startRuleIndex)
  29. {
  30. if (startRuleIndex >= (uint)rules.Count)
  31. throw new ArgumentOutOfRangeException(nameof(startRuleIndex), "startRule must be less than the number of rules");
  32. StartRuleIndex = startRuleIndex;
  33. Rules = rules;
  34. }
  35. /// <summary>
  36. /// Create a `SafeLLamaGrammarHandle` instance to use for parsing
  37. /// </summary>
  38. /// <returns></returns>
  39. public SafeLLamaGrammarHandle CreateInstance()
  40. {
  41. return SafeLLamaGrammarHandle.Create(Rules, StartRuleIndex);
  42. }
  43. /// <summary>
  44. /// Parse a string of <a href="https://github.com/ggerganov/llama.cpp/tree/master/grammars">GGML BNF</a> into a Grammar
  45. /// </summary>
  46. /// <param name="gbnf">The string to parse</param>
  47. /// <param name="startRule">Name of the start rule of this grammar</param>
  48. /// <exception cref="GrammarFormatException">Thrown if input is malformed</exception>
  49. /// <returns>A Grammar which can be converted into a SafeLLamaGrammarHandle for sampling</returns>
  50. public static Grammar Parse(string gbnf, string startRule)
  51. {
  52. var parser = new GBNFGrammarParser();
  53. return parser.Parse(gbnf, startRule);
  54. }
  55. /// <inheritdoc />
  56. public override string ToString()
  57. {
  58. var builder = new StringBuilder();
  59. PrintGrammar(builder);
  60. return builder.ToString();
  61. }
  62. private void PrintGrammar(StringBuilder output)
  63. {
  64. for (var i = 0; i < Rules.Count; i++)
  65. PrintRule(output, (uint)i, Rules[i]);
  66. }
  67. private void PrintRule(StringBuilder output, uint ruleId, GrammarRule rule)
  68. {
  69. output.Append($"{rule.Name} ::= ");
  70. for (int i = 0, end = rule.Elements.Count - 1; i < end; i++)
  71. {
  72. var elem = rule.Elements[i];
  73. switch (elem.Type)
  74. {
  75. case LLamaGrammarElementType.END:
  76. throw new GrammarFormatException($"Unexpected end of rule: {ruleId}, {i}");
  77. case LLamaGrammarElementType.ALT:
  78. output.Append("| ");
  79. break;
  80. case LLamaGrammarElementType.RULE_REF:
  81. output.Append($"{Rules[(int)elem.Value].Name} ");
  82. break;
  83. case LLamaGrammarElementType.CHAR:
  84. output.Append('[');
  85. PrintGrammarChar(output, elem.Value);
  86. break;
  87. case LLamaGrammarElementType.CHAR_NOT:
  88. output.Append("[^");
  89. PrintGrammarChar(output, elem.Value);
  90. break;
  91. case LLamaGrammarElementType.CHAR_RNG_UPPER:
  92. if (i == 0 || !rule.Elements[i - 1].IsCharElement())
  93. {
  94. throw new GrammarFormatException(
  95. $"LLamaGrammarElementType.CHAR_RNG_UPPER without preceding char: {ruleId},{i}");
  96. }
  97. output.Append('-');
  98. PrintGrammarChar(output, elem.Value);
  99. break;
  100. case LLamaGrammarElementType.CHAR_ALT:
  101. if (i == 0 || !rule.Elements[i - 1].IsCharElement())
  102. {
  103. throw new GrammarFormatException(
  104. $"LLamaGrammarElementType.CHAR_ALT without preceding char: {ruleId},{i}");
  105. }
  106. PrintGrammarChar(output, elem.Value);
  107. break;
  108. }
  109. if (elem.IsCharElement())
  110. {
  111. switch (rule.Elements[i + 1].Type)
  112. {
  113. case LLamaGrammarElementType.CHAR_ALT:
  114. case LLamaGrammarElementType.CHAR_RNG_UPPER:
  115. break;
  116. default:
  117. output.Append("] ");
  118. break;
  119. }
  120. }
  121. }
  122. output.AppendLine();
  123. }
  124. private static void PrintGrammarChar(StringBuilder output, uint c)
  125. {
  126. if (c >= 0x20 && c <= 0x7F)
  127. {
  128. output.Append((char)c);
  129. }
  130. else
  131. {
  132. // cop out of encoding UTF-8
  133. output.Append($"<U+{c:X4}>");
  134. }
  135. }
  136. }
  137. }