You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

GrammarParserTest.cs 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. using LLama.Exceptions;
  2. using LLama.Native;
  3. using LLama.Grammars;
  4. namespace LLama.Unittest
  5. {
  6. /// <summary>
  7. /// Source:
  8. /// https://github.com/ggerganov/llama.cpp/blob/6381d4e110bd0ec02843a60bbeb8b6fc37a9ace9/tests/test-grammar-parser.cpp
  9. ///
  10. /// The commit hash in the URL above identifies the llama.cpp revision that the current C# code reflects.
  11. /// </summary>
  12. public sealed class GrammarParserTest
  13. {
  /// <summary>
  /// Parses a three-rule arithmetic grammar (root / expr / term) and verifies
  /// the start rule index, the name stored at each rule index, and the element
  /// sequence of every rule, in rule order.
  /// </summary>
  /// <remarks>
  /// Besides comparing element by element, the test asserts that the parser
  /// produced exactly as many elements as expected, so a parser that emits too
  /// few (or too many) elements cannot pass unnoticed.
  /// </remarks>
  [Fact]
  public void ParseComplexGrammar()
  {
      var parser = new GBNFGrammarParser();

      // Continuation lines of the verbatim string are kept at column 0 so the
      // grammar text carries no leading indentation.
      string grammarText = @"root ::= (expr ""="" term ""\n"")+
expr ::= term ([-+*/] term)*
term ::= [0-9]+";

      var state = parser.Parse(grammarText, "root");

      Assert.Equal(0ul, state.StartRuleIndex);
      Assert.NotEmpty(state.Rules);

      // Expected rule name at each rule index (listed alphabetically by name).
      var expectedNames = new (string Name, int Index)[]
      {
          ("expr", 2),
          ("expr_5", 5),
          ("expr_6", 6),
          ("root", 0),
          ("root_1", 1),
          ("root_4", 4),
          ("term", 3),
          ("term_7", 7),
      };

      foreach (var (name, ruleIndex) in expectedNames)
      {
          Assert.Equal(name, state.Rules[ruleIndex].Name);
      }

      // Expected elements of all rules, concatenated in rule-index order.
      // Each rule's run is terminated by an END element.
      var expectedElements = new LLamaGrammarElement[]
      {
          // 0: root
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 4),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 1: root_1
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 2),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 61),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 10),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 2: expr
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 6),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 3: term
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 7),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 4: root_4
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 1),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 4),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 1),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 5: expr_5
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 45),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 43),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 42),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 47),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 6: expr_6
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 5),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 6),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 7: term_7
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 48),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 7),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 48),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
      };

      // Running position in expectedElements across all rules.
      int index = 0;
      foreach (var rule in state.Rules)
      {
          for (int i = 0; i < rule.Elements.Count; i++)
          {
              // Fail with a readable message instead of an index-out-of-range
              // exception when the parser emits more elements than expected.
              Assert.True(
                  index < expectedElements.Length,
                  $"rule '{rule.Name}' produced more elements than the {expectedElements.Length} expected");

              var element = rule.Elements[i];
              var expectedElement = expectedElements[index];

              // Pretty print error message before asserting
              if (expectedElement.Type != element.Type || expectedElement.Value != element.Value)
              {
                  Console.Error.WriteLine($"index: {index}");
                  Console.Error.WriteLine($"expected_element: {expectedElement.Type}, {expectedElement.Value}");
                  Console.Error.WriteLine($"actual_element: {element.Type}, {element.Value}");
                  Console.Error.WriteLine("expected_element != actual_element");
              }

              Assert.Equal(expectedElement.Type, element.Type);
              Assert.Equal(expectedElement.Value, element.Value);
              index++;
          }
      }

      // Every expected element must have been matched; otherwise the parser
      // emitted fewer elements than expected and the loop ended early.
      Assert.Equal(expectedElements.Length, index);
  }
  /// <summary>
  /// Parses a six-rule grammar (root / expr / term / ident / num / ws) and
  /// verifies the start rule index, the name stored at each rule index, and
  /// the element sequence of every rule, in rule order.
  /// </summary>
  /// <remarks>
  /// Besides comparing element by element, the test asserts that the parser
  /// produced exactly as many elements as expected, so a parser that emits too
  /// few (or too many) elements cannot pass unnoticed.
  /// </remarks>
  [Fact]
  public void ParseExtraComplexGrammar()
  {
      var parser = new GBNFGrammarParser();

      // Continuation lines of the verbatim string are kept at column 0 so the
      // grammar text carries no leading indentation.
      string grammarText = @"
root ::= (expr ""="" ws term ""\n"")+
expr ::= term ([-+*/] term)*
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*
";

      var state = parser.Parse(grammarText, "root");

      Assert.Equal(0ul, state.StartRuleIndex);
      Assert.NotEmpty(state.Rules);

      // Expected rule name at each rule index (listed alphabetically by name).
      var expectedNames = new (string Name, int Index)[]
      {
          ("expr", 2),
          ("expr_6", 6),
          ("expr_7", 7),
          ("ident", 8),
          ("ident_10", 10),
          ("num", 9),
          ("num_11", 11),
          ("root", 0),
          ("root_1", 1),
          ("root_5", 5),
          ("term", 4),
          ("ws", 3),
          ("ws_12", 12),
      };

      foreach (var (name, ruleIndex) in expectedNames)
      {
          Assert.Equal(name, state.Rules[ruleIndex].Name);
      }

      // Expected elements of all rules, concatenated in rule-index order.
      // Each rule's run is terminated by an END element.
      var expectedElements = new LLamaGrammarElement[]
      {
          // 0: root
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 5),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 1: root_1
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 2),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 61),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 4),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 10),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 2: expr
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 4),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 7),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 3: ws
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 12),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 4: term
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 8),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 9),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 40),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 2),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 41),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 5: root_5
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 1),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 5),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 1),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 6: expr_6
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 45),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 43),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 42),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 47),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 4),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 7: expr_7
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 6),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 7),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 8: ident
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 97),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 122),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 10),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 9: num
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 11),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 10: ident_10
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 97),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 122),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 48),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 95),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 10),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 11: num_11
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 48),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 11),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 48),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          // 12: ws_12
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 32),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 9),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 10),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 12),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
      };

      // Running position in expectedElements across all rules.
      int index = 0;
      foreach (var rule in state.Rules)
      {
          for (int i = 0; i < rule.Elements.Count; i++)
          {
              // Fail with a readable message instead of an index-out-of-range
              // exception when the parser emits more elements than expected.
              Assert.True(
                  index < expectedElements.Length,
                  $"rule '{rule.Name}' produced more elements than the {expectedElements.Length} expected");

              var element = rule.Elements[i];
              var expectedElement = expectedElements[index];

              // Pretty print error message before asserting
              if (expectedElement.Type != element.Type || expectedElement.Value != element.Value)
              {
                  Console.Error.WriteLine($"index: {index}");
                  Console.Error.WriteLine($"expected_element: {expectedElement.Type}, {expectedElement.Value}");
                  Console.Error.WriteLine($"actual_element: {element.Type}, {element.Value}");
                  Console.Error.WriteLine("expected_element != actual_element");
              }

              Assert.Equal(expectedElement.Type, element.Type);
              Assert.Equal(expectedElement.Value, element.Value);
              index++;
          }
      }

      // Every expected element must have been matched; otherwise the parser
      // emitted fewer elements than expected and the loop ended early.
      Assert.Equal(expectedElements.Length, index);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> from an empty element array is expected
  /// to throw <see cref="ArgumentException"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleNoElements()
  {
      // The discard makes it explicit that only the constructor's side effect
      // (the exception) matters.
      void CreateRule()
      {
          _ = new GrammarRule("name", Array.Empty<LLamaGrammarElement>());
      }

      Assert.Throws<ArgumentException>(CreateRule);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> whose only element is an ALT (no END
  /// element) is expected to throw <see cref="ArgumentException"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleNoEndElement()
  {
      // The discard makes it explicit that only the constructor's side effect
      // (the exception) matters.
      void CreateRule()
      {
          _ = new GrammarRule("name", new[]
          {
              new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0)
          });
      }

      Assert.Throws<ArgumentException>(CreateRule);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> whose element list has an END element
  /// before the final one is expected to throw
  /// <c>GrammarUnexpectedEndElement</c>.
  /// </summary>
  [Fact]
  public void InvalidRuleExtraEndElement()
  {
      // The discard makes it explicit that only the constructor's side effect
      // (the exception) matters.
      void CreateRule()
      {
          _ = new GrammarRule("name", new[]
          {
              new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
              new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
              new LLamaGrammarElement(LLamaGrammarElementType.END, 0)
          });
      }

      Assert.Throws<GrammarUnexpectedEndElement>(CreateRule);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> in which a CHAR_RNG_UPPER element
  /// directly follows an ALT element is expected to throw
  /// <c>GrammarUnexpectedCharRngElement</c>.
  /// </summary>
  [Fact]
  public void InvalidRuleMalformedRange()
  {
      // The discard makes it explicit that only the constructor's side effect
      // (the exception) matters.
      void CreateRule()
      {
          _ = new GrammarRule("name", new[]
          {
              new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
              new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 0),
              new LLamaGrammarElement(LLamaGrammarElementType.END, 0)
          });
      }

      Assert.Throws<GrammarUnexpectedCharRngElement>(CreateRule);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> in which a CHAR_ALT element directly
  /// follows a RULE_REF element is expected to throw
  /// <c>GrammarUnexpectedCharAltElement</c>.
  /// </summary>
  [Fact]
  public void InvalidRuleMalformedCharAlt()
  {
      // The discard makes it explicit that only the constructor's side effect
      // (the exception) matters.
      void CreateRule()
      {
          _ = new GrammarRule("name", new[]
          {
              new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 0),
              new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 0),
              new LLamaGrammarElement(LLamaGrammarElementType.END, 0)
          });
      }

      Assert.Throws<GrammarUnexpectedCharAltElement>(CreateRule);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> containing an element whose type is the
  /// out-of-range value 99999 cast to <c>LLamaGrammarElementType</c> is
  /// expected to throw <see cref="ArgumentException"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleElement()
  {
      // The discard makes it explicit that only the constructor's side effect
      // (the exception) matters. The elements are built inside the action so
      // that anything thrown while creating them is still observed by
      // Assert.Throws.
      void CreateRule()
      {
          _ = new GrammarRule("name", new[]
          {
              new LLamaGrammarElement((LLamaGrammarElementType)99999, 0),
              new LLamaGrammarElement(LLamaGrammarElementType.END, 0)
          });
      }

      Assert.Throws<ArgumentException>(CreateRule);
  }
  303. }
  304. }