You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

GrammarParserTest.cs 20 kB


  1. using LLama.Exceptions;
  2. using LLama.Native;
  3. using LLama.Grammars;
  4. namespace LLama.Unittest
  5. {
  6. /// <summary>
  7. /// Source:
  8. /// https://github.com/ggerganov/llama.cpp/blob/6381d4e110bd0ec02843a60bbeb8b6fc37a9ace9/tests/test-grammar-parser.cpp
  9. ///
  10. /// The commit hash in the URL above identifies the upstream revision that the current C# code reflects.
  11. /// </summary>
  12. public sealed class GrammarParserTest
  13. {
  /// <summary>
  /// Parses <paramref name="grammar"/> and verifies the result against an expected
  /// symbol table and an expected flat list of grammar elements.
  /// </summary>
  /// <param name="grammar">Grammar text handed to <c>Grammar.Parse</c>.</param>
  /// <param name="rootRule">Name of the start rule handed to <c>Grammar.Parse</c>.</param>
  /// <param name="expected">
  /// Pairs of rule name and rule index; the rule found at each index must carry that name.
  /// </param>
  /// <param name="expectedRules">
  /// The elements of every parsed rule, concatenated in the order the rules appear in the parse result.
  /// </param>
  private static void CheckGrammar(string grammar, string rootRule, List<KeyValuePair<string, uint>> expected, List<LLamaGrammarElement> expectedRules)
  {
      var state = Grammar.Parse(grammar, rootRule);
      Assert.Equal(0ul, state.StartRuleIndex);

      // Checked up front so an empty parse result fails here instead of on an index lookup below.
      Assert.NotEmpty(state.Rules);

      foreach (var symbol in expected)
      {
          var rule = state.Rules[(int)symbol.Value];
          Assert.Equal(symbol.Key, rule.Name);
      }

      // Position in expectedRules; advances across rule boundaries.
      var index = 0;
      foreach (var rule in state.Rules)
      {
          // compare rule to expected rule
          for (var i = 0; i < rule.Elements.Count; i++)
          {
              // Fail with a readable message (not an ArgumentOutOfRangeException) when the
              // parser produced more elements than the test expects.
              Assert.True(index < expectedRules.Count, $"parsed grammar contains more than the {expectedRules.Count} expected elements");

              var element = rule.Elements[i];
              var expectedElement = expectedRules[index];

              // Pretty print error message before asserting
              if (expectedElement.Type != element.Type || expectedElement.Value != element.Value)
              {
                  Console.Error.WriteLine($"index: {index}");
                  Console.Error.WriteLine($"expected_element: {expectedElement.Type}, {expectedElement.Value}");
                  Console.Error.WriteLine($"actual_element: {element.Type}, {element.Value}");
                  Console.Error.WriteLine("expected_element != actual_element");
              }

              Assert.Equal(expectedElement.Type, element.Type);
              Assert.Equal(expectedElement.Value, element.Value);
              index++;
          }
      }

      // Every expected element must have been matched; otherwise a truncated parse would pass.
      Assert.Equal(expectedRules.Count, index);
  }
  /// <summary>
  /// Parses a three-rule arithmetic grammar whose character classes use hex escapes and
  /// compares the parsed rules with the expected symbol table and element list.
  /// </summary>
  [Fact]
  public void ParseComplexGrammar()
  {
      const string source = @"root ::= (expr ""="" term ""\n"")+
expr ::= term ([-\x2b\x2A/] term)*
term ::= [\x30-\x39]+";

      // Rule name paired with the index at which that rule is expected.
      List<KeyValuePair<string, uint>> symbols = new()
      {
          new("expr", 2),
          new("expr_5", 5),
          new("expr_6", 6),
          new("root", 0),
          new("root_1", 1),
          new("root_4", 4),
          new("term", 3),
          new("term_7", 7),
      };

      // Elements of all rules back to back; each END closes one rule.
      List<LLamaGrammarElement> elements = new()
      {
          // index 0 (root)
          new(LLamaGrammarElementType.RULE_REF, 4),
          new(LLamaGrammarElementType.END, 0),
          // index 1 (root_1)
          new(LLamaGrammarElementType.RULE_REF, 2),
          new(LLamaGrammarElementType.CHAR, 61),
          new(LLamaGrammarElementType.RULE_REF, 3),
          new(LLamaGrammarElementType.CHAR, 10),
          new(LLamaGrammarElementType.END, 0),
          // index 2 (expr)
          new(LLamaGrammarElementType.RULE_REF, 3),
          new(LLamaGrammarElementType.RULE_REF, 6),
          new(LLamaGrammarElementType.END, 0),
          // index 3 (term)
          new(LLamaGrammarElementType.RULE_REF, 7),
          new(LLamaGrammarElementType.END, 0),
          // index 4 (root_4)
          new(LLamaGrammarElementType.RULE_REF, 1),
          new(LLamaGrammarElementType.RULE_REF, 4),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.RULE_REF, 1),
          new(LLamaGrammarElementType.END, 0),
          // index 5 (expr_5)
          new(LLamaGrammarElementType.CHAR, 45),
          new(LLamaGrammarElementType.CHAR_ALT, 43),
          new(LLamaGrammarElementType.CHAR_ALT, 42),
          new(LLamaGrammarElementType.CHAR_ALT, 47),
          new(LLamaGrammarElementType.RULE_REF, 3),
          new(LLamaGrammarElementType.END, 0),
          // index 6 (expr_6)
          new(LLamaGrammarElementType.RULE_REF, 5),
          new(LLamaGrammarElementType.RULE_REF, 6),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.END, 0),
          // index 7 (term_7)
          new(LLamaGrammarElementType.CHAR, 48),
          new(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new(LLamaGrammarElementType.RULE_REF, 7),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.CHAR, 48),
          new(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new(LLamaGrammarElementType.END, 0),
      };

      CheckGrammar(source, "root", symbols, elements);
  }
  /// <summary>
  /// Parses a six-rule expression grammar (identifiers, numbers, parentheses, whitespace)
  /// and compares the parsed rules with the expected symbol table and element list.
  /// </summary>
  [Fact]
  public void ParseExtraComplexGrammar()
  {
      const string source = @"
root ::= (expr ""="" ws term ""\n"")+
expr ::= term ([-+*/] term)*
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*
";

      // Rule name paired with the index at which that rule is expected.
      List<KeyValuePair<string, uint>> symbols = new()
      {
          new("expr", 2),
          new("expr_6", 6),
          new("expr_7", 7),
          new("ident", 8),
          new("ident_10", 10),
          new("num", 9),
          new("num_11", 11),
          new("root", 0),
          new("root_1", 1),
          new("root_5", 5),
          new("term", 4),
          new("ws", 3),
          new("ws_12", 12),
      };

      // Elements of all rules back to back; each END closes one rule.
      List<LLamaGrammarElement> elements = new()
      {
          // index 0 (root)
          new(LLamaGrammarElementType.RULE_REF, 5),
          new(LLamaGrammarElementType.END, 0),
          // index 1 (root_1)
          new(LLamaGrammarElementType.RULE_REF, 2),
          new(LLamaGrammarElementType.CHAR, 61),
          new(LLamaGrammarElementType.RULE_REF, 3),
          new(LLamaGrammarElementType.RULE_REF, 4),
          new(LLamaGrammarElementType.CHAR, 10),
          new(LLamaGrammarElementType.END, 0),
          // index 2 (expr)
          new(LLamaGrammarElementType.RULE_REF, 4),
          new(LLamaGrammarElementType.RULE_REF, 7),
          new(LLamaGrammarElementType.END, 0),
          // index 3 (ws)
          new(LLamaGrammarElementType.RULE_REF, 12),
          new(LLamaGrammarElementType.END, 0),
          // index 4 (term)
          new(LLamaGrammarElementType.RULE_REF, 8),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.RULE_REF, 9),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.CHAR, 40),
          new(LLamaGrammarElementType.RULE_REF, 3),
          new(LLamaGrammarElementType.RULE_REF, 2),
          new(LLamaGrammarElementType.CHAR, 41),
          new(LLamaGrammarElementType.RULE_REF, 3),
          new(LLamaGrammarElementType.END, 0),
          // index 5 (root_5)
          new(LLamaGrammarElementType.RULE_REF, 1),
          new(LLamaGrammarElementType.RULE_REF, 5),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.RULE_REF, 1),
          new(LLamaGrammarElementType.END, 0),
          // index 6 (expr_6)
          new(LLamaGrammarElementType.CHAR, 45),
          new(LLamaGrammarElementType.CHAR_ALT, 43),
          new(LLamaGrammarElementType.CHAR_ALT, 42),
          new(LLamaGrammarElementType.CHAR_ALT, 47),
          new(LLamaGrammarElementType.RULE_REF, 4),
          new(LLamaGrammarElementType.END, 0),
          // index 7 (expr_7)
          new(LLamaGrammarElementType.RULE_REF, 6),
          new(LLamaGrammarElementType.RULE_REF, 7),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.END, 0),
          // index 8 (ident)
          new(LLamaGrammarElementType.CHAR, 97),
          new(LLamaGrammarElementType.CHAR_RNG_UPPER, 122),
          new(LLamaGrammarElementType.RULE_REF, 10),
          new(LLamaGrammarElementType.RULE_REF, 3),
          new(LLamaGrammarElementType.END, 0),
          // index 9 (num)
          new(LLamaGrammarElementType.RULE_REF, 11),
          new(LLamaGrammarElementType.RULE_REF, 3),
          new(LLamaGrammarElementType.END, 0),
          // index 10 (ident_10)
          new(LLamaGrammarElementType.CHAR, 97),
          new(LLamaGrammarElementType.CHAR_RNG_UPPER, 122),
          new(LLamaGrammarElementType.CHAR_ALT, 48),
          new(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new(LLamaGrammarElementType.CHAR_ALT, 95),
          new(LLamaGrammarElementType.RULE_REF, 10),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.END, 0),
          // index 11 (num_11)
          new(LLamaGrammarElementType.CHAR, 48),
          new(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new(LLamaGrammarElementType.RULE_REF, 11),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.CHAR, 48),
          new(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new(LLamaGrammarElementType.END, 0),
          // index 12 (ws_12)
          new(LLamaGrammarElementType.CHAR, 32),
          new(LLamaGrammarElementType.CHAR_ALT, 9),
          new(LLamaGrammarElementType.CHAR_ALT, 10),
          new(LLamaGrammarElementType.RULE_REF, 12),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.END, 0),
      };

      CheckGrammar(source, "root", symbols, elements);
  }
  /// <summary>
  /// Parses a single rule made of a negated character class and expects one
  /// CHAR_NOT element (97, the code of 'a') followed by END.
  /// </summary>
  [Fact]
  public void ParseGrammarNotSequence()
  {
      const string source = @"root ::= [^a]";

      List<KeyValuePair<string, uint>> symbols = new()
      {
          new("root", 0),
      };

      List<LLamaGrammarElement> elements = new()
      {
          new(LLamaGrammarElementType.CHAR_NOT, 97),
          new(LLamaGrammarElementType.END, 0),
      };

      CheckGrammar(source, "root", symbols, elements);
  }
  /// <summary>
  /// Parses a repeated character class holding a non-ASCII character and expects the
  /// CHAR element to carry 32599, which is U+7F57, the code point of that character.
  /// </summary>
  [Fact]
  public void ParseGrammarWithMultibyteCharacter()
  {
      const string source = @"root ::= [罗]*";

      List<KeyValuePair<string, uint>> symbols = new()
      {
          new("root", 0),
          new("root_1", 1),
      };

      List<LLamaGrammarElement> elements = new()
      {
          // index 0 (root)
          new(LLamaGrammarElementType.RULE_REF, 1),
          new(LLamaGrammarElementType.END, 0),
          // index 1 (root_1)
          new(LLamaGrammarElementType.CHAR, 32599),
          new(LLamaGrammarElementType.RULE_REF, 1),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.END, 0),
      };

      CheckGrammar(source, "root", symbols, elements);
  }
  /// <summary>
  /// A rule written with ":=" instead of "::=" must make the parser throw
  /// <see cref="GrammarExpectedNext"/>.
  /// </summary>
  [Fact]
  public void InvalidGrammarMissingRuleDefinition()
  {
      const string malformed = @"root := [^a]";
      var parser = new GBNFGrammarParser();

      Action parse = () => parser.Parse(malformed, "root");

      Assert.Throws<GrammarExpectedNext>(parse);
  }
  /// <summary>
  /// A root rule whose opening parenthesis is never closed must make the parser throw
  /// <see cref="GrammarExpectedNext"/>.
  /// </summary>
  [Fact]
  public void InvalidGrammarNoClosingBracket()
  {
      const string malformed = @"
root ::= (expr ""="" ws term ""\n""+ ## <--- Mismatched brackets on this line
expr ::= term ([-+*/] term)*
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*
";
      var parser = new GBNFGrammarParser();

      Action parse = () => parser.Parse(malformed, "root");

      Assert.Throws<GrammarExpectedNext>(parse);
  }
  /// <summary>
  /// A rule definition that starts directly with "::=" (no rule name) must make the
  /// parser throw <see cref="GrammarExpectedName"/>.
  /// </summary>
  [Fact]
  public void InvalidGrammarNoName()
  {
      const string malformed = @"
root ::= (expr ""="" ws term ""\n"")+
::= term ([-+*/] term)* ## <--- Missing a name for this rule!
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*
";
      var parser = new GBNFGrammarParser();

      Action parse = () => parser.Parse(malformed, "root");

      Assert.Throws<GrammarExpectedName>(parse);
  }
  /// <summary>
  /// A hex escape followed by non-hex characters (<c>\xQQ</c>) must make the parser throw
  /// <see cref="GrammarUnexpectedHexCharsCount"/>.
  /// </summary>
  [Fact]
  public void InvalidGrammarBadHex()
  {
      const string malformed = @"
root ::= (expr ""="" ws term ""\n"")+
expr ::= term ([-+*/] term)*
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-\xQQ]+ ws ## <--- `\xQQ` is not valid hex!
ws ::= [ \t\n]*
";
      var parser = new GBNFGrammarParser();

      Action parse = () => parser.Parse(malformed, "root");

      Assert.Throws<GrammarUnexpectedHexCharsCount>(parse);
  }
  /// <summary>
  /// An unsupported escape sequence (<c>\z</c>) inside a literal must make the parser throw
  /// <see cref="GrammarUnknownEscapeCharacter"/>.
  /// </summary>
  [Fact]
  public void InvalidGrammarBadEscapeCharacter()
  {
      const string malformed = @"
root ::= (expr ""="" ws term ""\z"")+ ## <--- `\z` is not a valid escape character
expr ::= term ([-+*/] term)*
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*
";
      var parser = new GBNFGrammarParser();

      Action parse = () => parser.Parse(malformed, "root");

      Assert.Throws<GrammarUnknownEscapeCharacter>(parse);
  }
  /// <summary>
  /// Grammar text that stops right after a backslash inside an open literal must make the
  /// parser throw <see cref="GrammarUnexpectedEndOfInput"/>.
  /// </summary>
  [Fact]
  public void InvalidGrammarUnexpectedEndOfInput()
  {
      const string truncated = @"root ::= (expr ""="" ws term ""\";
      var parser = new GBNFGrammarParser();

      Action parse = () => parser.Parse(truncated, "root");

      Assert.Throws<GrammarUnexpectedEndOfInput>(parse);
  }
  /// <summary>
  /// Constructing a <see cref="GrammarRule"/> from an empty element array must throw
  /// <see cref="ArgumentException"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleNoElements()
  {
      var noElements = Array.Empty<LLamaGrammarElement>();

      Action construct = () => new GrammarRule("name", noElements);

      Assert.Throws<ArgumentException>(construct);
  }
  /// <summary>
  /// Constructing a <see cref="GrammarRule"/> whose element list does not finish with an
  /// END element must throw <see cref="ArgumentException"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleNoEndElement()
  {
      LLamaGrammarElement[] elements =
      {
          new(LLamaGrammarElementType.ALT, 0),
      };

      Action construct = () => new GrammarRule("name", elements);

      Assert.Throws<ArgumentException>(construct);
  }
  /// <summary>
  /// Constructing a <see cref="GrammarRule"/> that contains an END element before the final
  /// one must throw <see cref="GrammarUnexpectedEndElement"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleExtraEndElement()
  {
      LLamaGrammarElement[] elements =
      {
          new(LLamaGrammarElementType.END, 0),
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.END, 0),
      };

      Action construct = () => new GrammarRule("name", elements);

      Assert.Throws<GrammarUnexpectedEndElement>(construct);
  }
  /// <summary>
  /// Constructing a <see cref="GrammarRule"/> in which CHAR_RNG_UPPER directly follows ALT
  /// must throw <see cref="GrammarUnexpectedCharRngElement"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleMalformedRange()
  {
      LLamaGrammarElement[] elements =
      {
          new(LLamaGrammarElementType.ALT, 0),
          new(LLamaGrammarElementType.CHAR_RNG_UPPER, 0),
          new(LLamaGrammarElementType.END, 0),
      };

      Action construct = () => new GrammarRule("name", elements);

      Assert.Throws<GrammarUnexpectedCharRngElement>(construct);
  }
  /// <summary>
  /// Constructing a <see cref="GrammarRule"/> in which CHAR_ALT directly follows RULE_REF
  /// must throw <see cref="GrammarUnexpectedCharAltElement"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleMalformedCharAlt()
  {
      LLamaGrammarElement[] elements =
      {
          new(LLamaGrammarElementType.RULE_REF, 0),
          new(LLamaGrammarElementType.CHAR_ALT, 0),
          new(LLamaGrammarElementType.END, 0),
      };

      Action construct = () => new GrammarRule("name", elements);

      Assert.Throws<GrammarUnexpectedCharAltElement>(construct);
  }
  /// <summary>
  /// Constructing a <see cref="GrammarRule"/> containing an element whose type is the
  /// out-of-range value 99999 must throw <see cref="ArgumentException"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleElement()
  {
      LLamaGrammarElement[] elements =
      {
          new((LLamaGrammarElementType)99999, 0),
          new(LLamaGrammarElementType.END, 0),
      };

      Action construct = () => new GrammarRule("name", elements);

      Assert.Throws<ArgumentException>(construct);
  }
  400. }
  401. }