You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

GrammarParserTest.cs 20 kB


  1. using System.Text;
  2. using LLama.Exceptions;
  3. using LLama.Native;
  4. using LLama.Grammars;
  5. namespace LLama.Unittest
  6. {
  7. /// <summary>
  8. /// Source:
  9. /// https://github.com/ggerganov/llama.cpp/blob/6381d4e110bd0ec02843a60bbeb8b6fc37a9ace9/tests/test-grammar-parser.cpp
  10. ///
  11. /// The commit hash in the URL identifies the llama.cpp revision that the current C# code mirrors.
  12. /// </summary>
  13. public sealed class GrammarParserTest
  14. {
  /// <summary>
  /// Parses <paramref name="grammar"/> and verifies the result against an expected symbol table
  /// and an expected flat sequence of grammar elements.
  /// </summary>
  /// <param name="grammar">Grammar text handed to <c>Grammar.Parse</c>.</param>
  /// <param name="rootRule">Name of the start rule handed to <c>Grammar.Parse</c>.</param>
  /// <param name="expected">
  /// Pairs of (rule name, rule index): the parsed rule stored at that index must carry that name.
  /// </param>
  /// <param name="expectedRules">
  /// The elements of all parsed rules, concatenated in the order the rules are stored.
  /// The number of entries must match the total number of parsed elements exactly.
  /// </param>
  private static void CheckGrammar(string grammar, string rootRule, List<KeyValuePair<string, uint>> expected, List<LLamaGrammarElement> expectedRules)
  {
      var state = Grammar.Parse(grammar, rootRule);
      Assert.Equal(0ul, state.StartRuleIndex);

      // Checked up front so that an empty result fails with a clear assertion
      // instead of an index error in the lookups below.
      Assert.NotEmpty(state.Rules);

      foreach (var symbol in expected)
      {
          var rule = state.Rules[(int)symbol.Value];
          Assert.Equal(symbol.Key, rule.Name);
      }

      // Walk every element of every rule and compare it with the next expected element.
      var index = 0;
      foreach (var rule in state.Rules)
      {
          for (var i = 0; i < rule.Elements.Count; i++)
          {
              var element = rule.Elements[i];

              // The parser produced more elements than the test expects: fail as an
              // assertion rather than as an out-of-range access on the expected list.
              Assert.True(
                  index < expectedRules.Count,
                  $"Parsed grammar contains more elements than the {expectedRules.Count} expected (extra element at index {index}: {element.Type}, {element.Value})");

              var expectedElement = expectedRules[index];

              // Pretty print error message before asserting
              if (expectedElement.Type != element.Type || expectedElement.Value != element.Value)
              {
                  Console.Error.WriteLine($"index: {index}");
                  Console.Error.WriteLine($"expected_element: {expectedElement.Type}, {expectedElement.Value}");
                  Console.Error.WriteLine($"actual_element: {element.Type}, {element.Value}");
                  Console.Error.WriteLine("expected_element != actual_element");
              }

              Assert.Equal(expectedElement.Type, element.Type);
              Assert.Equal(expectedElement.Value, element.Value);
              index++;
          }
      }

      // Every expected element must have been matched; otherwise a parser that emits
      // too few elements would pass unnoticed.
      Assert.Equal(expectedRules.Count, index);
  }
  /// <summary>
  /// Parses a three-rule arithmetic grammar written with hex escapes and checks the
  /// resulting symbol table and element sequence.
  /// </summary>
  [Fact]
  public void ParseComplexGrammar()
  {
      const string grammarText = @"root ::= (expr ""="" term ""\n"")+
expr ::= term ([-\x2b\x2A/] term)*
term ::= [\x30-\x39]+";

      // Rule name -> index of that rule in the parsed grammar.
      List<KeyValuePair<string, uint>> symbolIds = new List<KeyValuePair<string, uint>>
      {
          new KeyValuePair<string, uint>("expr", 2),
          new KeyValuePair<string, uint>("expr_5", 5),
          new KeyValuePair<string, uint>("expr_6", 6),
          new KeyValuePair<string, uint>("root", 0),
          new KeyValuePair<string, uint>("root_1", 1),
          new KeyValuePair<string, uint>("root_4", 4),
          new KeyValuePair<string, uint>("term", 3),
          new KeyValuePair<string, uint>("term_7", 7),
      };

      // Elements of all rules, concatenated by rule index. The grouping comments
      // pair each run (terminated by END) with the name listed for that index above.
      List<LLamaGrammarElement> elements = new List<LLamaGrammarElement>
      {
          // rule 0: root
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 4),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 1: root_1
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 2),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 61),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 10),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 2: expr
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 6),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 3: term
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 7),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 4: root_4
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 1),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 4),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 1),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 5: expr_5
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 45),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 43),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 42),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 47),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 6: expr_6
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 5),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 6),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 7: term_7
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 48),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 7),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 48),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
      };

      CheckGrammar(grammarText, "root", symbolIds, elements);
  }
  /// <summary>
  /// Parses a six-rule arithmetic grammar (identifiers, numbers, parentheses, whitespace)
  /// and checks the resulting symbol table and element sequence.
  /// </summary>
  [Fact]
  public void ParseExtraComplexGrammar()
  {
      const string grammarText = @"
root ::= (expr ""="" ws term ""\n"")+
expr ::= term ([-+*/] term)*
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*
";

      // Rule name -> index of that rule in the parsed grammar.
      List<KeyValuePair<string, uint>> symbolIds = new List<KeyValuePair<string, uint>>
      {
          new KeyValuePair<string, uint>("expr", 2),
          new KeyValuePair<string, uint>("expr_6", 6),
          new KeyValuePair<string, uint>("expr_7", 7),
          new KeyValuePair<string, uint>("ident", 8),
          new KeyValuePair<string, uint>("ident_10", 10),
          new KeyValuePair<string, uint>("num", 9),
          new KeyValuePair<string, uint>("num_11", 11),
          new KeyValuePair<string, uint>("root", 0),
          new KeyValuePair<string, uint>("root_1", 1),
          new KeyValuePair<string, uint>("root_5", 5),
          new KeyValuePair<string, uint>("term", 4),
          new KeyValuePair<string, uint>("ws", 3),
          new KeyValuePair<string, uint>("ws_12", 12),
      };

      // Elements of all rules, concatenated by rule index. The grouping comments
      // pair each run (terminated by END) with the name listed for that index above.
      List<LLamaGrammarElement> elements = new List<LLamaGrammarElement>
      {
          // rule 0: root
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 5),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 1: root_1
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 2),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 61),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 4),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 10),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 2: expr
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 4),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 7),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 3: ws
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 12),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 4: term
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 8),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 9),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 40),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 2),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 41),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 5: root_5
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 1),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 5),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 1),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 6: expr_6
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 45),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 43),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 42),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 47),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 4),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 7: expr_7
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 6),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 7),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 8: ident
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 97),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 122),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 10),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 9: num
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 11),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 3),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 10: ident_10
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 97),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 122),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 48),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 95),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 10),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 11: num_11
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 48),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 11),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 48),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 57),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 12: ws_12
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 32),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 9),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 10),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 12),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
      };

      CheckGrammar(grammarText, "root", symbolIds, elements);
  }
  /// <summary>
  /// Parses a grammar whose only rule is the negated character class <c>[^a]</c> and
  /// expects a single CHAR_NOT element (value 97) followed by END.
  /// </summary>
  [Fact]
  public void ParseGrammarNotSequence()
  {
      const string grammarText = @"root ::= [^a]";

      // Rule name -> index of that rule in the parsed grammar.
      List<KeyValuePair<string, uint>> symbolIds = new List<KeyValuePair<string, uint>>
      {
          new KeyValuePair<string, uint>("root", 0),
      };

      // rule 0: root
      List<LLamaGrammarElement> elements = new List<LLamaGrammarElement>
      {
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_NOT, 97),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
      };

      CheckGrammar(grammarText, "root", symbolIds, elements);
  }
  /// <summary>
  /// Parses a grammar whose character class holds a single non-ASCII character and
  /// expects it to appear as one CHAR element with value 32599.
  /// </summary>
  [Fact]
  public void ParseGrammarWithMultibyteCharacter()
  {
      const string grammarText = @"root ::= [罗]*";

      // Rule name -> index of that rule in the parsed grammar.
      List<KeyValuePair<string, uint>> symbolIds = new List<KeyValuePair<string, uint>>
      {
          new KeyValuePair<string, uint>("root", 0),
          new KeyValuePair<string, uint>("root_1", 1),
      };

      List<LLamaGrammarElement> elements = new List<LLamaGrammarElement>
      {
          // rule 0: root
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 1),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),

          // rule 1: root_1
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 32599),
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 1),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
      };

      CheckGrammar(grammarText, "root", symbolIds, elements);
  }
  /// <summary>
  /// A rule written with <c>:=</c> (the other grammars in this file use <c>::=</c>)
  /// must make the parser throw <c>GrammarExpectedNext</c>.
  /// </summary>
  [Fact]
  public void InvalidGrammarMissingRuleDefinition()
  {
      var parser = new GBNFGrammarParser();
      const string grammarText = @"root := [^a]";

      Action parse = () => parser.Parse(grammarText, "root");

      Assert.Throws<GrammarExpectedNext>(parse);
  }
  /// <summary>
  /// A root rule whose opening parenthesis is never closed must make the parser
  /// throw <c>GrammarExpectedNext</c>.
  /// </summary>
  [Fact]
  public void InvalidGrammarNoClosingBracket()
  {
      var parser = new GBNFGrammarParser();
      const string grammarText = @"
root ::= (expr ""="" ws term ""\n""+ ## <--- Mismatched brackets on this line
expr ::= term ([-+*/] term)*
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*
";

      Action parse = () => parser.Parse(grammarText, "root");

      Assert.Throws<GrammarExpectedNext>(parse);
  }
  /// <summary>
  /// A rule line that starts directly with <c>::=</c>, without a rule name, must make
  /// the parser throw <c>GrammarExpectedName</c>.
  /// </summary>
  [Fact]
  public void InvalidGrammarNoName()
  {
      var parser = new GBNFGrammarParser();
      const string grammarText = @"
root ::= (expr ""="" ws term ""\n"")+
::= term ([-+*/] term)* ## <--- Missing a name for this rule!
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*
";

      Action parse = () => parser.Parse(grammarText, "root");

      Assert.Throws<GrammarExpectedName>(parse);
  }
  /// <summary>
  /// A <c>\x</c> escape followed by non-hex characters (<c>\xQQ</c>) must make the
  /// parser throw <c>GrammarUnexpectedHexCharsCount</c>.
  /// </summary>
  [Fact]
  public void InvalidGrammarBadHex()
  {
      var parser = new GBNFGrammarParser();
      const string grammarText = @"
root ::= (expr ""="" ws term ""\n"")+
expr ::= term ([-+*/] term)*
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-\xQQ]+ ws ## <--- `\xQQ` is not valid hex!
ws ::= [ \t\n]*
";

      Action parse = () => parser.Parse(grammarText, "root");

      Assert.Throws<GrammarUnexpectedHexCharsCount>(parse);
  }
  /// <summary>
  /// An unsupported escape sequence (<c>\z</c>) inside a string literal must make the
  /// parser throw <c>GrammarUnknownEscapeCharacter</c>.
  /// </summary>
  [Fact]
  public void InvalidGrammarBadEscapeCharacter()
  {
      var parser = new GBNFGrammarParser();
      const string grammarText = @"
root ::= (expr ""="" ws term ""\z"")+ ## <--- `\z` is not a valid escape character
expr ::= term ([-+*/] term)*
term ::= ident | num | ""("" ws expr "")"" ws
ident ::= [a-z] [a-z0-9_]* ws
num ::= [0-9]+ ws
ws ::= [ \t\n]*
";

      Action parse = () => parser.Parse(grammarText, "root");

      Assert.Throws<GrammarUnknownEscapeCharacter>(parse);
  }
  /// <summary>
  /// Grammar text that stops right after a backslash inside an opened string literal
  /// must make the parser throw <c>GrammarUnexpectedEndOfInput</c>.
  /// </summary>
  [Fact]
  public void InvalidGrammarUnexpectedEndOfInput()
  {
      var parser = new GBNFGrammarParser();
      const string grammarText = @"root ::= (expr ""="" ws term ""\";

      Action parse = () => parser.Parse(grammarText, "root");

      Assert.Throws<GrammarUnexpectedEndOfInput>(parse);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> from an empty element array must throw
  /// <see cref="ArgumentException"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleNoElements()
  {
      // The rule is created only for the exception it raises; the instance is not used.
      Action createRule = () => new GrammarRule("name", Array.Empty<LLamaGrammarElement>());

      Assert.Throws<ArgumentException>(createRule);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> whose elements contain no END element must
  /// throw <see cref="ArgumentException"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleNoEndElement()
  {
      // The rule is created only for the exception it raises; the instance is not used.
      Action createRule = () => new GrammarRule("name", new[]
      {
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
      });

      Assert.Throws<ArgumentException>(createRule);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> with an END element before the final position
  /// must throw <c>GrammarUnexpectedEndElement</c>.
  /// </summary>
  [Fact]
  public void InvalidRuleExtraEndElement()
  {
      // The rule is created only for the exception it raises; the instance is not used.
      Action createRule = () => new GrammarRule("name", new[]
      {
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
      });

      Assert.Throws<GrammarUnexpectedEndElement>(createRule);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> in which CHAR_RNG_UPPER directly follows an ALT
  /// element must throw <c>GrammarUnexpectedCharRngElement</c>.
  /// </summary>
  [Fact]
  public void InvalidRuleMalformedRange()
  {
      // The rule is created only for the exception it raises; the instance is not used.
      Action createRule = () => new GrammarRule("name", new[]
      {
          new LLamaGrammarElement(LLamaGrammarElementType.ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
      });

      Assert.Throws<GrammarUnexpectedCharRngElement>(createRule);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> in which CHAR_ALT directly follows a RULE_REF
  /// element must throw <c>GrammarUnexpectedCharAltElement</c>.
  /// </summary>
  [Fact]
  public void InvalidRuleMalformedCharAlt()
  {
      // The rule is created only for the exception it raises; the instance is not used.
      Action createRule = () => new GrammarRule("name", new[]
      {
          new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.CHAR_ALT, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
      });

      Assert.Throws<GrammarUnexpectedCharAltElement>(createRule);
  }
  /// <summary>
  /// Constructing a <c>GrammarRule</c> containing an element whose type is the
  /// out-of-range cast value 99999 must throw <see cref="ArgumentException"/>.
  /// </summary>
  [Fact]
  public void InvalidRuleElement()
  {
      // The rule is created only for the exception it raises; the instance is not used.
      Action createRule = () => new GrammarRule("name", new[]
      {
          new LLamaGrammarElement((LLamaGrammarElementType)99999, 0),
          new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
      });

      Assert.Throws<ArgumentException>(createRule);
  }
  401. }
  402. }