You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

GrammarTest.cs 5.2 kB

April 2024 Binary Update (#662) * Updated binaries, using [this build](https://github.com/SciSharp/LLamaSharp/actions/runs/8654672719/job/23733195669) for llama.cpp commit `f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7`. - Added all new functions. - Moved some functions (e.g. `SafeLlamaModelHandle` specific functions) into `SafeLlamaModelHandle.cs` - Exposed tokens on `SafeLlamaModelHandle` and `LLamaWeights` through a `Tokens` property. As new special tokens are added in the future they can be added here. - Changed all token properties to return nullable tokens, to handle some models not having some tokens. - Fixed `DefaultSamplingPipeline` to handle no newline token in some models. * Moved native methods to more specific locations. - Context specific things have been moved into `SafeLLamaContextHandle.cs` and made private - they're exposed through C# properties and methods already. - Checking that GPU layer count is zero if GPU offload is not supported. - Moved methods for creating default structs (`llama_model_quantize_default_params` and `llama_context_default_params`) into relevant structs. * Removed exception if `GpuLayerCount > 0` when GPU is not supported. * - Added low level wrapper methods for new per-sequence state load/save in `SafeLLamaContextHandle` - Added high level wrapper methods (save/load with `State` object or memory mapped file) in `LLamaContext` - Moved native methods for per-sequence state load/save into `SafeLLamaContextHandle` * Added update and defrag methods for KV cache in `SafeLLamaContextHandle` * Updated submodule to `f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7` * Passing the sequence ID when saving a single sequence state
1 year ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. using LLama.Common;
  2. using LLama.Grammars;
  3. using LLama.Native;
  4. namespace LLama.Unittest
  5. {
  6. public sealed class GrammarTest
  7. : IDisposable
  8. {
  9. private readonly ModelParams _params;
  10. private readonly LLamaWeights _model;
  11. public GrammarTest()
  12. {
  13. _params = new ModelParams(Constants.GenerativeModelPath)
  14. {
  15. ContextSize = 2048,
  16. Seed = 92,
  17. GpuLayerCount = Constants.CIGpuLayerCount,
  18. };
  19. _model = LLamaWeights.LoadFromFile(_params);
  20. }
  21. public void Dispose()
  22. {
  23. _model.Dispose();
  24. }
  25. [Fact]
  26. public void CreateBasicGrammar()
  27. {
  28. var rules = new List<GrammarRule>
  29. {
  30. new GrammarRule("alpha", new[]
  31. {
  32. new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 'a'),
  33. new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 'z'),
  34. new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
  35. }),
  36. };
  37. using var handle = SafeLLamaGrammarHandle.Create(rules, 0);
  38. }
  39. [Fact]
  40. public void CreateGrammar_StartIndexOutOfRange()
  41. {
  42. var rules = new List<GrammarRule>
  43. {
  44. new GrammarRule("alpha", new[]
  45. {
  46. new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 'a'),
  47. new LLamaGrammarElement(LLamaGrammarElementType.CHAR_RNG_UPPER, 'z'),
  48. new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
  49. }),
  50. };
  51. Assert.Throws<ArgumentOutOfRangeException>(() => new Grammar(rules, 3));
  52. }
  53. [Fact]
  54. public async Task SampleWithTrivialGrammar()
  55. {
  56. // Create a grammar that constrains the output to be "cat" and nothing else. This is a nonsense answer, so
  57. // we can be confident it's not what the LLM would say if not constrained by the grammar!
  58. var rules = new List<GrammarRule>
  59. {
  60. new GrammarRule("feline", new []
  61. {
  62. new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 'c'),
  63. new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 'a'),
  64. new LLamaGrammarElement(LLamaGrammarElementType.CHAR, 't'),
  65. new LLamaGrammarElement(LLamaGrammarElementType.END, 0),
  66. }),
  67. };
  68. var grammar = new Grammar(rules, 0);
  69. using var grammarInstance = grammar.CreateInstance();
  70. using var grammarInstance2 = grammarInstance.Clone();
  71. var executor = new StatelessExecutor(_model, _params);
  72. var inferenceParams = new InferenceParams
  73. {
  74. MaxTokens = 3,
  75. AntiPrompts = new [] { ".", "Input:", "\n" },
  76. Grammar = grammarInstance2,
  77. };
  78. var result = await executor.InferAsync("Q. 7 + 12\nA. ", inferenceParams).ToListAsync();
  79. Assert.Equal("cat", result[0]);
  80. }
  81. //this test is flakey - it reproduces an error which appears to be a bug in llama.cpp
  82. //[Fact]
  83. //public async Task SampleTwiceWithGrammar()
  84. // {
  85. // var executor = new StatelessExecutor(_model, _params);
  86. // var grammar = Grammar.Parse("""
  87. //root ::= (object | array) endline?
  88. //endline ::= "<|im_end|>" ws
  89. //value ::= object | array | string | number | ("true" | "false" | "null") ws
  90. //object ::=
  91. //"{" ws (
  92. // string ":" ws value
  93. // ("," ws string ":" ws value)*
  94. //)? "}" ws
  95. //array ::=
  96. //"[" ws (
  97. // value
  98. // ("," ws value)*
  99. //)? "]" ws
  100. //string ::=
  101. //"\"" (
  102. // [^"\\] |
  103. // "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  104. //)* "\"" ws
  105. //number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
  106. //# Optional space: by convention, applied in this grammar after literal chars when allowed
  107. //ws ::= ([ \t\n] ws)?
  108. //""",
  109. // "root");
  110. // using (var grammarInstance = grammar.CreateInstance())
  111. // {
  112. // var inferenceParams = new InferenceParams
  113. // {
  114. // MaxTokens = 20,
  115. // AntiPrompts = new[] { ".", "Input:", "\n", "<|im_end|>" },
  116. // Grammar = grammarInstance,
  117. // };
  118. // var result = await executor.InferAsync("Write a JSON array with the first 6 positive numbers", inferenceParams).ToListAsync();
  119. // }
  120. // using (var grammarInstance2 = grammar.CreateInstance())
  121. // {
  122. // var inferenceParams2 = new InferenceParams
  123. // {
  124. // MaxTokens = 20,
  125. // AntiPrompts = new[] { ".", "Input:", "\n" },
  126. // Grammar = grammarInstance2,
  127. // };
  128. // var result2 = await executor.InferAsync("Write a JSON array with the first 6 positive numbers", inferenceParams2).ToListAsync();
  129. // }
  130. // }
  131. }
  132. }