You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

TokenTests.cs 5.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. using System.Text;
  2. using LLama.Common;
  3. using LLama.Extensions;
  4. namespace LLama.Unittest;
  5. public sealed class TokenTests
  6. : IDisposable
  7. {
  8. private readonly ModelParams _params;
  9. private readonly LLamaWeights _model;
  10. public TokenTests()
  11. {
  12. _params = new ModelParams(Constants.ModelPath)
  13. {
  14. ContextSize = 2048
  15. };
  16. _model = LLamaWeights.LoadFromFile(_params);
  17. }
  18. public void Dispose()
  19. {
  20. _model.Dispose();
  21. }
  22. [Fact]
  23. public void TokensEndWith()
  24. {
  25. var tokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);
  26. var result = tokens.TokensEndsWithAnyString(new[]
  27. {
  28. "a fish",
  29. "the mat",
  30. "this is an improbably long query to be using for this method"
  31. }, _model.NativeHandle, Encoding.UTF8);
  32. Assert.True(result);
  33. }
  34. [Fact]
  35. public void TokensEndSubstring()
  36. {
  37. var tokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);
  38. var result = tokens.TokensEndsWithAnyString((IList<string>)new[]
  39. {
  40. "at",
  41. }, _model.NativeHandle, Encoding.UTF8);
  42. Assert.True(result);
  43. }
  44. [Fact]
  45. public void TokensNotEndWith()
  46. {
  47. var tokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);
  48. var result = tokens.TokensEndsWithAnyString((IList<string>)new[]
  49. {
  50. "a fish",
  51. "The cat sat on the edge of the ma",
  52. "this is an improbably long query to be using for this method"
  53. }, _model.NativeHandle, Encoding.UTF8);
  54. Assert.False(result);
  55. }
  56. [Fact]
  57. public void TokensNotEndWithNothing()
  58. {
  59. var tokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);
  60. var result = tokens.TokensEndsWithAnyString((IList<string>)Array.Empty<string>(), _model.NativeHandle, Encoding.UTF8);
  61. Assert.False(result);
  62. }
  63. [Fact]
  64. public void TokensEndWith2()
  65. {
  66. var tokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);
  67. var decoder = new StreamingTokenDecoder(Encoding.UTF8, _model);
  68. decoder.AddRange(tokens);
  69. var processor = new AntipromptProcessor(new[]
  70. {
  71. "a fish",
  72. "the mat",
  73. "this is an improbably long query to be using for this method"
  74. });
  75. var result = processor.Add(decoder.Read());
  76. Assert.True(result);
  77. }
  78. [Fact]
  79. public void TokensEndSubstring2()
  80. {
  81. var tokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);
  82. var decoder = new StreamingTokenDecoder(Encoding.UTF8, _model);
  83. decoder.AddRange(tokens);
  84. var processor = new AntipromptProcessor(new[] { "at" });
  85. var result = processor.Add(decoder.Read());
  86. Assert.True(result);
  87. }
  88. [Fact]
  89. public void TokensNotEndWith2()
  90. {
  91. var tokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);
  92. var decoder = new StreamingTokenDecoder(Encoding.UTF8, _model);
  93. decoder.AddRange(tokens);
  94. var processor = new AntipromptProcessor(new[]
  95. {
  96. "a fish",
  97. "The cat sat on the edge of the ma",
  98. "this is an improbably long query to be using for this method"
  99. });
  100. var result = processor.Add(decoder.Read());
  101. Assert.False(result);
  102. }
  103. [Fact]
  104. public void TokensNotEndWithNothing2()
  105. {
  106. var tokens = _model.NativeHandle.Tokenize("The cat sat on the edge of the mat", false, true, Encoding.UTF8);
  107. var decoder = new StreamingTokenDecoder(Encoding.UTF8, _model);
  108. decoder.AddRange(tokens);
  109. var processor = new AntipromptProcessor();
  110. var result = processor.Add(decoder.Read());
  111. Assert.False(result);
  112. }
  113. [Fact]
  114. public void RoundTrip()
  115. {
  116. var strings = new[]
  117. {
  118. "Hello world",
  119. "철수",
  120. "😀 😃 😄 😁 😆철수😅 😂 😊 😇 🙂 ",
  121. };
  122. var charsArr = new char[1024];
  123. foreach (var input in strings)
  124. {
  125. // Convert into llama tokens
  126. var tokens = _model.NativeHandle.Tokenize(input, false, false, Encoding.UTF8);
  127. // Convert tokens back into characters
  128. var chars = _model.NativeHandle.TokensToSpan(tokens, charsArr.AsSpan(), Encoding.UTF8);
  129. // llama.cpp adds a space to the start of strings, remove that
  130. var output = new string(chars).TrimStart(' ');
  131. // Check that the input equals the output
  132. Assert.Equal(input, output);
  133. }
  134. }
  135. [Fact]
  136. public void StreamingDecoderRoundTrip()
  137. {
  138. var decoder = new StreamingTokenDecoder(Encoding.UTF8, _model);
  139. var strings = new[]
  140. {
  141. "Hello world",
  142. "철수",
  143. "😀 😃 😄 😁 😆철수😅 😂 😊 😇 🙂 ",
  144. };
  145. foreach (var input in strings)
  146. {
  147. decoder.Reset();
  148. // Convert into llama tokens
  149. var tokens = _model.NativeHandle.Tokenize(input, false, false, Encoding.UTF8);
  150. // Add tokens to decoder
  151. foreach (var token in tokens)
  152. decoder.Add(token);
  153. // llama.cpp adds a space to the start of strings, remove that
  154. var output = decoder.Read().TrimStart(' ');
  155. // Check that the input equals the output
  156. Assert.Equal(input, output);
  157. }
  158. }
  159. }