diff --git a/LLama.Unittest/LLamaContextTests.cs b/LLama.Unittest/LLamaContextTests.cs
index 2edf3a62..7fdc8280 100644
--- a/LLama.Unittest/LLamaContextTests.cs
+++ b/LLama.Unittest/LLamaContextTests.cs
@@ -37,7 +37,7 @@ namespace LLama.Unittest
         {
             var tokens = _context.Tokenize("The quick brown fox", true);
 
-            Assert.Equal(new[] { 1, 450, 4996, 17354, 1701, 29916 }, tokens);
+            Assert.Equal(new[] { 1, 1576, 4996, 17354, 1701, 29916 }, tokens);
         }
 
         [Fact]
@@ -45,7 +45,7 @@ namespace LLama.Unittest
         {
             var tokens = _context.Tokenize("The quick brown fox", false);
 
-            Assert.Equal(new[] { 450, 4996, 17354, 1701, 29916 }, tokens);
+            Assert.Equal(new[] { 1576, 4996, 17354, 1701, 29916 }, tokens);
         }
 
         [Fact]
diff --git a/LLama.Unittest/StatelessExecutorTest.cs b/LLama.Unittest/StatelessExecutorTest.cs
index 54bafedb..a28d9eb6 100644
--- a/LLama.Unittest/StatelessExecutorTest.cs
+++ b/LLama.Unittest/StatelessExecutorTest.cs
@@ -48,13 +48,13 @@ namespace LLama.Unittest
         {
             var executor = new StatelessExecutor(_weights, _params);
 
-            const string question = " Question. why is a cat the best pet?\nAnswer: ";
+            const string question = " Question. cats or dogs?\nAnswer: ";
 
             // The context size is set to 60. Generate more than that, forcing it to generate a coherent response
             // with a modified context
             var @params = new InferenceParams()
            {
-                MaxTokens = 80,
+                MaxTokens = 70,
                 TokensKeep = question.Length,
             };
 
diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs
index ad47541e..c9714dcd 100644
--- a/LLama/LLamaStatelessExecutor.cs
+++ b/LLama/LLamaStatelessExecutor.cs
@@ -112,9 +112,6 @@ namespace LLama
                     NativeApi.llama_kv_cache_seq_shift(Context.NativeHandle, (LLamaSeqId)0, inferenceParams.TokensKeep + 1 + n_discard, n_past, -n_discard);
 
                     n_past -= n_discard;
-
-                    tokens.Clear();
-                    tokens.AddRange(lastTokens.Skip(lastTokens.Count - n_left / 2).Take(n_left / 2));
                 }
 
                 n_past = Context.Eval(tokens, n_past);
diff --git a/LLama/runtimes/libllama-cuda11.dll b/LLama/runtimes/libllama-cuda11.dll
index 412a23cf..e5fc7dad 100644
Binary files a/LLama/runtimes/libllama-cuda11.dll and b/LLama/runtimes/libllama-cuda11.dll differ
diff --git a/LLama/runtimes/libllama-cuda11.so b/LLama/runtimes/libllama-cuda11.so
index bb525d90..3532fe99 100644
Binary files a/LLama/runtimes/libllama-cuda11.so and b/LLama/runtimes/libllama-cuda11.so differ
diff --git a/LLama/runtimes/libllama-cuda12.dll b/LLama/runtimes/libllama-cuda12.dll
index 423c51f2..89f27e24 100644
Binary files a/LLama/runtimes/libllama-cuda12.dll and b/LLama/runtimes/libllama-cuda12.dll differ
diff --git a/LLama/runtimes/libllama-cuda12.so b/LLama/runtimes/libllama-cuda12.so
index 65c8b953..81b4aa99 100644
Binary files a/LLama/runtimes/libllama-cuda12.so and b/LLama/runtimes/libllama-cuda12.so differ
diff --git a/LLama/runtimes/libllama.dll b/LLama/runtimes/libllama.dll
index 2973126c..62d071ec 100644
Binary files a/LLama/runtimes/libllama.dll and b/LLama/runtimes/libllama.dll differ
diff --git a/LLama/runtimes/libllama.dylib b/LLama/runtimes/libllama.dylib
index ff4bc0ba..c2ca7ec8 100755
Binary files a/LLama/runtimes/libllama.dylib and b/LLama/runtimes/libllama.dylib differ
diff --git a/LLama/runtimes/libllama.so b/LLama/runtimes/libllama.so
index d826301c..b9ef4c1d 100644
Binary files a/LLama/runtimes/libllama.so and b/LLama/runtimes/libllama.so differ