diff --git a/.github/prepare_release.sh b/.github/prepare_release.sh
index bfc42ed7..a4c96bbf 100755
--- a/.github/prepare_release.sh
+++ b/.github/prepare_release.sh
@@ -23,11 +23,7 @@ fi
 mkdir ./temp;
 mkdir ./temp/runtimes;
 # For sure it could be done better but cp -R did not work on osx
-mkdir ./temp/runtimes/osx-arm64
-mkdir ./temp/runtimes/osx-x64
 cp ./LLama/runtimes/*.* ./temp/runtimes/;
-cp ./LLama/runtimes/osx-arm64/*.* ./temp/runtimes/osx-arm64/;
-cp ./LLama/runtimes/osx-x64/*.* ./temp/runtimes/osx-x64;
 cp ./LLama/runtimes/build/*.* ./temp/;

 # get the current version
diff --git a/LLama.Examples/Assets/chat-with-kunkun-chinese.txt b/LLama.Examples/Assets/chat-with-kunkun-chinese.txt
new file mode 100644
index 00000000..295e24d5
--- /dev/null
+++ b/LLama.Examples/Assets/chat-with-kunkun-chinese.txt
@@ -0,0 +1,8 @@
+指令：下面是一段你和用户的对话，你叫坤坤，是一个在各方面都拥有丰富经验的助理，你非常乐于回答用户的问题和帮助用户。
+
+用户：你好，坤坤。
+坤坤：你好，有什么我能帮助你的吗？
+用户：中国的首都是哪座城市？
+坤坤：中国的首都是北京市。
+用户：特朗普是谁？
+坤坤：
\ No newline at end of file
diff --git a/LLama.Examples/Examples/ChatChineseGB2312.cs b/LLama.Examples/Examples/ChatChineseGB2312.cs
new file mode 100644
index 00000000..ff27b962
--- /dev/null
+++ b/LLama.Examples/Examples/ChatChineseGB2312.cs
@@ -0,0 +1,69 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using LLama.Common;
+
+namespace LLama.Examples.Examples
+{
+    public class ChatChineseGB2312
+    {
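+        // Converts a string from one encoding to another by round-tripping its bytes:
+        // the bytes are taken in the original encoding, converted with Encoding.Convert,
+        // then decoded in the target encoding.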
+        private static string ConvertFromEncodingToAnother(string input, Encoding original, Encoding target)
+        {
+            byte[] bytes = original.GetBytes(input);
+            var convertedBytes = Encoding.Convert(original, target, bytes);
+            return target.GetString(convertedBytes);
+        }
+
+        public static async Task Run()
+        {
+            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); // Register gb2312 encoding
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+            var prompt = File.ReadAllText("Assets/chat-with-kunkun-chinese.txt", encoding: Encoding.GetEncoding("gb2312")).Trim();
+            prompt = ConvertFromEncodingToAnother(prompt, Encoding.GetEncoding("gb2312"), Encoding.UTF8);
+
+            var parameters = new ModelParams(modelPath)
+            {
+                ContextSize = 1024,
+                Seed = 1337,
+                GpuLayerCount = 20,
+                Encoding = Encoding.UTF8
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+            var executor = new InteractiveExecutor(context);
+
+            var session = new ChatSession(executor).WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"));
+
+            Console.ForegroundColor = ConsoleColor.Yellow;
+            Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common in Windows. It's recommended" +
+                " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
+            Console.ForegroundColor = ConsoleColor.White;
+
+            // show the prompt
+            Console.Write(prompt);
+            while (true)
+            {
+                await foreach (var text in session.ChatAsync(prompt, new InferenceParams()
+                {
+                    Temperature = 0.3f,
+                    TopK = 5,
+                    TopP = 0.85f,
+                    AntiPrompts = new List<string> { "用户:" },
+                    MaxTokens = 2048,
+                    RepeatPenalty = 1.05f
+                }))
+                {
+                    //Console.Write(text);
+                    Console.Write(ConvertFromEncodingToAnother(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
+                }
+
+                Console.ForegroundColor = ConsoleColor.Green;
+                prompt = Console.ReadLine();
+                Console.ForegroundColor = ConsoleColor.White;
+            }
+        }
+    }
+}
diff --git a/LLama.Examples/Examples/Runner.cs b/LLama.Examples/Examples/Runner.cs
index d7653657..0a37dcba 100644
--- a/LLama.Examples/Examples/Runner.cs
+++ b/LLama.Examples/Examples/Runner.cs
@@ -24,6 +24,7 @@ public class Runner
         { "Coding Assistant.", CodingAssistant.Run },
         { "Batch Decoding.", BatchedDecoding.Run },
         { "SK Kernel Memory.", KernelMemory.Run },
+        { "Chinese gb2312 chat", ChatChineseGB2312.Run },
         { "Exit", async () => Environment.Exit(0) }
     };
diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj
index c2491218..958bb6c4 100644
--- a/LLama.Examples/LLama.Examples.csproj
+++ b/LLama.Examples/LLama.Examples.csproj
@@ -31,7 +31,7 @@
-
+
@@ -71,6 +71,9 @@
       PreserveNewest
+    <None Update="Assets/chat-with-kunkun-chinese.txt">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj
index 148f7660..fbaee5ed 100644
--- a/LLama.Unittest/LLama.Unittest.csproj
+++ b/LLama.Unittest/LLama.Unittest.csproj
@@ -16,7 +16,7 @@
-
+
       runtime; build; native; contentfiles; analyzers; buildtransitive
       all
diff --git a/LLama.WebAPI/LLama.WebAPI.csproj b/LLama.WebAPI/LLama.WebAPI.csproj
index bdb2ad8a..a8c1179c 100644
--- a/LLama.WebAPI/LLama.WebAPI.csproj
+++ b/LLama.WebAPI/LLama.WebAPI.csproj
@@ -7,7 +7,7 @@
-
+
diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs
index 148f1735..d8a88725 100644
--- a/LLama/Native/NativeApi.Load.cs
+++ b/LLama/Native/NativeApi.Load.cs
@@ -4,6 +4,7 @@ using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.IO;
+using System.Linq;
 using System.Runtime.InteropServices;
 using System.Text.Json;

@@ -258,6 +259,7 @@ namespace LLama.Native
             enableLogging = configuration.Logging;
             // We move the flag to avoid loading library when the variable is called else where.
             NativeLibraryConfig.LibraryHasLoaded = true;
+            Log(configuration.ToString(), LogLevel.Information);

             if (!string.IsNullOrEmpty(configuration.Path))
             {
@@ -273,6 +275,7 @@ namespace LLama.Native

             var libraryTryLoadOrder = GetLibraryTryOrder(configuration);

+            string[] preferredPaths = configuration.SearchDirectories;
             string[] possiblePathPrefix = new string[] {
                 System.AppDomain.CurrentDomain.BaseDirectory,
                 Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? ""
@@ -280,19 +283,22 @@ namespace LLama.Native

             var tryFindPath = (string filename) =>
             {
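+                // Check the user-supplied search directories first, so paths added via
+                // WithSearchDirectory/WithSearchDirectories take precedence over the
+                // default application directories.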
"" @@ -280,19 +283,22 @@ namespace LLama.Native var tryFindPath = (string filename) => { - int i = 0; - while (!File.Exists(filename)) + foreach(var path in preferredPaths) { - if (i < possiblePathPrefix.Length) + if (File.Exists(Path.Combine(path, filename))) { - filename = Path.Combine(possiblePathPrefix[i], filename); - i++; + return Path.Combine(path, filename); } - else + } + + foreach(var path in possiblePathPrefix) + { + if (File.Exists(Path.Combine(path, filename))) { - break; + return Path.Combine(path, filename); } } + return filename; }; diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs index a4f97a00..ca6027fc 100644 --- a/LLama/Native/NativeApi.cs +++ b/LLama/Native/NativeApi.cs @@ -9,6 +9,17 @@ namespace LLama.Native { using llama_token = Int32; + public enum LLamaTokenType + { + LLAMA_TOKEN_TYPE_UNDEFINED = 0, + LLAMA_TOKEN_TYPE_NORMAL = 1, + LLAMA_TOKEN_TYPE_UNKNOWN = 2, + LLAMA_TOKEN_TYPE_CONTROL = 3, + LLAMA_TOKEN_TYPE_USER_DEFINED = 4, + LLAMA_TOKEN_TYPE_UNUSED = 5, + LLAMA_TOKEN_TYPE_BYTE = 6, + } + /// /// Callback from llama.cpp with log messages /// @@ -243,6 +254,9 @@ namespace LLama.Native } } + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, llama_token token); + /// /// Get the size of the context window for the model for this context /// diff --git a/LLama/Native/NativeLibraryConfig.cs b/LLama/Native/NativeLibraryConfig.cs index 76f89357..e5135970 100644 --- a/LLama/Native/NativeLibraryConfig.cs +++ b/LLama/Native/NativeLibraryConfig.cs @@ -1,4 +1,6 @@ using System; +using System.Collections.Generic; +using System.Linq; namespace LLama.Native { @@ -27,6 +29,10 @@ namespace LLama.Native private bool _allowFallback = true; private bool _skipCheck = false; private bool _logging = false; + /// + /// search directory -> priority level, 0 is the lowest. + /// + private List _searchDirectories = new List(); private static void ThrowIfLoaded() { @@ -120,13 +126,50 @@ namespace LLama.Native return this; } + /// + /// Add self-defined search directories. Note that the file stucture of the added + /// directories must be the same as the default directory. Besides, the directory + /// won't be used recursively. + /// + /// + /// + public NativeLibraryConfig WithSearchDirectories(IEnumerable directories) + { + ThrowIfLoaded(); + + _searchDirectories.AddRange(directories); + return this; + } + + /// + /// Add self-defined search directories. Note that the file stucture of the added + /// directories must be the same as the default directory. Besides, the directory + /// won't be used recursively. 
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, llama_token token);
+
         /// <summary>
         /// Get the size of the context window for the model for this context
         /// </summary>
diff --git a/LLama/Native/NativeLibraryConfig.cs b/LLama/Native/NativeLibraryConfig.cs
index 76f89357..e5135970 100644
--- a/LLama/Native/NativeLibraryConfig.cs
+++ b/LLama/Native/NativeLibraryConfig.cs
@@ -1,4 +1,6 @@
 using System;
+using System.Collections.Generic;
+using System.Linq;

 namespace LLama.Native
 {
@@ -27,6 +29,10 @@ namespace LLama.Native
         private bool _allowFallback = true;
         private bool _skipCheck = false;
         private bool _logging = false;
+        /// <summary>
+        /// search directory -> priority level, 0 is the lowest.
+        /// </summary>
+        private List<string> _searchDirectories = new List<string>();

         private static void ThrowIfLoaded()
         {
@@ -120,13 +126,50 @@ namespace LLama.Native
             return this;
         }

+        /// <summary>
+        /// Add self-defined search directories. Note that the file structure of the added
+        /// directories must be the same as the default directory. Besides, the directory
+        /// won't be used recursively.
+        /// </summary>
+        /// <param name="directories"></param>
+        /// <returns></returns>
+        public NativeLibraryConfig WithSearchDirectories(IEnumerable<string> directories)
+        {
+            ThrowIfLoaded();
+
+            _searchDirectories.AddRange(directories);
+            return this;
+        }
+
+        /// <summary>
+        /// Add a self-defined search directory. Note that the file structure of the added
+        /// directory must be the same as the default directory. Besides, the directory
+        /// won't be used recursively.
+        /// </summary>
+        /// <param name="directory"></param>
+        /// <returns></returns>
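+        /// <example>
+        /// Typical usage, before any model is loaded (the directory name is illustrative):
+        /// <code>NativeLibraryConfig.Instance.WithSearchDirectory("./native-libs");</code>
+        /// </example>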
+        public NativeLibraryConfig WithSearchDirectory(string directory)
+        {
+            ThrowIfLoaded();
+
+            _searchDirectories.Add(directory);
+            return this;
+        }
+
         internal static Description CheckAndGatherDescription()
         {
             if (Instance._allowFallback && Instance._skipCheck)
             {
                 throw new ArgumentException("Cannot skip the check when fallback is allowed.");
             }
-            return new Description(Instance._libraryPath, Instance._useCuda, Instance._avxLevel, Instance._allowFallback, Instance._skipCheck, Instance._logging);
+            return new Description(
+                Instance._libraryPath,
+                Instance._useCuda,
+                Instance._avxLevel,
+                Instance._allowFallback,
+                Instance._skipCheck,
+                Instance._logging,
+                Instance._searchDirectories.Concat(new string[] { "./" }).ToArray());
         }

         internal static string AvxLevelToString(AvxLevel level)
@@ -183,7 +226,31 @@ namespace LLama.Native
             Avx512,
         }

-        internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging);
+        internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging, string[] SearchDirectories)
+        {
+            public override string ToString()
+            {
+                string avxLevelString = AvxLevel switch
+                {
+                    AvxLevel.None => "NoAVX",
+                    AvxLevel.Avx => "AVX",
+                    AvxLevel.Avx2 => "AVX2",
+                    AvxLevel.Avx512 => "AVX512",
+                    _ => "Unknown"
+                };
+
+                string searchDirectoriesString = "{ " + string.Join(", ", SearchDirectories) + " }";
+
+                return $"NativeLibraryConfig Description:\n" +
+                       $"- Path: {Path}\n" +
+                       $"- PreferCuda: {UseCuda}\n" +
+                       $"- PreferredAvxLevel: {avxLevelString}\n" +
+                       $"- AllowFallback: {AllowFallback}\n" +
+                       $"- SkipCheck: {SkipCheck}\n" +
+                       $"- Logging: {Logging}\n" +
+                       $"- SearchDirectories and Priorities: {searchDirectoriesString}";
+            }
+        }
     }
 #endif
-    }
+}
diff --git a/README.md b/README.md
index a3d2643e..af517804 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 **The C#/.NET binding of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides higher-level APIs to inference the LLaMA Models and deploy it on local device with C#/.NET. It works on
-both Windows, Linux and MAC without requirment for compiling llama.cpp yourself. Even without GPU or not enought GPU memory, you can still apply LLaMA models well with this repo. 🤗**
+both Windows, Linux and MAC without requirement for compiling llama.cpp yourself. Even without a GPU or not enough GPU memory, you can still apply LLaMA models well with this repo. 🤗**

 **Furthermore, it provides integrations with other projects such as [semantic-kernel](https://github.com/microsoft/semantic-kernel), [kernel-memory](https://github.com/microsoft/kernel-memory) and [BotSharp](https://github.com/SciSharp/BotSharp) to provide higher-level applications.**

@@ -129,7 +129,7 @@ Console.Write(prompt);
 // run the inference in a loop to chat with LLM
 while (prompt != "stop")
 {
-    foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
+    await foreach (var text in session.ChatAsync(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
     {
         Console.Write(text);
     }
@@ -246,6 +246,7 @@ The llama.cpp commit id will help if you want to compile a DLL yourself.
 | v0.5.1 | [Llama2 7b GGUF](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF)| 6b73ef1 |
 | v0.6.0 | | [cb33f43](https://github.com/ggerganov/llama.cpp/commit/cb33f43a2a9f5a5a5f8d290dd97c625d9ba97a2f) |
 | v0.7.0, v0.8.0 | [Thespis-13B](https://huggingface.co/TheBloke/Thespis-13B-v0.5-GGUF/tree/main?not-for-all-audiences=true), [LLaMA2-7B](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF) | [207b519](https://github.com/ggerganov/llama.cpp/commit/207b51900e15cc7f89763a3bb1c565fe11cbb45d) |
+| v0.8.1 | | [e937066](https://github.com/ggerganov/llama.cpp/commit/e937066420b79a757bf80e9836eb12b88420a218) |

 ## License