@@ -23,11 +23,7 @@ fi
 mkdir ./temp;
 mkdir ./temp/runtimes;
 # For sure it could be done better but cp -R did not work on osx
-mkdir ./temp/runtimes/osx-arm64
-mkdir ./temp/runtimes/osx-x64
 cp ./LLama/runtimes/*.* ./temp/runtimes/;
-cp ./LLama/runtimes/osx-arm64/*.* ./temp/runtimes/osx-arm64/;
-cp ./LLama/runtimes/osx-x64/*.* ./temp/runtimes/osx-x64;
 cp ./LLama/runtimes/build/*.* ./temp/;
 # get the current version
@@ -0,0 +1,8 @@
+指令：下面是一段你和用户的对话，你叫坤坤，是一个在各方面都拥有丰富经验的助理，你非常乐于回答用户的问题和帮助用户。
+用户：你好，坤坤。
+坤坤：你好，有什么我能帮助你的吗？
+用户：中国的首都是哪座城市？
+坤坤：中国的首都是北京市。
+用户：特朗普是谁？
+坤坤：
+
@@ -0,0 +1,70 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using LLama.Common;
+
+namespace LLama.Examples.Examples
+{
+    public class ChatChineseGB2312
+    {
+        private static string ConvertFromEncodingToAnother(string input, Encoding original, Encoding target)
+        {
+            byte[] bytes = original.GetBytes(input);
+            var convertedBytes = Encoding.Convert(original, target, bytes);
+            return target.GetString(convertedBytes);
+        }
+
+        public static async Task Run()
+        {
+            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); // Register gb2312 encoding
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+            var prompt = File.ReadAllText("Assets/chat-with-kunkun-chinese.txt", encoding: Encoding.GetEncoding("gb2312")).Trim();
+            prompt = ConvertFromEncodingToAnother(prompt, Encoding.GetEncoding("gb2312"), Encoding.UTF8);
+
+            var parameters = new ModelParams(modelPath)
+            {
+                ContextSize = 1024,
+                Seed = 1337,
+                GpuLayerCount = 20,
+                Encoding = Encoding.UTF8
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+            var executor = new InteractiveExecutor(context);
+
+            var session = new ChatSession(executor).WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"));
+
+            Console.ForegroundColor = ConsoleColor.Yellow;
+            Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common on Windows. It's recommended" +
+                " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
+            Console.ForegroundColor = ConsoleColor.White;
+
+            // show the prompt
+            Console.Write(prompt);
+            while (true)
+            {
+                await foreach (var text in session.ChatAsync(prompt, new InferenceParams()
+                {
+                    Temperature = 0.3f,
+                    TopK = 5,
+                    TopP = 0.85f,
+                    AntiPrompts = new List<string> { "用户:" },
+                    MaxTokens = 2048,
+                    RepeatPenalty = 1.05f
+                }))
+                {
+                    // Convert the model's UTF-8 output back to gb2312 before writing it to the console.
+                    Console.Write(ConvertFromEncodingToAnother(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
+                }
+
+                Console.ForegroundColor = ConsoleColor.Green;
+                prompt = Console.ReadLine();
+                Console.ForegroundColor = ConsoleColor.White;
+            }
+        }
+    }
+}
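The example converts the model's UTF-8 output back to gb2312 because a stock Chinese Windows console typically runs under that code page. On modern .NET, a simpler alternative (a sketch, not what the example does; it assumes the console font can display CJK characters) is to switch the console itself to UTF-8:

```cs
// Sketch: print the UTF-8 output directly instead of converting back to gb2312.
Console.OutputEncoding = System.Text.Encoding.UTF8;
Console.Write(text); // 'text' as yielded by session.ChatAsync above
```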
@@ -24,6 +24,7 @@ public class Runner
         { "Coding Assistant.", CodingAssistant.Run },
         { "Batch Decoding.", BatchedDecoding.Run },
         { "SK Kernel Memory.", KernelMemory.Run },
+        { "Chinese gb2312 chat.", ChatChineseGB2312.Run },
         { "Exit", async () => Environment.Exit(0) }
     };
@@ -31,7 +31,7 @@
     <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="8.0.0" />
     <PackageReference Include="Microsoft.KernelMemory.Core" Version="0.12.231123.1-preview" />
     <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta8" />
-    <PackageReference Include="Spectre.Console" Version="0.47.0" />
+    <PackageReference Include="Spectre.Console" Version="0.48.0" />
   </ItemGroup>

   <ItemGroup>
@@ -71,6 +71,9 @@
     <None Update="Assets\sample-SK-Readme.pdf">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
+    <None Update="Assets\chat-with-kunkun-chinese.txt">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
   </ItemGroup>
 </Project>
@@ -16,7 +16,7 @@
     <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
     <PackageReference Include="System.Linq.Async" Version="6.0.1" />
     <PackageReference Include="xunit" Version="2.6.2" />
-    <PackageReference Include="xunit.runner.visualstudio" Version="2.5.3">
+    <PackageReference Include="xunit.runner.visualstudio" Version="2.5.4">
       <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
       <PrivateAssets>all</PrivateAssets>
     </PackageReference>
@@ -7,7 +7,7 @@
   </PropertyGroup>

   <ItemGroup>
-    <PackageReference Include="Microsoft.VisualStudio.Validation" Version="17.6.11" />
+    <PackageReference Include="Microsoft.VisualStudio.Validation" Version="17.8.8" />
     <PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
   </ItemGroup>
@@ -4,6 +4,7 @@ using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.IO;
+using System.Linq;
 using System.Runtime.InteropServices;
 using System.Text.Json;
@@ -258,6 +259,7 @@ namespace LLama.Native
             enableLogging = configuration.Logging;
             // We move the flag to avoid loading library when the variable is called else where.
             NativeLibraryConfig.LibraryHasLoaded = true;
+            Log(configuration.ToString(), LogLevel.Information);

             if (!string.IsNullOrEmpty(configuration.Path))
             {
@@ -273,6 +275,7 @@ namespace LLama.Native
             var libraryTryLoadOrder = GetLibraryTryOrder(configuration);

+            string[] preferredPaths = configuration.SearchDirectories;
             string[] possiblePathPrefix = new string[] {
                 System.AppDomain.CurrentDomain.BaseDirectory,
                 Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? ""
@@ -280,19 +283,22 @@ namespace LLama.Native
             var tryFindPath = (string filename) =>
             {
-                int i = 0;
-                while (!File.Exists(filename))
+                foreach(var path in preferredPaths)
                 {
-                    if (i < possiblePathPrefix.Length)
+                    if (File.Exists(Path.Combine(path, filename)))
                     {
-                        filename = Path.Combine(possiblePathPrefix[i], filename);
-                        i++;
+                        return Path.Combine(path, filename);
                     }
-                    else
+                }
+                foreach(var path in possiblePathPrefix)
+                {
+                    if (File.Exists(Path.Combine(path, filename)))
                     {
-                        break;
+                        return Path.Combine(path, filename);
                     }
                 }

                 return filename;
             };
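Taken together, the new `tryFindPath` probes the user-registered search directories before the default prefixes, and falls back to the bare filename so the OS loader can apply its own search. A self-contained sketch of the resulting resolution order (the method name and wrapper class are illustrative, not part of the library):

```cs
using System.IO;
using System.Linq;

static class NativeLibraryProbeSketch
{
    // Probe preferred directories first, then the default prefixes; if nothing
    // matches, return the bare filename so the OS loader applies its own search.
    public static string Resolve(string filename, string[] preferredPaths, string[] possiblePathPrefix)
    {
        foreach (var dir in preferredPaths.Concat(possiblePathPrefix))
        {
            var candidate = Path.Combine(dir, filename);
            if (File.Exists(candidate))
                return candidate;
        }
        return filename;
    }
}
```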
@@ -9,6 +9,17 @@ namespace LLama.Native
 {
     using llama_token = Int32;

+    public enum LLamaTokenType
+    {
+        LLAMA_TOKEN_TYPE_UNDEFINED = 0,
+        LLAMA_TOKEN_TYPE_NORMAL = 1,
+        LLAMA_TOKEN_TYPE_UNKNOWN = 2,
+        LLAMA_TOKEN_TYPE_CONTROL = 3,
+        LLAMA_TOKEN_TYPE_USER_DEFINED = 4,
+        LLAMA_TOKEN_TYPE_UNUSED = 5,
+        LLAMA_TOKEN_TYPE_BYTE = 6,
+    }
+
     /// <summary>
     /// Callback from llama.cpp with log messages
     /// </summary>
@@ -243,6 +254,9 @@ namespace LLama.Native
             }
         }

+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, llama_token token);
+
         /// <summary>
         /// Get the size of the context window for the model for this context
         /// </summary>
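For illustration, the new binding makes token types inspectable from managed code, e.g. to skip control tokens such as BOS/EOS when detokenizing. A hedged sketch (it assumes the P/Invoke lands on `NativeApi`, as the surrounding hunk suggests; `IsControlToken` is a hypothetical helper):

```cs
// Hypothetical helper: true for control tokens (BOS, EOS, ...).
static bool IsControlToken(SafeLlamaModelHandle model, int token)
{
    return NativeApi.llama_token_get_type(model, token)
        == LLamaTokenType.LLAMA_TOKEN_TYPE_CONTROL;
}
```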
@@ -1,4 +1,6 @@
 using System;
+using System.Collections.Generic;
+using System.Linq;

 namespace LLama.Native
 {
@@ -27,6 +29,10 @@ namespace LLama.Native
         private bool _allowFallback = true;
         private bool _skipCheck = false;
         private bool _logging = false;
+        /// <summary>
+        /// Search directories for the native library, in priority order (entries added earlier are searched first).
+        /// </summary>
+        private List<string> _searchDirectories = new List<string>();

         private static void ThrowIfLoaded()
         {
@@ -120,13 +126,50 @@ namespace LLama.Native
             return this;
         }

+        /// <summary>
+        /// Add self-defined search directories. Note that the file structure of the added
+        /// directories must be the same as the default directory. The added directories
+        /// are not searched recursively.
+        /// </summary>
+        /// <param name="directories"></param>
+        /// <returns></returns>
+        public NativeLibraryConfig WithSearchDirectories(IEnumerable<string> directories)
+        {
+            ThrowIfLoaded();
+            _searchDirectories.AddRange(directories);
+            return this;
+        }
+
+        /// <summary>
+        /// Add a self-defined search directory. Note that the file structure of the added
+        /// directory must be the same as the default directory. The added directory
+        /// is not searched recursively.
+        /// </summary>
+        /// <param name="directory"></param>
+        /// <returns></returns>
+        public NativeLibraryConfig WithSearchDirectory(string directory)
+        {
+            ThrowIfLoaded();
+            _searchDirectories.Add(directory);
+            return this;
+        }
+
         internal static Description CheckAndGatherDescription()
         {
             if (Instance._allowFallback && Instance._skipCheck)
             {
                 throw new ArgumentException("Cannot skip the check when fallback is allowed.");
             }
-            return new Description(Instance._libraryPath, Instance._useCuda, Instance._avxLevel, Instance._allowFallback, Instance._skipCheck, Instance._logging);
+            return new Description(
+                Instance._libraryPath,
+                Instance._useCuda,
+                Instance._avxLevel,
+                Instance._allowFallback,
+                Instance._skipCheck,
+                Instance._logging,
+                Instance._searchDirectories.Concat(new string[] { "./" }).ToArray());
         }

         internal static string AvxLevelToString(AvxLevel level)
@@ -183,7 +226,31 @@ namespace LLama.Native
             Avx512,
         }

-        internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging);
+        internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging, string[] SearchDirectories)
+        {
+            public override string ToString()
+            {
+                string avxLevelString = AvxLevel switch
+                {
+                    AvxLevel.None => "NoAVX",
+                    AvxLevel.Avx => "AVX",
+                    AvxLevel.Avx2 => "AVX2",
+                    AvxLevel.Avx512 => "AVX512",
+                    _ => "Unknown"
+                };
+                string searchDirectoriesString = "{ " + string.Join(", ", SearchDirectories) + " }";
+                return $"NativeLibraryConfig Description:\n" +
+                       $"- Path: {Path}\n" +
+                       $"- PreferCuda: {UseCuda}\n" +
+                       $"- PreferredAvxLevel: {avxLevelString}\n" +
+                       $"- AllowFallback: {AllowFallback}\n" +
+                       $"- SkipCheck: {SkipCheck}\n" +
+                       $"- Logging: {Logging}\n" +
+                       $"- SearchDirectories and Priorities: {searchDirectoriesString}";
+            }
+        }
     }
 #endif
 }
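With these changes, a consumer can register extra native-library locations before anything touches llama.cpp. A minimal usage sketch (the path is illustrative; `Instance` and `WithLogs` are assumed from the fluent configuration style and the `_logging` flag shown above):

```cs
using LLama.Native;

// Must run before the native library is loaded, i.e. before creating any model.
NativeLibraryConfig.Instance
    .WithSearchDirectory("/opt/llama/runtimes")  // illustrative path
    .WithLogs();                                 // log the resolved configuration
```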
@@ -11,7 +11,7 @@

 **The C#/.NET binding of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides higher-level APIs to inference the LLaMA Models and deploy it on local device with C#/.NET. It works on
-both Windows, Linux and MAC without requirment for compiling llama.cpp yourself. Even without GPU or not enought GPU memory, you can still apply LLaMA models well with this repo. 🤗**
+Windows, Linux and macOS without the need to compile llama.cpp yourself. Even without a GPU, or with not enough GPU memory, you can still use LLaMA models with this repo. 🤗**

 **Furthermore, it provides integrations with other projects such as [semantic-kernel](https://github.com/microsoft/semantic-kernel), [kernel-memory](https://github.com/microsoft/kernel-memory) and [BotSharp](https://github.com/SciSharp/BotSharp) to provide higher-level applications.**
@@ -129,7 +129,7 @@ Console.Write(prompt);

 // run the inference in a loop to chat with LLM
 while (prompt != "stop")
 {
-    foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
+    await foreach (var text in session.ChatAsync(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
     {
         Console.Write(text);
     }
@@ -246,6 +246,7 @@ The llama.cpp commit id will help if you want to compile a DLL yourself.
 | v0.5.1 | [Llama2 7b GGUF](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF)| 6b73ef1 |
 | v0.6.0 | | [cb33f43](https://github.com/ggerganov/llama.cpp/commit/cb33f43a2a9f5a5a5f8d290dd97c625d9ba97a2f) |
 | v0.7.0, v0.8.0 | [Thespis-13B](https://huggingface.co/TheBloke/Thespis-13B-v0.5-GGUF/tree/main?not-for-all-audiences=true), [LLaMA2-7B](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF) | [207b519](https://github.com/ggerganov/llama.cpp/commit/207b51900e15cc7f89763a3bb1c565fe11cbb45d) |
+| v0.8.1 | | [e937066](https://github.com/ggerganov/llama.cpp/commit/e937066420b79a757bf80e9836eb12b88420a218) |

 ## License