@@ -23,11 +23,7 @@ fi
mkdir ./temp;
mkdir ./temp/runtimes;
# For sure it could be done better but cp -R did not work on osx
mkdir ./temp/runtimes/osx-arm64
mkdir ./temp/runtimes/osx-x64
cp ./LLama/runtimes/*.* ./temp/runtimes/;
cp ./LLama/runtimes/osx-arm64/*.* ./temp/runtimes/osx-arm64/;
cp ./LLama/runtimes/osx-x64/*.* ./temp/runtimes/osx-x64;
cp ./LLama/runtimes/build/*.* ./temp/;
# get the current version
@@ -0,0 +1,8 @@
指令:下面是一段你和用户的对话,你叫坤坤,是一个在各方面都拥有丰富经验的助理,你非常乐于回答用户的问题和帮助用户。
用户:你好,坤坤。
坤坤:你好,有什么我能帮助你的吗?
用户:中国的首都是哪座城市?
坤坤:中国的首都是北京市。
用户:特朗普是谁?
坤坤:
@@ -0,0 +1,69 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using LLama.Common;

namespace LLama.Examples.Examples
{
    public class ChatChineseGB2312
    {
        private static string ConvertFromEncodingToAnother(string input, Encoding original, Encoding target)
        {
            byte[] bytes = original.GetBytes(input);
            var convertedBytes = Encoding.Convert(original, target, bytes);
            return target.GetString(convertedBytes);
        }

        public static async Task Run()
        {
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); // Register gb2312 encoding
            Console.Write("Please input your model path: ");
            var modelPath = Console.ReadLine();
            var prompt = File.ReadAllText("Assets/chat-with-kunkun-chinese.txt", encoding: Encoding.GetEncoding("gb2312")).Trim();
            prompt = ConvertFromEncodingToAnother(prompt, Encoding.GetEncoding("gb2312"), Encoding.UTF8);

            var parameters = new ModelParams(modelPath)
            {
                ContextSize = 1024,
                Seed = 1337,
                GpuLayerCount = 20,
                Encoding = Encoding.UTF8
            };
            using var model = LLamaWeights.LoadFromFile(parameters);
            using var context = model.CreateContext(parameters);
            var executor = new InteractiveExecutor(context);

            var session = new ChatSession(executor).WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"));

            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common on Windows. It's recommended" +
                " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
            Console.ForegroundColor = ConsoleColor.White;

            // show the prompt
            Console.Write(prompt);
            while (true)
            {
                await foreach (var text in session.ChatAsync(prompt, new InferenceParams()
                {
                    Temperature = 0.3f,
                    TopK = 5,
                    TopP = 0.85f,
                    AntiPrompts = new List<string> { "用户:" },
                    MaxTokens = 2048,
                    RepeatPenalty = 1.05f
                }))
                {
                    // Convert the model's UTF-8 output back to gb2312 so it displays correctly in a gb2312 console.
                    Console.Write(ConvertFromEncodingToAnother(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
                }

                Console.ForegroundColor = ConsoleColor.Green;
                prompt = Console.ReadLine() ?? "";
                Console.ForegroundColor = ConsoleColor.White;
            }
        }
    }
}
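The example's encoding helper relies on `Encoding.Convert` plus the code-pages provider. Below is a minimal, self-contained sketch of the same round trip, assuming the System.Text.Encoding.CodePages package is referenced; the `gbText` input is purely illustrative:

```csharp
using System;
using System.Text;

class EncodingRoundTripSketch
{
    static void Main()
    {
        // gb2312 is not available on .NET Core / .NET 5+ until the
        // code-pages provider is registered (once per process).
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        var gb2312 = Encoding.GetEncoding("gb2312");

        string gbText = "你好,坤坤。"; // illustrative input

        // The same steps ConvertFromEncodingToAnother performs:
        // encode with the source encoding, convert the raw bytes,
        // then decode with the target encoding.
        byte[] gbBytes = gb2312.GetBytes(gbText);
        byte[] utf8Bytes = Encoding.Convert(gb2312, Encoding.UTF8, gbBytes);
        Console.WriteLine(Encoding.UTF8.GetString(utf8Bytes));
    }
}
```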
@@ -24,6 +24,7 @@ public class Runner
    { "Coding Assistant.", CodingAssistant.Run },
    { "Batch Decoding.", BatchedDecoding.Run },
    { "SK Kernel Memory.", KernelMemory.Run },
+   { "Chinese gb2312 chat", ChatChineseGB2312.Run },
    { "Exit", async () => Environment.Exit(0) }
};
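Each runner entry maps a display title to a `Func<Task>`, so wiring up the new example is a single dictionary line. A hedged sketch of that dispatch pattern (standalone and illustrative, not the actual `Runner` code):

```csharp
using System;
using System.Collections.Generic;
using System.Threading.Tasks;

class RunnerSketch
{
    // Menu entries map a display title to an async entry point,
    // mirroring the dictionary shape shown in the diff above.
    static readonly Dictionary<string, Func<Task>> Examples = new()
    {
        { "Chinese gb2312 chat", () => Task.CompletedTask }, // stand-in for ChatChineseGB2312.Run
        { "Exit", () => { Environment.Exit(0); return Task.CompletedTask; } }
    };

    static async Task Main()
    {
        // Dispatch: look up the chosen title and await its delegate.
        if (Examples.TryGetValue("Chinese gb2312 chat", out var run))
            await run();
    }
}
```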
@@ -31,7 +31,7 @@
    <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="8.0.0" />
    <PackageReference Include="Microsoft.KernelMemory.Core" Version="0.12.231123.1-preview" />
    <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta8" />
-   <PackageReference Include="Spectre.Console" Version="0.47.0" />
+   <PackageReference Include="Spectre.Console" Version="0.48.0" />
  </ItemGroup>

  <ItemGroup>
@@ -71,6 +71,9 @@
    <None Update="Assets\sample-SK-Readme.pdf">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
+   <None Update="Assets\chat-with-kunkun-chinese.txt">
+     <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+   </None>
  </ItemGroup>
</Project>
@@ -16,7 +16,7 @@
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
    <PackageReference Include="System.Linq.Async" Version="6.0.1" />
    <PackageReference Include="xunit" Version="2.6.2" />
-   <PackageReference Include="xunit.runner.visualstudio" Version="2.5.3">
+   <PackageReference Include="xunit.runner.visualstudio" Version="2.5.4">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
@@ -7,7 +7,7 @@
  </PropertyGroup>

  <ItemGroup>
-   <PackageReference Include="Microsoft.VisualStudio.Validation" Version="17.6.11" />
+   <PackageReference Include="Microsoft.VisualStudio.Validation" Version="17.8.8" />
    <PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
  </ItemGroup>
@@ -4,6 +4,7 @@ using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
+using System.Linq;
using System.Runtime.InteropServices;
using System.Text.Json;
@@ -258,6 +259,7 @@ namespace LLama.Native
            enableLogging = configuration.Logging;
            // Set the flag here so that referencing the variable elsewhere does not trigger the library load again.
            NativeLibraryConfig.LibraryHasLoaded = true;
+           Log(configuration.ToString(), LogLevel.Information);
            if (!string.IsNullOrEmpty(configuration.Path))
            {
@@ -273,6 +275,7 @@ namespace LLama.Native
            var libraryTryLoadOrder = GetLibraryTryOrder(configuration);

+           string[] preferredPaths = configuration.SearchDirectories;
            string[] possiblePathPrefix = new string[] {
                System.AppDomain.CurrentDomain.BaseDirectory,
                Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? ""
@@ -280,19 +283,22 @@ namespace LLama.Native
            var tryFindPath = (string filename) =>
            {
-               int i = 0;
-               while (!File.Exists(filename))
-               {
-                   if (i < possiblePathPrefix.Length)
-                   {
-                       filename = Path.Combine(possiblePathPrefix[i], filename);
-                       i++;
-                   }
-                   else
-                   {
-                       break;
-                   }
-               }
+               foreach (var path in preferredPaths)
+               {
+                   if (File.Exists(Path.Combine(path, filename)))
+                   {
+                       return Path.Combine(path, filename);
+                   }
+               }
+               foreach (var path in possiblePathPrefix)
+               {
+                   if (File.Exists(Path.Combine(path, filename)))
+                   {
+                       return Path.Combine(path, filename);
+                   }
+               }
                return filename;
            };
@@ -9,6 +9,17 @@ namespace LLama.Native
{
    using llama_token = Int32;

+   public enum LLamaTokenType
+   {
+       LLAMA_TOKEN_TYPE_UNDEFINED = 0,
+       LLAMA_TOKEN_TYPE_NORMAL = 1,
+       LLAMA_TOKEN_TYPE_UNKNOWN = 2,
+       LLAMA_TOKEN_TYPE_CONTROL = 3,
+       LLAMA_TOKEN_TYPE_USER_DEFINED = 4,
+       LLAMA_TOKEN_TYPE_UNUSED = 5,
+       LLAMA_TOKEN_TYPE_BYTE = 6,
+   }

    /// <summary>
    /// Callback from llama.cpp with log messages
    /// </summary>
@@ -243,6 +254,9 @@ namespace LLama.Native
        }
    }

+   [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+   public static extern LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, llama_token token);

    /// <summary>
    /// Get the size of the context window for the model for this context
    /// </summary>
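With the enum and export above, callers can classify vocabulary entries, for example to filter control tokens out of rendered output. A hedged sketch, assuming the P/Invoke lands on the usual `NativeApi` class and `model` is an already-loaded `SafeLlamaModelHandle`; the id range is arbitrary:

```csharp
// Illustrative only: probe the first few token ids and report control tokens.
for (int id = 0; id < 10; id++)
{
    var type = NativeApi.llama_token_get_type(model, id);
    if (type == LLamaTokenType.LLAMA_TOKEN_TYPE_CONTROL)
        Console.WriteLine($"token {id} is a control token");
}
```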
@@ -1,4 +1,6 @@
using System;
+using System.Collections.Generic;
+using System.Linq;

namespace LLama.Native
{
@@ -27,6 +29,10 @@ namespace LLama.Native
        private bool _allowFallback = true;
        private bool _skipCheck = false;
        private bool _logging = false;
+       /// <summary>
+       /// Search directories for the native library, probed in order; earlier entries take precedence, with the default "./" appended last.
+       /// </summary>
+       private List<string> _searchDirectories = new List<string>();

        private static void ThrowIfLoaded()
        {
@@ -120,13 +126,50 @@
            return this;
        }

+       /// <summary>
+       /// Add self-defined search directories. Note that the file structure of the added
+       /// directories must be the same as the default directory. Besides, the directories
+       /// will not be searched recursively.
+       /// </summary>
+       /// <param name="directories"></param>
+       /// <returns></returns>
+       public NativeLibraryConfig WithSearchDirectories(IEnumerable<string> directories)
+       {
+           ThrowIfLoaded();
+           _searchDirectories.AddRange(directories);
+           return this;
+       }
+
+       /// <summary>
+       /// Add a self-defined search directory. Note that the file structure of the added
+       /// directory must be the same as the default directory. Besides, the directory
+       /// will not be searched recursively.
+       /// </summary>
+       /// <param name="directory"></param>
+       /// <returns></returns>
+       public NativeLibraryConfig WithSearchDirectory(string directory)
+       {
+           ThrowIfLoaded();
+           _searchDirectories.Add(directory);
+           return this;
+       }
+
        internal static Description CheckAndGatherDescription()
        {
            if (Instance._allowFallback && Instance._skipCheck)
            {
                throw new ArgumentException("Cannot skip the check when fallback is allowed.");
            }
-           return new Description(Instance._libraryPath, Instance._useCuda, Instance._avxLevel, Instance._allowFallback, Instance._skipCheck, Instance._logging);
+           return new Description(
+               Instance._libraryPath,
+               Instance._useCuda,
+               Instance._avxLevel,
+               Instance._allowFallback,
+               Instance._skipCheck,
+               Instance._logging,
+               Instance._searchDirectories.Concat(new string[] { "./" }).ToArray());
        }

        internal static string AvxLevelToString(AvxLevel level)
@@ -183,7 +226,31 @@
            Avx512,
        }

-       internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging);
+       internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging, string[] SearchDirectories)
+       {
+           public override string ToString()
+           {
+               string avxLevelString = AvxLevel switch
+               {
+                   AvxLevel.None => "NoAVX",
+                   AvxLevel.Avx => "AVX",
+                   AvxLevel.Avx2 => "AVX2",
+                   AvxLevel.Avx512 => "AVX512",
+                   _ => "Unknown"
+               };
+               string searchDirectoriesString = "{ " + string.Join(", ", SearchDirectories) + " }";
+               return $"NativeLibraryConfig Description:\n" +
+                      $"- Path: {Path}\n" +
+                      $"- PreferCuda: {UseCuda}\n" +
+                      $"- PreferredAvxLevel: {avxLevelString}\n" +
+                      $"- AllowFallback: {AllowFallback}\n" +
+                      $"- SkipCheck: {SkipCheck}\n" +
+                      $"- Logging: {Logging}\n" +
+                      $"- SearchDirectories and Priorities: {searchDirectoriesString}";
+           }
+       }
        }
#endif
    }
}
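Because `ThrowIfLoaded` rejects changes once the native library has been loaded, and `CheckAndGatherDescription` snapshots the directory list with the default "./" appended last, search directories must be registered before the first LLamaSharp call. A sketch of the intended call order; "./my-runtimes" is a placeholder path that must mirror the default runtimes layout:

```csharp
using LLama.Native;

// Run this before anything triggers the native library load.
// "./my-runtimes" is a placeholder; it is probed ahead of the
// default "./" that CheckAndGatherDescription appends last.
NativeLibraryConfig.Instance.WithSearchDirectory("./my-runtimes");

// ...then create models/contexts as usual; the loader checks the
// directory above before the assembly-relative defaults.
```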
@@ -11,7 +11,7 @@
**The C#/.NET binding of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides higher-level APIs to inference the LLaMA Models and deploy it on local device with C#/.NET. It works on
-both Windows, Linux and MAC without requirment for compiling llama.cpp yourself. Even without GPU or not enought GPU memory, you can still apply LLaMA models well with this repo. 🤗**
+both Windows, Linux and MAC without requirement for compiling llama.cpp yourself. Even without a GPU or not enough GPU memory, you can still apply LLaMA models well with this repo. 🤗**

**Furthermore, it provides integrations with other projects such as [semantic-kernel](https://github.com/microsoft/semantic-kernel), [kernel-memory](https://github.com/microsoft/kernel-memory) and [BotSharp](https://github.com/SciSharp/BotSharp) to provide higher-level applications.**
@@ -129,7 +129,7 @@ Console.Write(prompt);

// run the inference in a loop to chat with LLM
while (prompt != "stop")
{
-   foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
+   await foreach (var text in session.ChatAsync(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
    {
        Console.Write(text);
    }
@@ -246,6 +246,7 @@ The llama.cpp commit id will help if you want to compile a DLL yourself.
| v0.5.1 | [Llama2 7b GGUF](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF) | 6b73ef1 |
| v0.6.0 | | [cb33f43](https://github.com/ggerganov/llama.cpp/commit/cb33f43a2a9f5a5a5f8d290dd97c625d9ba97a2f) |
| v0.7.0, v0.8.0 | [Thespis-13B](https://huggingface.co/TheBloke/Thespis-13B-v0.5-GGUF/tree/main?not-for-all-audiences=true), [LLaMA2-7B](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF) | [207b519](https://github.com/ggerganov/llama.cpp/commit/207b51900e15cc7f89763a3bb1c565fe11cbb45d) |
+| v0.8.1 | | [e937066](https://github.com/ggerganov/llama.cpp/commit/e937066420b79a757bf80e9836eb12b88420a218) |

## License