@@ -23,11 +23,7 @@ fi
mkdir ./temp;
mkdir ./temp/runtimes;
# For sure it could be done better but cp -R did not work on osx
mkdir ./temp/runtimes/osx-arm64
mkdir ./temp/runtimes/osx-x64
cp ./LLama/runtimes/*.* ./temp/runtimes/;
cp ./LLama/runtimes/osx-arm64/*.* ./temp/runtimes/osx-arm64/;
cp ./LLama/runtimes/osx-x64/*.* ./temp/runtimes/osx-x64;
cp ./LLama/runtimes/build/*.* ./temp/;
# get the current version
@@ -0,0 +1,8 @@
指令:下面是一段你和用户的对话,你叫坤坤,是一个在各方面都拥有丰富经验的助理,你非常乐于回答用户的问题和帮助用户。
用户:你好,坤坤。
坤坤:你好,有什么我能帮助你的吗?
用户:中国的首都是哪座城市?
坤坤:中国的首都是北京市。
用户:特朗普是谁?
坤坤:
@@ -0,0 +1,69 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using LLama.Common;

namespace LLama.Examples.Examples
{
    public class ChatChineseGB2312
    {
        private static string ConvertFromEncodingToAnother(string input, Encoding original, Encoding target)
        {
            byte[] bytes = original.GetBytes(input);
            var convertedBytes = Encoding.Convert(original, target, bytes);
            return target.GetString(convertedBytes);
        }

        public static async Task Run()
        {
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); // Register gb2312 encoding
            Console.Write("Please input your model path: ");
            var modelPath = Console.ReadLine();
            var prompt = File.ReadAllText("Assets/chat-with-kunkun-chinese.txt", encoding: Encoding.GetEncoding("gb2312")).Trim();
            prompt = ConvertFromEncodingToAnother(prompt, Encoding.GetEncoding("gb2312"), Encoding.UTF8);

            var parameters = new ModelParams(modelPath)
            {
                ContextSize = 1024,
                Seed = 1337,
                GpuLayerCount = 20,
                Encoding = Encoding.UTF8
            };
            using var model = LLamaWeights.LoadFromFile(parameters);
            using var context = model.CreateContext(parameters);
            var executor = new InteractiveExecutor(context);

            var session = new ChatSession(executor).WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"));

            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common on Windows. It's recommended" +
                " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
            Console.ForegroundColor = ConsoleColor.White;

            // show the prompt
            Console.Write(prompt);
            while (true)
            {
                await foreach (var text in session.ChatAsync(prompt, new InferenceParams()
                {
                    Temperature = 0.3f,
                    TopK = 5,
                    TopP = 0.85f,
                    AntiPrompts = new List<string> { "用户:" },
                    MaxTokens = 2048,
                    RepeatPenalty = 1.05f
                }))
                {
                    // Convert the model's UTF-8 output back to gb2312 so it displays correctly in a gb2312 console.
                    Console.Write(ConvertFromEncodingToAnother(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
                }

                Console.ForegroundColor = ConsoleColor.Green;
                prompt = Console.ReadLine() ?? "";
                Console.ForegroundColor = ConsoleColor.White;
            }
        }
    }
}
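The example's encoding helper relies on `Encoding.Convert` plus the code-pages provider. Below is a minimal, self-contained sketch of the same round trip, assuming the System.Text.Encoding.CodePages package is referenced; the `gbText` input is purely illustrative:

```csharp
using System;
using System.Text;

class EncodingRoundTripSketch
{
    static void Main()
    {
        // gb2312 is not available on .NET Core / .NET 5+ until the
        // code-pages provider is registered (once per process).
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        var gb2312 = Encoding.GetEncoding("gb2312");

        string gbText = "你好,坤坤。"; // illustrative input

        // The same steps ConvertFromEncodingToAnother performs:
        // encode with the source encoding, convert the raw bytes,
        // then decode with the target encoding.
        byte[] gbBytes = gb2312.GetBytes(gbText);
        byte[] utf8Bytes = Encoding.Convert(gb2312, Encoding.UTF8, gbBytes);
        Console.WriteLine(Encoding.UTF8.GetString(utf8Bytes));
    }
}
```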
@@ -24,6 +24,7 @@ public class Runner
    { "Coding Assistant.", CodingAssistant.Run },
    { "Batch Decoding.", BatchedDecoding.Run },
    { "SK Kernel Memory.", KernelMemory.Run },
+   { "Chinese gb2312 chat", ChatChineseGB2312.Run },
    { "Exit", async () => Environment.Exit(0) }
};
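Each runner entry maps a display title to a `Func<Task>`, so wiring up the new example is a single dictionary line. A hedged sketch of that dispatch pattern (standalone and illustrative, not the actual `Runner` code):

```csharp
using System;
using System.Collections.Generic;
using System.Threading.Tasks;

class RunnerSketch
{
    // Menu entries map a display title to an async entry point,
    // mirroring the dictionary shape shown in the diff above.
    static readonly Dictionary<string, Func<Task>> Examples = new()
    {
        { "Chinese gb2312 chat", () => Task.CompletedTask }, // stand-in for ChatChineseGB2312.Run
        { "Exit", () => { Environment.Exit(0); return Task.CompletedTask; } }
    };

    static async Task Main()
    {
        // Dispatch: look up the chosen title and await its delegate.
        if (Examples.TryGetValue("Chinese gb2312 chat", out var run))
            await run();
    }
}
```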
@@ -31,7 +31,7 @@
    <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="8.0.0" />
    <PackageReference Include="Microsoft.KernelMemory.Core" Version="0.12.231123.1-preview" />
    <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta8" />
-   <PackageReference Include="Spectre.Console" Version="0.47.0" />
+   <PackageReference Include="Spectre.Console" Version="0.48.0" />
  </ItemGroup>

  <ItemGroup>
@@ -71,6 +71,9 @@
    <None Update="Assets\sample-SK-Readme.pdf">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
+   <None Update="Assets\chat-with-kunkun-chinese.txt">
+     <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+   </None>
  </ItemGroup>
</Project>
@@ -16,7 +16,7 @@
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
    <PackageReference Include="System.Linq.Async" Version="6.0.1" />
    <PackageReference Include="xunit" Version="2.6.2" />
-   <PackageReference Include="xunit.runner.visualstudio" Version="2.5.3">
+   <PackageReference Include="xunit.runner.visualstudio" Version="2.5.4">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
@@ -7,7 +7,7 @@
  </PropertyGroup>

  <ItemGroup>
-   <PackageReference Include="Microsoft.VisualStudio.Validation" Version="17.6.11" />
+   <PackageReference Include="Microsoft.VisualStudio.Validation" Version="17.8.8" />
    <PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
  </ItemGroup>
@@ -4,6 +4,7 @@ using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
+using System.Linq;
using System.Runtime.InteropServices;
using System.Text.Json;
@@ -258,6 +259,7 @@ namespace LLama.Native
            enableLogging = configuration.Logging;
            // Set the flag here so that referencing the variable elsewhere does not trigger the library load again.
            NativeLibraryConfig.LibraryHasLoaded = true;
+           Log(configuration.ToString(), LogLevel.Information);
            if (!string.IsNullOrEmpty(configuration.Path))
            {
@@ -273,6 +275,7 @@ namespace LLama.Native
            var libraryTryLoadOrder = GetLibraryTryOrder(configuration);

+           string[] preferredPaths = configuration.SearchDirectories;
            string[] possiblePathPrefix = new string[] {
                System.AppDomain.CurrentDomain.BaseDirectory,
                Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? ""
@@ -280,19 +283,22 @@ namespace LLama.Native
            var tryFindPath = (string filename) =>
            {
-               int i = 0;
-               while (!File.Exists(filename))
-               {
-                   if (i < possiblePathPrefix.Length)
-                   {
-                       filename = Path.Combine(possiblePathPrefix[i], filename);
-                       i++;
-                   }
-                   else
-                   {
-                       break;
-                   }
-               }
+               foreach (var path in preferredPaths)
+               {
+                   if (File.Exists(Path.Combine(path, filename)))
+                   {
+                       return Path.Combine(path, filename);
+                   }
+               }
+               foreach (var path in possiblePathPrefix)
+               {
+                   if (File.Exists(Path.Combine(path, filename)))
+                   {
+                       return Path.Combine(path, filename);
+                   }
+               }
                return filename;
            };
@@ -9,6 +9,17 @@ namespace LLama.Native
{
    using llama_token = Int32;

+   public enum LLamaTokenType
+   {
+       LLAMA_TOKEN_TYPE_UNDEFINED = 0,
+       LLAMA_TOKEN_TYPE_NORMAL = 1,
+       LLAMA_TOKEN_TYPE_UNKNOWN = 2,
+       LLAMA_TOKEN_TYPE_CONTROL = 3,
+       LLAMA_TOKEN_TYPE_USER_DEFINED = 4,
+       LLAMA_TOKEN_TYPE_UNUSED = 5,
+       LLAMA_TOKEN_TYPE_BYTE = 6,
+   }

    /// <summary>
    /// Callback from llama.cpp with log messages
    /// </summary>
@@ -243,6 +254,9 @@ namespace LLama.Native
        }
    }

+   [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+   public static extern LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, llama_token token);

    /// <summary>
    /// Get the size of the context window for the model for this context
    /// </summary>
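With the enum and export above, callers can classify vocabulary entries, for example to filter control tokens out of rendered output. A hedged sketch, assuming the P/Invoke lands on the usual `NativeApi` class and `model` is an already-loaded `SafeLlamaModelHandle`; the id range is arbitrary:

```csharp
// Illustrative only: probe the first few token ids and report control tokens.
for (int id = 0; id < 10; id++)
{
    var type = NativeApi.llama_token_get_type(model, id);
    if (type == LLamaTokenType.LLAMA_TOKEN_TYPE_CONTROL)
        Console.WriteLine($"token {id} is a control token");
}
```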
@@ -1,4 +1,6 @@
using System;
+using System.Collections.Generic;
+using System.Linq;

namespace LLama.Native
{
@@ -27,6 +29,10 @@ namespace LLama.Native
        private bool _allowFallback = true;
        private bool _skipCheck = false;
        private bool _logging = false;
+       /// <summary>
+       /// Search directories for the native library, probed in order; earlier entries take precedence, with the default "./" appended last.
+       /// </summary>
+       private List<string> _searchDirectories = new List<string>();

        private static void ThrowIfLoaded()
        {
@@ -120,13 +126,50 @@
            return this;
        }

+       /// <summary>
+       /// Add self-defined search directories. Note that the file structure of the added
+       /// directories must be the same as the default directory. Besides, the directories
+       /// will not be searched recursively.
+       /// </summary>
+       /// <param name="directories"></param>
+       /// <returns></returns>
+       public NativeLibraryConfig WithSearchDirectories(IEnumerable<string> directories)
+       {
+           ThrowIfLoaded();
+           _searchDirectories.AddRange(directories);
+           return this;
+       }
+
+       /// <summary>
+       /// Add a self-defined search directory. Note that the file structure of the added
+       /// directory must be the same as the default directory. Besides, the directory
+       /// will not be searched recursively.
+       /// </summary>
+       /// <param name="directory"></param>
+       /// <returns></returns>
+       public NativeLibraryConfig WithSearchDirectory(string directory)
+       {
+           ThrowIfLoaded();
+           _searchDirectories.Add(directory);
+           return this;
+       }
+
        internal static Description CheckAndGatherDescription()
        {
            if (Instance._allowFallback && Instance._skipCheck)
            {
                throw new ArgumentException("Cannot skip the check when fallback is allowed.");
            }
-           return new Description(Instance._libraryPath, Instance._useCuda, Instance._avxLevel, Instance._allowFallback, Instance._skipCheck, Instance._logging);
+           return new Description(
+               Instance._libraryPath,
+               Instance._useCuda,
+               Instance._avxLevel,
+               Instance._allowFallback,
+               Instance._skipCheck,
+               Instance._logging,
+               Instance._searchDirectories.Concat(new string[] { "./" }).ToArray());
        }

        internal static string AvxLevelToString(AvxLevel level)
@@ -183,7 +226,31 @@
            Avx512,
        }

-       internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging);
+       internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging, string[] SearchDirectories)
+       {
+           public override string ToString()
+           {
+               string avxLevelString = AvxLevel switch
+               {
+                   AvxLevel.None => "NoAVX",
+                   AvxLevel.Avx => "AVX",
+                   AvxLevel.Avx2 => "AVX2",
+                   AvxLevel.Avx512 => "AVX512",
+                   _ => "Unknown"
+               };
+               string searchDirectoriesString = "{ " + string.Join(", ", SearchDirectories) + " }";
+               return $"NativeLibraryConfig Description:\n" +
+                      $"- Path: {Path}\n" +
+                      $"- PreferCuda: {UseCuda}\n" +
+                      $"- PreferredAvxLevel: {avxLevelString}\n" +
+                      $"- AllowFallback: {AllowFallback}\n" +
+                      $"- SkipCheck: {SkipCheck}\n" +
+                      $"- Logging: {Logging}\n" +
+                      $"- SearchDirectories and Priorities: {searchDirectoriesString}";
+           }
+       }
        }
#endif
    }
}
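Because `ThrowIfLoaded` rejects changes once the native library has been loaded, and `CheckAndGatherDescription` snapshots the directory list with the default "./" appended last, search directories must be registered before the first LLamaSharp call. A sketch of the intended call order; "./my-runtimes" is a placeholder path that must mirror the default runtimes layout:

```csharp
using LLama.Native;

// Run this before anything triggers the native library load.
// "./my-runtimes" is a placeholder; it is probed ahead of the
// default "./" that CheckAndGatherDescription appends last.
NativeLibraryConfig.Instance.WithSearchDirectory("./my-runtimes");

// ...then create models/contexts as usual; the loader checks the
// directory above before the assembly-relative defaults.
```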
@@ -11,7 +11,7 @@
**The C#/.NET binding of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides higher-level APIs to inference the LLaMA Models and deploy it on local device with C#/.NET. It works on
-both Windows, Linux and MAC without requirment for compiling llama.cpp yourself. Even without GPU or not enought GPU memory, you can still apply LLaMA models well with this repo. 🤗**
+both Windows, Linux and MAC without requirement for compiling llama.cpp yourself. Even without a GPU or not enough GPU memory, you can still apply LLaMA models well with this repo. 🤗**

**Furthermore, it provides integrations with other projects such as [semantic-kernel](https://github.com/microsoft/semantic-kernel), [kernel-memory](https://github.com/microsoft/kernel-memory) and [BotSharp](https://github.com/SciSharp/BotSharp) to provide higher-level applications.**
@@ -129,7 +129,7 @@ Console.Write(prompt);

// run the inference in a loop to chat with LLM
while (prompt != "stop")
{
-   foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
+   await foreach (var text in session.ChatAsync(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
    {
        Console.Write(text);
    }
@@ -246,6 +246,7 @@ The llama.cpp commit id will help if you want to compile a DLL yourself.
| v0.5.1 | [Llama2 7b GGUF](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF) | 6b73ef1 |
| v0.6.0 | | [cb33f43](https://github.com/ggerganov/llama.cpp/commit/cb33f43a2a9f5a5a5f8d290dd97c625d9ba97a2f) |
| v0.7.0, v0.8.0 | [Thespis-13B](https://huggingface.co/TheBloke/Thespis-13B-v0.5-GGUF/tree/main?not-for-all-audiences=true), [LLaMA2-7B](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF) | [207b519](https://github.com/ggerganov/llama.cpp/commit/207b51900e15cc7f89763a3bb1c565fe11cbb45d) |
+| v0.8.1 | | [e937066](https://github.com/ggerganov/llama.cpp/commit/e937066420b79a757bf80e9836eb12b88420a218) |

## License