diff --git a/.github/prepare_release.sh b/.github/prepare_release.sh
index bfc42ed7..a4c96bbf 100755
--- a/.github/prepare_release.sh
+++ b/.github/prepare_release.sh
@@ -23,11 +23,7 @@ fi
 mkdir ./temp;
 mkdir ./temp/runtimes;
 # For sure it could be done better but cp -R did not work on osx
-mkdir ./temp/runtimes/osx-arm64
-mkdir ./temp/runtimes/osx-x64
 cp ./LLama/runtimes/*.* ./temp/runtimes/;
-cp ./LLama/runtimes/osx-arm64/*.* ./temp/runtimes/osx-arm64/;
-cp ./LLama/runtimes/osx-x64/*.* ./temp/runtimes/osx-x64;
 cp ./LLama/runtimes/build/*.* ./temp/;

 # get the current version
diff --git a/LLama.Examples/Assets/chat-with-kunkun-chinese.txt b/LLama.Examples/Assets/chat-with-kunkun-chinese.txt
new file mode 100644
index 00000000..295e24d5
--- /dev/null
+++ b/LLama.Examples/Assets/chat-with-kunkun-chinese.txt
@@ -0,0 +1,8 @@
+指令：下面是一段你和用户的对话，你叫坤坤，是一个在各方面都拥有丰富经验的助理，你非常乐于回答用户的问题和帮助用户。
+
+用户：你好，坤坤。
+坤坤：你好，有什么我能帮助你的吗？
+用户：中国的首都是哪座城市？
+坤坤：中国的首都是北京市。
+用户：特朗普是谁？
+坤坤：
\ No newline at end of file
diff --git a/LLama.Examples/Examples/ChatChineseGB2312.cs b/LLama.Examples/Examples/ChatChineseGB2312.cs
new file mode 100644
index 00000000..ff27b962
--- /dev/null
+++ b/LLama.Examples/Examples/ChatChineseGB2312.cs
@@ -0,0 +1,69 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using LLama.Common;
+
+namespace LLama.Examples.Examples
+{
+    public class ChatChineseGB2312
+    {
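+        // Converts a string from one encoding to another by round-tripping its bytes:
+        // the bytes are taken in the original encoding, converted with Encoding.Convert,
+        // then decoded in the target encoding.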
+        private static string ConvertFromEncodingToAnother(string input, Encoding original, Encoding target)
+        {
+            byte[] bytes = original.GetBytes(input);
+            var convertedBytes = Encoding.Convert(original, target, bytes);
+            return target.GetString(convertedBytes);
+        }
+
+        public static async Task Run()
+        {
+            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); // Register gb2312 encoding
+            Console.Write("Please input your model path: ");
+            var modelPath = Console.ReadLine();
+            var prompt = File.ReadAllText("Assets/chat-with-kunkun-chinese.txt", encoding: Encoding.GetEncoding("gb2312")).Trim();
+            prompt = ConvertFromEncodingToAnother(prompt, Encoding.GetEncoding("gb2312"), Encoding.UTF8);
+
+            var parameters = new ModelParams(modelPath)
+            {
+                ContextSize = 1024,
+                Seed = 1337,
+                GpuLayerCount = 20,
+                Encoding = Encoding.UTF8
+            };
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters);
+            var executor = new InteractiveExecutor(context);
+
+            var session = new ChatSession(executor).WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户"));
+
+            Console.ForegroundColor = ConsoleColor.Yellow;
+            Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common in Windows. It's recommended" +
+                " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
+            Console.ForegroundColor = ConsoleColor.White;
+
+            // show the prompt
+            Console.Write(prompt);
+            while (true)
+            {
+                await foreach (var text in session.ChatAsync(prompt, new InferenceParams()
+                {
+                    Temperature = 0.3f,
+                    TopK = 5,
+                    TopP = 0.85f,
+                    AntiPrompts = new List<string> { "用户:" },
+                    MaxTokens = 2048,
+                    RepeatPenalty = 1.05f
+                }))
+                {
+                    //Console.Write(text);
+                    Console.Write(ConvertFromEncodingToAnother(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
+                }
+
+                Console.ForegroundColor = ConsoleColor.Green;
+                prompt = Console.ReadLine();
+                Console.ForegroundColor = ConsoleColor.White;
+            }
+        }
+    }
+}
diff --git a/LLama.Examples/Examples/Runner.cs b/LLama.Examples/Examples/Runner.cs
index d7653657..0a37dcba 100644
--- a/LLama.Examples/Examples/Runner.cs
+++ b/LLama.Examples/Examples/Runner.cs
@@ -24,6 +24,7 @@ public class Runner
         { "Coding Assistant.", CodingAssistant.Run },
         { "Batch Decoding.", BatchedDecoding.Run },
         { "SK Kernel Memory.", KernelMemory.Run },
+        { "Chinese gb2312 chat", ChatChineseGB2312.Run },
         { "Exit", async () => Environment.Exit(0) }
     };
diff --git a/LLama.Examples/LLama.Examples.csproj b/LLama.Examples/LLama.Examples.csproj
index c2491218..958bb6c4 100644
--- a/LLama.Examples/LLama.Examples.csproj
+++ b/LLama.Examples/LLama.Examples.csproj
@@ -31,7 +31,7 @@
-
+
@@ -71,6 +71,9 @@
       PreserveNewest
+    <None Update="Assets/chat-with-kunkun-chinese.txt">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj
index 148f7660..fbaee5ed 100644
--- a/LLama.Unittest/LLama.Unittest.csproj
+++ b/LLama.Unittest/LLama.Unittest.csproj
@@ -16,7 +16,7 @@
-
+
       runtime; build; native; contentfiles; analyzers; buildtransitive
       all
diff --git a/LLama.WebAPI/LLama.WebAPI.csproj b/LLama.WebAPI/LLama.WebAPI.csproj
index bdb2ad8a..a8c1179c 100644
--- a/LLama.WebAPI/LLama.WebAPI.csproj
+++ b/LLama.WebAPI/LLama.WebAPI.csproj
@@ -7,7 +7,7 @@
-
+
diff --git a/LLama/Native/NativeApi.Load.cs b/LLama/Native/NativeApi.Load.cs
index 148f1735..d8a88725 100644
--- a/LLama/Native/NativeApi.Load.cs
+++ b/LLama/Native/NativeApi.Load.cs
@@ -4,6 +4,7 @@ using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.IO;
+using System.Linq;
 using System.Runtime.InteropServices;
 using System.Text.Json;

@@ -258,6 +259,7 @@ namespace LLama.Native
             enableLogging = configuration.Logging;
             // We move the flag to avoid loading library when the variable is called else where.
             NativeLibraryConfig.LibraryHasLoaded = true;
+            Log(configuration.ToString(), LogLevel.Information);

             if (!string.IsNullOrEmpty(configuration.Path))
             {
@@ -273,6 +275,7 @@ namespace LLama.Native

             var libraryTryLoadOrder = GetLibraryTryOrder(configuration);

+            string[] preferredPaths = configuration.SearchDirectories;
             string[] possiblePathPrefix = new string[] {
                 System.AppDomain.CurrentDomain.BaseDirectory,
                 Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) ?? ""
@@ -280,19 +283,22 @@ namespace LLama.Native

             var tryFindPath = (string filename) =>
             {
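+                // Check the user-supplied search directories first, so paths added via
+                // WithSearchDirectory/WithSearchDirectories take precedence over the
+                // default application directories.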
"" @@ -280,19 +283,22 @@ namespace LLama.Native var tryFindPath = (string filename) => { - int i = 0; - while (!File.Exists(filename)) + foreach(var path in preferredPaths) { - if (i < possiblePathPrefix.Length) + if (File.Exists(Path.Combine(path, filename))) { - filename = Path.Combine(possiblePathPrefix[i], filename); - i++; + return Path.Combine(path, filename); } - else + } + + foreach(var path in possiblePathPrefix) + { + if (File.Exists(Path.Combine(path, filename))) { - break; + return Path.Combine(path, filename); } } + return filename; }; diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs index a4f97a00..ca6027fc 100644 --- a/LLama/Native/NativeApi.cs +++ b/LLama/Native/NativeApi.cs @@ -9,6 +9,17 @@ namespace LLama.Native { using llama_token = Int32; + public enum LLamaTokenType + { + LLAMA_TOKEN_TYPE_UNDEFINED = 0, + LLAMA_TOKEN_TYPE_NORMAL = 1, + LLAMA_TOKEN_TYPE_UNKNOWN = 2, + LLAMA_TOKEN_TYPE_CONTROL = 3, + LLAMA_TOKEN_TYPE_USER_DEFINED = 4, + LLAMA_TOKEN_TYPE_UNUSED = 5, + LLAMA_TOKEN_TYPE_BYTE = 6, + } + /// /// Callback from llama.cpp with log messages /// @@ -243,6 +254,9 @@ namespace LLama.Native } } + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, llama_token token); + /// /// Get the size of the context window for the model for this context /// diff --git a/LLama/Native/NativeLibraryConfig.cs b/LLama/Native/NativeLibraryConfig.cs index 76f89357..e5135970 100644 --- a/LLama/Native/NativeLibraryConfig.cs +++ b/LLama/Native/NativeLibraryConfig.cs @@ -1,4 +1,6 @@ using System; +using System.Collections.Generic; +using System.Linq; namespace LLama.Native { @@ -27,6 +29,10 @@ namespace LLama.Native private bool _allowFallback = true; private bool _skipCheck = false; private bool _logging = false; + /// + /// search directory -> priority level, 0 is the lowest. + /// + private List _searchDirectories = new List(); private static void ThrowIfLoaded() { @@ -120,13 +126,50 @@ namespace LLama.Native return this; } + /// + /// Add self-defined search directories. Note that the file stucture of the added + /// directories must be the same as the default directory. Besides, the directory + /// won't be used recursively. + /// + /// + /// + public NativeLibraryConfig WithSearchDirectories(IEnumerable directories) + { + ThrowIfLoaded(); + + _searchDirectories.AddRange(directories); + return this; + } + + /// + /// Add self-defined search directories. Note that the file stucture of the added + /// directories must be the same as the default directory. Besides, the directory + /// won't be used recursively. 
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern LLamaTokenType llama_token_get_type(SafeLlamaModelHandle model, llama_token token);
+
         /// <summary>
         /// Get the size of the context window for the model for this context
         /// </summary>
diff --git a/LLama/Native/NativeLibraryConfig.cs b/LLama/Native/NativeLibraryConfig.cs
index 76f89357..e5135970 100644
--- a/LLama/Native/NativeLibraryConfig.cs
+++ b/LLama/Native/NativeLibraryConfig.cs
@@ -1,4 +1,6 @@
 using System;
+using System.Collections.Generic;
+using System.Linq;

 namespace LLama.Native
 {
@@ -27,6 +29,10 @@ namespace LLama.Native
         private bool _allowFallback = true;
         private bool _skipCheck = false;
         private bool _logging = false;
+        /// <summary>
+        /// search directory -> priority level, 0 is the lowest.
+        /// </summary>
+        private List<string> _searchDirectories = new List<string>();

         private static void ThrowIfLoaded()
         {
@@ -120,13 +126,50 @@ namespace LLama.Native
             return this;
         }

+        /// <summary>
+        /// Add self-defined search directories. Note that the file structure of the added
+        /// directories must be the same as the default directory. Besides, the directory
+        /// won't be used recursively.
+        /// </summary>
+        /// <param name="directories"></param>
+        /// <returns></returns>
+        public NativeLibraryConfig WithSearchDirectories(IEnumerable<string> directories)
+        {
+            ThrowIfLoaded();
+
+            _searchDirectories.AddRange(directories);
+            return this;
+        }
+
+        /// <summary>
+        /// Add a self-defined search directory. Note that the file structure of the added
+        /// directory must be the same as the default directory. Besides, the directory
+        /// won't be used recursively.
+        /// </summary>
+        /// <param name="directory"></param>
+        /// <returns></returns>
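+        /// <example>
+        /// Typical usage, before any model is loaded (the directory name is illustrative):
+        /// <code>NativeLibraryConfig.Instance.WithSearchDirectory("./native-libs");</code>
+        /// </example>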
+        public NativeLibraryConfig WithSearchDirectory(string directory)
+        {
+            ThrowIfLoaded();
+
+            _searchDirectories.Add(directory);
+            return this;
+        }
+
         internal static Description CheckAndGatherDescription()
         {
             if (Instance._allowFallback && Instance._skipCheck)
             {
                 throw new ArgumentException("Cannot skip the check when fallback is allowed.");
             }
-            return new Description(Instance._libraryPath, Instance._useCuda, Instance._avxLevel, Instance._allowFallback, Instance._skipCheck, Instance._logging);
+            return new Description(
+                Instance._libraryPath,
+                Instance._useCuda,
+                Instance._avxLevel,
+                Instance._allowFallback,
+                Instance._skipCheck,
+                Instance._logging,
+                Instance._searchDirectories.Concat(new string[] { "./" }).ToArray());
         }

         internal static string AvxLevelToString(AvxLevel level)
@@ -183,7 +226,31 @@ namespace LLama.Native
             Avx512,
         }

-        internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging);
+        internal record Description(string Path, bool UseCuda, AvxLevel AvxLevel, bool AllowFallback, bool SkipCheck, bool Logging, string[] SearchDirectories)
+        {
+            public override string ToString()
+            {
+                string avxLevelString = AvxLevel switch
+                {
+                    AvxLevel.None => "NoAVX",
+                    AvxLevel.Avx => "AVX",
+                    AvxLevel.Avx2 => "AVX2",
+                    AvxLevel.Avx512 => "AVX512",
+                    _ => "Unknown"
+                };
+
+                string searchDirectoriesString = "{ " + string.Join(", ", SearchDirectories) + " }";
+
+                return $"NativeLibraryConfig Description:\n" +
+                       $"- Path: {Path}\n" +
+                       $"- PreferCuda: {UseCuda}\n" +
+                       $"- PreferredAvxLevel: {avxLevelString}\n" +
+                       $"- AllowFallback: {AllowFallback}\n" +
+                       $"- SkipCheck: {SkipCheck}\n" +
+                       $"- Logging: {Logging}\n" +
+                       $"- SearchDirectories and Priorities: {searchDirectoriesString}";
+            }
+        }
     }
 #endif
-    }
+}
diff --git a/README.md b/README.md
index a3d2643e..af517804 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 **The C#/.NET binding of [llama.cpp](https://github.com/ggerganov/llama.cpp). It provides higher-level APIs to inference the LLaMA Models and deploy it on local device with C#/.NET. It works on
-both Windows, Linux and MAC without requirment for compiling llama.cpp yourself. Even without GPU or not enought GPU memory, you can still apply LLaMA models well with this repo. 🤗**
+both Windows, Linux and MAC without requirement for compiling llama.cpp yourself. Even without a GPU or not enough GPU memory, you can still apply LLaMA models well with this repo. 🤗**

 **Furthermore, it provides integrations with other projects such as [semantic-kernel](https://github.com/microsoft/semantic-kernel), [kernel-memory](https://github.com/microsoft/kernel-memory) and [BotSharp](https://github.com/SciSharp/BotSharp) to provide higher-level applications.**

@@ -129,7 +129,7 @@ Console.Write(prompt);
 // run the inference in a loop to chat with LLM
 while (prompt != "stop")
 {
-    foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
+    await foreach (var text in session.ChatAsync(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
     {
         Console.Write(text);
     }
@@ -246,6 +246,7 @@ The llama.cpp commit id will help if you want to compile a DLL yourself.
 | v0.5.1 | [Llama2 7b GGUF](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF)| 6b73ef1 |
 | v0.6.0 | | [cb33f43](https://github.com/ggerganov/llama.cpp/commit/cb33f43a2a9f5a5a5f8d290dd97c625d9ba97a2f) |
 | v0.7.0, v0.8.0 | [Thespis-13B](https://huggingface.co/TheBloke/Thespis-13B-v0.5-GGUF/tree/main?not-for-all-audiences=true), [LLaMA2-7B](https://huggingface.co/TheBloke/llama-2-7B-Guanaco-QLoRA-GGUF) | [207b519](https://github.com/ggerganov/llama.cpp/commit/207b51900e15cc7f89763a3bb1c565fe11cbb45d) |
+| v0.8.1 | | [e937066](https://github.com/ggerganov/llama.cpp/commit/e937066420b79a757bf80e9836eb12b88420a218) |

 ## License