diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets
index bc9a6911..b98fa2f6 100644
--- a/LLama/LLamaSharp.Runtime.targets
+++ b/LLama/LLamaSharp.Runtime.targets
@@ -5,39 +5,39 @@
PreserveNewest
- libllama.dll
+ runtimes/win-x64/native/libllama.dll
PreserveNewest
- libllama-cuda11.dll
+ runtimes/win-x64/native/cuda11/libllama.dll
PreserveNewest
- libllama-cuda12.dll
+ runtimes/win-x64/native/cuda12/libllama.dll
PreserveNewest
- libllama.so
+ runtimes/linux-x64/native/libllama.so
PreserveNewest
- libllama-cuda11.so
+ runtimes/linux-x64/native/cuda11/libllama.so
PreserveNewest
- libllama-cuda12.so
+ runtimes/linux-x64/native/cuda12/libllama.so
PreserveNewest
- runtimes/macos-arm64/libllama.dylib
+ runtimes/osx-arm64/native/libllama.dylib
PreserveNewest
- runtimes/macos-arm64/ggml-metal.metal
+ runtimes/osx-arm64/native/ggml-metal.metal
PreserveNewest
- runtimes/macos-x86_64/libllama.dylib
+ runtimes/osx-x64/native/libllama.dylib
\ No newline at end of file
diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj
index d525202f..f79fcb43 100644
--- a/LLama/LLamaSharp.csproj
+++ b/LLama/LLamaSharp.csproj
@@ -7,7 +7,7 @@
AnyCPU;x64;Arm64
True
- 0.5.0
+ 0.7.1
Yaohui Liu, Martin Evans, Haiping Chen
SciSharp STACK
true
diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs
index 119a36fb..3819980f 100644
--- a/LLama/Native/NativeApi.cs
+++ b/LLama/Native/NativeApi.cs
@@ -1,7 +1,10 @@
using System;
using System.Buffers;
+using System.Collections.Generic;
+using System.IO;
using System.Runtime.InteropServices;
using System.Text;
+using System.Text.Json;
using LLama.Exceptions;
#pragma warning disable IDE1006 // Naming Styles
@@ -43,6 +46,200 @@ namespace LLama.Native
llama_backend_init(false);
}
+ private static int GetCudaMajorVersion()
+ {
+ string? cudaPath;
+ string version = "";
+ if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+ {
+ cudaPath = Environment.GetEnvironmentVariable("CUDA_PATH");
+ if(cudaPath is null)
+ {
+ return -1;
+ }
+ version = GetCudaVersionFromPath(cudaPath);
+ }
+ else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ {
+ // Try the default first
+ cudaPath = "/usr/local/bin/cuda";
+ version = GetCudaVersionFromPath(cudaPath);
+ if (string.IsNullOrEmpty(version))
+ {
+ cudaPath = Environment.GetEnvironmentVariable("LD_LIBRARY_PATH");
+ if(cudaPath is null)
+ {
+ return -1;
+ }
+ foreach(var path in cudaPath.Split(':'))
+ {
+ version = GetCudaVersionFromPath(Path.Combine(path, ".."));
+ if (string.IsNullOrEmpty(version))
+ {
+ break;
+ }
+ }
+ }
+ }
+
+ if (string.IsNullOrEmpty(version))
+ {
+ return -1;
+ }
+ else
+ {
+ version = version.Split('.')[0];
+ bool success = int.TryParse(version, out var majorVersion);
+ if (success)
+ {
+ return majorVersion;
+ }
+ else
+ {
+ return -1;
+ }
+ }
+ }
+
+ private static string GetCudaVersionFromPath(string cudaPath)
+ {
+ try
+ {
+ string json = File.ReadAllText(Path.Combine(cudaPath, cudaVersionFile));
+ using (JsonDocument document = JsonDocument.Parse(json))
+ {
+ JsonElement root = document.RootElement;
+ JsonElement cublasNode = root.GetProperty("libcublas");
+ JsonElement versionNode = cublasNode.GetProperty("version");
+ if (versionNode.ValueKind == JsonValueKind.Undefined)
+ {
+ return string.Empty;
+ }
+ return versionNode.GetString();
+ }
+ }
+ catch (Exception)
+ {
+ return string.Empty;
+ }
+ }
+
+#if NET6_0_OR_GREATER
+ private static string GetAvxLibraryPath(NativeLibraryConfig.AvxLevel avxLevel, string prefix, string suffix)
+ {
+ var avxStr = NativeLibraryConfig.AvxLevelToString(avxLevel);
+ if (!string.IsNullOrEmpty(avxStr))
+ {
+ avxStr += "/";
+ }
+ return $"{prefix}{avxStr}{libraryName}{suffix}";
+ }
+
+ private static List GetLibraryTryOrder(NativeLibraryConfig.Description configuration)
+ {
+ OSPlatform platform;
+ string prefix, suffix;
+ if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+ {
+ platform = OSPlatform.Windows;
+ prefix = "runtimes/win-x64/native/";
+ suffix = ".dll";
+ }
+ else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ {
+ platform = OSPlatform.Linux;
+ prefix = "runtimes/linux-x64/native/";
+ suffix = ".so";
+ }
+ else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+ {
+ platform = OSPlatform.OSX;
+ suffix = ".dylib";
+ if (System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported)
+ {
+ prefix = "runtimes/osx-arm64/native/";
+ }
+ else
+ {
+ prefix = "runtimes/osx-x64/native/";
+ }
+ }
+ else
+ {
+ throw new RuntimeError($"Your system plarform is not supported, please open an issue in LLamaSharp.");
+ }
+
+ List result = new();
+ if (configuration.UseCuda && (platform == OSPlatform.Windows || platform == OSPlatform.Linux)) // no cuda on macos
+ {
+ int cudaVersion = GetCudaMajorVersion();
+
+ // TODO: load cuda library with avx
+ if (cudaVersion == -1 && !configuration.AllowFallback)
+ {
+ // if check skipped, we just try to load cuda libraries one by one.
+ if (configuration.SkipCheck)
+ {
+ result.Add($"{prefix}cuda12/{libraryName}{suffix}");
+ result.Add($"{prefix}cuda11/{libraryName}{suffix}");
+ }
+ else
+ {
+ throw new RuntimeError("Configured to load a cuda library but no cuda detected on your device.");
+ }
+ }
+ else if (cudaVersion == 11)
+ {
+ result.Add($"{prefix}cuda11/{libraryName}{suffix}");
+ }
+ else if (cudaVersion == 12)
+ {
+ result.Add($"{prefix}cuda12/{libraryName}{suffix}");
+ }
+ else if (cudaVersion > 0)
+ {
+ throw new RuntimeError($"Cuda version {cudaVersion} hasn't been supported by LLamaSharp, please open an issue for it.");
+ }
+ // otherwise no cuda detected but allow fallback
+ }
+
+ // use cpu (or mac possibly with metal)
+ if (!configuration.AllowFallback && platform != OSPlatform.OSX)
+ {
+ result.Add(GetAvxLibraryPath(configuration.AvxLevel, prefix, suffix));
+ }
+ else if(platform != OSPlatform.OSX) // in macos there's absolutely no avx
+ {
+#if NET8_0_OR_GREATER
+ if (configuration.AvxLevel == NativeLibraryConfig.AvxLevel.Avx512)
+ {
+ result.Add(GetAvxLibraryPath(NativeLibraryConfig.AvxLevel.Avx512, prefix, suffix)));
+ result.Add(GetAvxLibraryPath(NativeLibraryConfig.AvxLevel.Avx2, prefix, suffix)));
+ result.Add(GetAvxLibraryPath(NativeLibraryConfig.AvxLevel.Avx, prefix, suffix)));
+ }
+ else
+#endif
+ if (configuration.AvxLevel == NativeLibraryConfig.AvxLevel.Avx2)
+ {
+ result.Add(GetAvxLibraryPath(NativeLibraryConfig.AvxLevel.Avx2, prefix, suffix));
+ result.Add(GetAvxLibraryPath(NativeLibraryConfig.AvxLevel.Avx, prefix, suffix));
+ }
+ else if (configuration.AvxLevel == NativeLibraryConfig.AvxLevel.Avx)
+ {
+ result.Add(GetAvxLibraryPath(NativeLibraryConfig.AvxLevel.Avx, prefix, suffix));
+ }
+ result.Add(GetAvxLibraryPath(NativeLibraryConfig.AvxLevel.None, prefix, suffix));
+ }
+
+ if(platform == OSPlatform.OSX)
+ {
+ result.Add($"{prefix}{libraryName}{suffix}");
+ }
+
+ return result;
+ }
+#endif
+
///
/// Try to load libllama, using CPU feature detection to try and load a more specialised DLL if possible
///
@@ -50,38 +247,43 @@ namespace LLama.Native
private static IntPtr TryLoadLibrary()
{
#if NET6_0_OR_GREATER
+ var configuration = NativeLibraryConfig.GetInstance().Desc;
- if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+ if (!string.IsNullOrEmpty(configuration.Path))
{
- // All of the Windows libraries, in order of preference
- return TryLoad("cu12.1.0/libllama.dll")
- ?? TryLoad("cu11.7.1/libllama.dll")
-#if NET8_0_OR_GREATER
- ?? TryLoad("avx512/libllama.dll", System.Runtime.Intrinsics.X86.Avx512.IsSupported)
-#endif
- ?? TryLoad("avx2/libllama.dll", System.Runtime.Intrinsics.X86.Avx2.IsSupported)
- ?? TryLoad("avx/libllama.dll", System.Runtime.Intrinsics.X86.Avx.IsSupported)
- ?? IntPtr.Zero;
+ // When loading the user specified library, there's no fallback.
+ var result = TryLoad(configuration.Path, true);
+ if (result is null || result == IntPtr.Zero)
+ {
+ throw new RuntimeError($"Failed to load the native library [{configuration.Path}] you specified.");
+ }
+ return result ?? IntPtr.Zero;
}
- if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ var libraryTryLoadOrder = GetLibraryTryOrder(configuration);
+
+ foreach(var libraryPath in libraryTryLoadOrder)
{
- // All of the Linux libraries, in order of preference
- return TryLoad("cu12.1.0/libllama.so")
- ?? TryLoad("cu11.7.1/libllama.so")
-#if NET8_0_OR_GREATER
- ?? TryLoad("avx512/libllama.so", System.Runtime.Intrinsics.X86.Avx512.IsSupported)
-#endif
- ?? TryLoad("avx2/libllama.so", System.Runtime.Intrinsics.X86.Avx2.IsSupported)
- ?? TryLoad("avx/libllama.so", System.Runtime.Intrinsics.X86.Avx.IsSupported)
- ?? IntPtr.Zero;
+ var result = TryLoad(libraryPath, true);
+ if(result is not null && result != IntPtr.Zero)
+ {
+ Console.ForegroundColor = ConsoleColor.Red;
+ Console.WriteLine($"[Native Library] {libraryPath} is loaded.");
+ Console.ResetColor();
+ return result ?? IntPtr.Zero;
+ }
+ else
+ {
+ Console.WriteLine($"Tried to load {libraryPath}");
+ }
}
- if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+ if (!configuration.AllowFallback)
{
- return TryLoad("runtimes/macos-arm64/libllama.dylib", System.Runtime.Intrinsics.Arm.ArmBase.Arm64.IsSupported)
- ?? TryLoad("runtimes/macos-x86_64/libllama.dylib")
- ?? IntPtr.Zero;
+ throw new RuntimeError("Failed to load the library that match your rule, please" +
+ " 1) check your rule." +
+ " 2) try to allow fallback." +
+ " 3) or open an issue if it's expected to be successful.");
}
#endif
@@ -103,6 +305,7 @@ namespace LLama.Native
}
private const string libraryName = "libllama";
+ private const string cudaVersionFile = "version.json";
///
/// A method that does nothing. This is a native method, calling it will force the llama native dependencies to be loaded.
diff --git a/LLama/NativeLibraryConfig.cs b/LLama/NativeLibraryConfig.cs
new file mode 100644
index 00000000..a81c4fa7
--- /dev/null
+++ b/LLama/NativeLibraryConfig.cs
@@ -0,0 +1,113 @@
+using System;
+
+namespace LLama
+{
+#if NET6_0_OR_GREATER
+ ///
+ /// A class about configurations when loading native libraries.
+ /// Note that it could be configured only once before any call to llama model apis.
+ ///
+ public class NativeLibraryConfig
+ {
+ private static NativeLibraryConfig? instance;
+ private static readonly object lockObject = new object();
+
+ ///
+ /// Whether there's already a config for native library.
+ ///
+ public bool Initialied { get; private set; }
+ internal Description Desc { get; private set; }
+
+ internal static NativeLibraryConfig GetInstance()
+ {
+ if (instance is null)
+ {
+ lock (lockObject)
+ {
+ if (instance is null)
+ {
+ instance = new NativeLibraryConfig();
+ }
+ }
+ }
+ return instance;
+ }
+
+ ///
+ /// Load a specified native library as backend for LLamaSharp
+ ///
+ ///
+ ///
+ public static void WithLibrary(string libraryPath)
+ {
+ var config = GetInstance();
+ if (config.Initialied)
+ {
+ throw new InvalidOperationException("NativeLibraryConfig could be configured only once before any call to llama model apis.");
+ }
+ config.Desc = new Description(libraryPath);
+ }
+
+ ///
+ /// Ass rules to match a suitable library from installed LLamaSharp backend.
+ ///
+ ///
+ ///
+ /// Whether to allow fall-back when your hardware doesn't support your configuration.
+ /// Whether to skip the check when fallback is allowed.
+ /// It's especially useful when your cuda library is not in the default path.
+ ///
+ public static void WithMatchRule(bool useCuda = true, AvxLevel avxLevel = AvxLevel.Avx2, bool allowFallback = true, bool skipCheck = false)
+ {
+ if(allowFallback && skipCheck)
+ {
+ throw new ArgumentException("Cannot skip the check when fallback is allowed.");
+ }
+ var config = GetInstance();
+ if (config.Initialied)
+ {
+ throw new InvalidOperationException("NativeLibraryConfig could be configured only once before any call to llama model apis.");
+ }
+ config.Desc = new Description(UseCuda: useCuda, AvxLevel: avxLevel, AllowFallback: allowFallback, SkipCheck: skipCheck);
+ }
+
+ internal static string AvxLevelToString(AvxLevel level)
+ {
+ return level switch
+ {
+ AvxLevel.None => string.Empty,
+ AvxLevel.Avx => "avx",
+ AvxLevel.Avx2 => "avx2",
+#if NET8_0_OR_GREATER
+ AvxLevel.Avx512 => "avx512"
+#endif
+ _ => throw new ArgumentException($"Cannot recognize Avx level {level}")
+ };
+ }
+
+
+ private NativeLibraryConfig()
+ {
+ Desc = new Description();
+ }
+
+ ///
+ /// Avx support configuration
+ ///
+ public enum AvxLevel
+ {
+ ///
+ None = 0,
+ ///
+ Avx = 1,
+ ///
+ Avx2 = 2,
+#if NET8_0_OR_GREATER
+ ///
+ Avx512 = 3,
+#endif
+ }
+ internal record Description(string Path = "", bool UseCuda = true, AvxLevel AvxLevel = AvxLevel.Avx2, bool AllowFallback = true, bool SkipCheck = false);
+ }
+#endif
+ }
diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
index 5664be89..d7d1fd6b 100644
--- a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
+++ b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
@@ -21,7 +21,7 @@
-
+