From b2423fe6e9fd468a3c435b3e714dd05bac4c54d9 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Sun, 7 Apr 2024 19:47:39 +0200 Subject: [PATCH 01/14] Standardizing Image Data implementation --- .../Examples/LlavaInteractiveModeExecute.cs | 31 +++++++------ LLama/Abstractions/ILLamaExecutor.cs | 46 ++++++++++++++++++- LLama/LLamaStatelessExecutor.cs | 8 ++-- 3 files changed, 65 insertions(+), 20 deletions(-) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 112fe23f..8cfa7376 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -1,7 +1,8 @@ using System.Text.RegularExpressions; +using LLama.Batched; using LLama.Common; using Spectre.Console; -using LLama.Native; +using LLama.Abstractions; namespace LLama.Examples.Examples { @@ -18,8 +19,12 @@ namespace LLama.Examples.Examples var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n"; - var parameters = new ModelParams(modelPath); - + var parameters = new ModelParams(modelPath) + { + ContextSize = 4096, + Seed = 1337, + GpuLayerCount = 10 + }; using var model = LLamaWeights.LoadFromFile(parameters); using var context = model.CreateContext(parameters); @@ -42,16 +47,16 @@ namespace LLama.Examples.Examples var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); var imageCount = imageMatches.Count(); var hasImages = imageCount > 0; + byte[][] imageBytes = null; if (hasImages) { var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); - var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList(); + var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value); - List imageBytes; try { - imageBytes = imagePaths.Select(File.ReadAllBytes).ToList(); + imageBytes = imagePaths.Select(File.ReadAllBytes).ToArray(); } catch (IOException exception) { @@ -64,17 +69,15 @@ namespace LLama.Examples.Examples break; } - // Each prompt with images we clear cache - // When the prompt contains images we clear KV_CACHE to restart conversation - // See: - // https://github.com/ggerganov/llama.cpp/discussions/3620 - ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 ); int index = 0; foreach (var path in imagePathsWithCurlyBraces) { // First image replace to tag " : ""); + if (index++ == 0) + prompt = prompt.Replace(path, ""); + else + prompt = prompt.Replace(path, ""); } @@ -99,7 +102,7 @@ namespace LLama.Examples.Examples // foreach (var image in imagePaths) { - ex.Images.Add(await File.ReadAllBytesAsync(image)); + ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, image)); } } @@ -115,7 +118,7 @@ namespace LLama.Examples.Examples // let the user finish with exit // - if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) + if (prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) break; } diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index 574a27d8..977cbc5e 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -25,9 +25,9 @@ namespace LLama.Abstractions public LLavaWeights? ClipModel { get; } /// - /// List of images: List of images in byte array format. + /// List of images: Image filen path, uri or image byte array. See ImageData. 
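+ /// <example>
+ /// A minimal usage sketch for the ImageData type added below (the executor variable
+ /// and the file name are illustrative assumptions, not part of this patch):
+ /// <code>
+ /// executor.Images.Add(new ImageData(ImageData.DataType.ImagePath, "image.jpg"));
+ /// executor.Images.Add(new ImageData(ImageData.DataType.ImageBytes, File.ReadAllBytes("image.jpg")));
+ /// </code>
+ /// </example>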
/// - public List Images { get; } + public List Images { get; } /// /// Asynchronously infers a response from the model. @@ -38,4 +38,46 @@ namespace LLama.Abstractions /// IAsyncEnumerable InferAsync(string text, IInferenceParams? inferenceParams = null, CancellationToken token = default); } + + /// + /// Holds image data + /// + public class ImageData + { + /// + /// constructor + /// + /// + /// + public ImageData(DataType type, object data) { Type = type; Data = data; } + + /// + /// the possible types of image data + /// + public enum DataType + { + /// + /// file path + /// + ImagePath, + /// + /// byte array + /// + ImageBytes, + /// + /// uri + /// + ImageURL + } + + /// + /// the type of this image data + /// + public DataType Type { get; set; } + + /// + /// the image data (string, byte array or uri) + /// + public object? Data { get; set; } + } } diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index a3c52a02..9d2f8c78 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -34,7 +34,7 @@ namespace LLama public LLavaWeights? ClipModel { get; } /// - public List Images { get; set; } + public List Images { get; set; } /// /// The context used by the executor when running the inference. @@ -49,7 +49,7 @@ namespace LLama /// public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null) { - Images = new List(); + Images = new List(); _weights = weights; _params = @params; _logger = logger; @@ -90,7 +90,7 @@ namespace LLama lastTokens.Add(0); // Tokenize the prompt - var tokens = Context.Tokenize(prompt, special: true).ToList(); + var tokens = Context.Tokenize(prompt).ToList(); lastTokens.AddRange(tokens); // Evaluate the prompt, in chunks smaller than the max batch size @@ -124,7 +124,7 @@ namespace LLama } // Check if this is the EOS token - if (id == _weights.Tokens.EOS) + if (id == _weights.EndOfSentenceToken) break; // Decode this token into text From f264024666d6ff0557215f9b17d19a4ca8b3056c Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Mon, 8 Apr 2024 16:10:54 +0200 Subject: [PATCH 02/14] Simplifying image handling --- .../Examples/LlavaInteractiveModeExecute.cs | 2 +- LLama/Abstractions/ILLamaExecutor.cs | 44 +------------------ LLama/LLamaStatelessExecutor.cs | 4 +- 3 files changed, 4 insertions(+), 46 deletions(-) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 8cfa7376..507f041b 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -102,7 +102,7 @@ namespace LLama.Examples.Examples // foreach (var image in imagePaths) { - ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, image)); + ex.Images.Add(File.ReadAllBytes(image)); } } diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index 977cbc5e..d6c8d2ce 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -27,7 +27,7 @@ namespace LLama.Abstractions /// /// List of images: Image filen path, uri or image byte array. See ImageData. /// - public List Images { get; } + public List Images { get; } /// /// Asynchronously infers a response from the model. @@ -38,46 +38,4 @@ namespace LLama.Abstractions /// IAsyncEnumerable InferAsync(string text, IInferenceParams? 
inferenceParams = null, CancellationToken token = default); } - - /// - /// Holds image data - /// - public class ImageData - { - /// - /// constructor - /// - /// - /// - public ImageData(DataType type, object data) { Type = type; Data = data; } - - /// - /// the possible types of image data - /// - public enum DataType - { - /// - /// file path - /// - ImagePath, - /// - /// byte array - /// - ImageBytes, - /// - /// uri - /// - ImageURL - } - - /// - /// the type of this image data - /// - public DataType Type { get; set; } - - /// - /// the image data (string, byte array or uri) - /// - public object? Data { get; set; } - } } diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index 9d2f8c78..f9d6ca5b 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -34,7 +34,7 @@ namespace LLama public LLavaWeights? ClipModel { get; } /// - public List Images { get; set; } + public List Images { get; set; } /// /// The context used by the executor when running the inference. @@ -49,7 +49,7 @@ namespace LLama /// public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null) { - Images = new List(); + Images = new List(); _weights = weights; _params = @params; _logger = logger; From 3ded2dd74d2200522787a1ac2f0484b3251182c4 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Fri, 19 Apr 2024 08:40:43 +0200 Subject: [PATCH 03/14] Embeddings correction --- LLama/LLamaEmbedder.cs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs index f60f3cd5..c29b6b25 100644 --- a/LLama/LLamaEmbedder.cs +++ b/LLama/LLamaEmbedder.cs @@ -97,18 +97,15 @@ namespace LLama private float[] GetEmbeddingsArray() { - unsafe + var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle); + if (embeddings == null || embeddings.Length == 0) { - var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle); - - if (embeddings == null) - embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero); - - if (embeddings == null) + embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero); + if (embeddings == null || embeddings.Length == 0) return Array.Empty(); - - return new Span(embeddings, Context.EmbeddingSize).ToArray(); } + + return embeddings.ToArray(); } private static void Normalize(Span embeddings) @@ -119,7 +116,6 @@ namespace LLama lengthSqr += value * value; var length = (float)Math.Sqrt(lengthSqr); - // Do not divide by length if it is zero if (length <= float.Epsilon) return; From b1f3987fae88fa85a9655c12869b9505f58c8d3e Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Fri, 19 Apr 2024 10:55:36 +0200 Subject: [PATCH 04/14] Automatic Solution Generator - Work in progress --- CMakeLists.txt | 126 ++++++++++++++++ LLama.GenerateSolution/CMakeLists.txt.in | 126 ++++++++++++++++ .../GenerateSolution.csproj | 14 ++ LLama.GenerateSolution/GenerateSolution.sln | 25 ++++ LLama.GenerateSolution/Program.cs | 137 ++++++++++++++++++ 5 files changed, 428 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 LLama.GenerateSolution/CMakeLists.txt.in create mode 100644 LLama.GenerateSolution/GenerateSolution.csproj create mode 100644 LLama.GenerateSolution/GenerateSolution.sln create mode 100644 LLama.GenerateSolution/Program.cs diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..b84dc1de --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,126 @@ 
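+# Example configure step this file is intended for (an assumed invocation, based on the
+# generator tool added later in this series; adjust the build directory and the Visual
+# Studio generator to your setup):
+#   cmake -S . -B build -G "Visual Studio 17 2022" -A x64
+# CMake should then emit a LLamaSharpCpp solution wrapping llama.cpp and the C# projects below.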
+#---------------------------------------------------------------------------------------- +# INFO: +# - How to use: change the flags in the 'Set one of these ON and all others OFF' section +# - CUDA: it will use automatically the CUDA SDK version installed +# +#---------------------------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.8) +project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) +if(NOT MSVC) + message(FATAL_ERROR "This CMake file only works with MSVC.") +endif(NOT MSVC) + +#--------- Set one of these ON and all others OFF -------------------> +option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) +option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF) +option(LLAMA_CUDA "CUDA (no AVX)" OFF) +#etc... add other setups +#<--------- Set one of these ON and all others OFF ------------------- + +# --------------- Don't change below this line ----------------------- + +# Variable Settings +if(LLAMA_CUDA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(LLAMA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" OFF) +elseif(LLAMA_CUDA) + option(LLAMA_AVX "llama: enable AVX" OFF) + option(LLAMA_AVX2 "llama: enable AVX2" OFF) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(OTHER_SETUPS) + #etc... +endif() + +# Fixed Settings +# general +option(BUILD_SHARED_LIBS "build shared libraries" ON) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_CCACHE "llama: use ccache if available" ON) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" OFF) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" OFF) +endif() + +if (WIN32) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") +#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) +option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) +option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") +option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "llama: max. 
batch size for using peer access") +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_VULKAN "llama: use Vulkan" OFF) +option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) +option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) +option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) +option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) +option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) +option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) +option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) +option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) +option(LLAMA_KOMPUTE "llama: use Kompute" OFF) +option(LLAMA_MPI "llama: use MPI" OFF) +option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) +option(LLAMA_SYCL "llama: use SYCL" OFF) +option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) + +option(LLAMA_BUILD_TESTS "llama: build tests" OFF) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) + +# add perf arguments +option(LLAMA_PERF "llama: enable perf" OFF) + +include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) + +include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) + +include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) + +include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) + +include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) + +include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) + +include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) + +add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/CMakeLists.txt.in b/LLama.GenerateSolution/CMakeLists.txt.in new file mode 100644 index 00000000..b84dc1de --- /dev/null +++ b/LLama.GenerateSolution/CMakeLists.txt.in @@ -0,0 +1,126 @@ +#---------------------------------------------------------------------------------------- +# INFO: +# - How to use: change the flags in the 'Set one of these ON and all others OFF' section +# - CUDA: it will use automatically the CUDA SDK version installed +# +#---------------------------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.8) +project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) +if(NOT MSVC) + message(FATAL_ERROR "This CMake file only works with MSVC.") +endif(NOT MSVC) + +#--------- Set one of these ON and all others OFF -------------------> +option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) +option(LLAMA_AVX2 "AVX2 (no CUDA)" 
OFF) +option(LLAMA_CUDA "CUDA (no AVX)" OFF) +#etc... add other setups +#<--------- Set one of these ON and all others OFF ------------------- + +# --------------- Don't change below this line ----------------------- + +# Variable Settings +if(LLAMA_CUDA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(LLAMA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" OFF) +elseif(LLAMA_CUDA) + option(LLAMA_AVX "llama: enable AVX" OFF) + option(LLAMA_AVX2 "llama: enable AVX2" OFF) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(OTHER_SETUPS) + #etc... +endif() + +# Fixed Settings +# general +option(BUILD_SHARED_LIBS "build shared libraries" ON) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_CCACHE "llama: use ccache if available" ON) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" OFF) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" OFF) +endif() + +if (WIN32) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") +#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) +option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) +option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") +option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "llama: max. 
batch size for using peer access") +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_VULKAN "llama: use Vulkan" OFF) +option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) +option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) +option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) +option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) +option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) +option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) +option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) +option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) +option(LLAMA_KOMPUTE "llama: use Kompute" OFF) +option(LLAMA_MPI "llama: use MPI" OFF) +option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) +option(LLAMA_SYCL "llama: use SYCL" OFF) +option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) + +option(LLAMA_BUILD_TESTS "llama: build tests" OFF) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) + +# add perf arguments +option(LLAMA_PERF "llama: enable perf" OFF) + +include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) + +include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) + +include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) + +include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) + +include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) + +include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) + +include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) + +add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/GenerateSolution.csproj b/LLama.GenerateSolution/GenerateSolution.csproj new file mode 100644 index 00000000..f28f91ba --- /dev/null +++ b/LLama.GenerateSolution/GenerateSolution.csproj @@ -0,0 +1,14 @@ + + + + Exe + net7.0 + enable + enable + + + + + + + diff --git a/LLama.GenerateSolution/GenerateSolution.sln b/LLama.GenerateSolution/GenerateSolution.sln new file mode 100644 index 00000000..74c9e8e1 --- /dev/null +++ b/LLama.GenerateSolution/GenerateSolution.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.8.34525.116 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GenerateSolution", "GenerateSolution.csproj", "{89306FE9-4428-4C70-AF58-0AF871BED56B}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + 
Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {32128714-30D0-4015-9184-24F609AAE564} + EndGlobalSection +EndGlobal diff --git a/LLama.GenerateSolution/Program.cs b/LLama.GenerateSolution/Program.cs new file mode 100644 index 00000000..ebf082b1 --- /dev/null +++ b/LLama.GenerateSolution/Program.cs @@ -0,0 +1,137 @@ +using Spectre.Console; +using System; +using System.Diagnostics; +using System.Text; +using static System.Runtime.InteropServices.JavaScript.JSType; + +namespace GenerateSolution +{ + internal class Program + { + static void Main(string[] args) + { + System.Console.InputEncoding = Encoding.Unicode; + System.Console.OutputEncoding = Encoding.Unicode; + + // Check if we can accept key strokes + if (!AnsiConsole.Profile.Capabilities.Interactive) + { + AnsiConsole.MarkupLine("[red]Environment does not support interaction.[/]"); + return; + } + + var options = AskOptions(); + var cmakePath = AskCMakePath(); + if(string.IsNullOrEmpty(cmakePath) == true) + { + cmakePath = "C:\\Program Files\\CMake\\bin\\cmake.exe"; + } + AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", cmakePath); + + string cmakeListsPath = @"..\..\..\..\CMakeLists.txt"; + + //cmake [] -B [-S ] + //TODO: get the chosen arguments from above (hardcoded values below) + //TODO: edit the CMakeList.txt.in template and create the CMakeLists.txt with the chosen options + cmakeListsPath += " -G \"Visual Studio 17 2022\" -A x64 -B ..\\..\\..\\..\\ -S ..\\..\\..\\..\\"; + + ProcessStartInfo startInfo = new ProcessStartInfo + { + FileName = cmakePath, + Arguments = cmakeListsPath, + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, + }; + + try + { + bool bSuccess = false; + string lastError = ""; + AnsiConsole.Progress() + .AutoClear(false) + .Columns(new ProgressColumn[] + { + new TaskDescriptionColumn(), + new SpinnerColumn(Spinner.Known.Ascii), + }) + .Start(ctx => + { + var cmakeTask = ctx.AddTask("Generating VS Solution", autoStart: false).IsIndeterminate(); + cmakeTask.StartTask(); + using (Process process = new Process()) + { + process.StartInfo = startInfo; + process.Start(); + string output = process.StandardOutput.ReadToEnd(); + lastError = process.StandardError.ReadToEnd(); + process.WaitForExit(); + cmakeTask.StopTask(); + if (process.ExitCode == 0) + { + bSuccess = true; + } + } + }); + + if (bSuccess == true) + { + AnsiConsole.WriteLine("VS solution generated successfully."); + } + else + { + AnsiConsole.WriteLine($"Error running CMake configuration: {lastError}"); + } + } + catch (Exception ex) + { + AnsiConsole.WriteLine("[red]ERROR[/] " + ex.Message); + } + + Console.ReadLine(); + } + + public static string AskCMakePath() + { + return AnsiConsole.Prompt( + new TextPrompt("What's your [green]CMake path[/] (default: C:\\Program Files\\CMake\\bin\\cmake.exe)?") + .AllowEmpty()); + } + + public static List AskOptions() + { + var options = 
AnsiConsole.Prompt( + new MultiSelectionPrompt() + .PageSize(10) + .Title("Select the preferred [green]options[/]?") + .MoreChoicesText("[grey](Move up and down to reveal more options)[/]") + .InstructionsText("[grey](Press [blue][/] to toggle an option, [green][/] to accept)[/]") + .AddChoiceGroup("Avx", new[] + { + "Avx2", "Avx512" + }) + .AddChoiceGroup("Cuda", new[] + { + "Cuda" + }) + .AddChoices(new[] + { + "x64", + }) + .AddChoiceGroup("Visual Studio", new[] + { + "Visual Studio 16 2019", + "Visual Studio 17 2022" + }) + ); + + if (options.Count > 0) + { + AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", string.Join(",",options)); + } + + return options; + } + } +} From ad2c81d9574df2be5f6213ee2136a1b19afd1236 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Fri, 19 Apr 2024 17:16:52 +0200 Subject: [PATCH 05/14] Revert "Automatic Solution Generator - Work in progress" This reverts commit 9c91fac20f3ebde5d1f1bc6a9feacaaa61c4d087. --- CMakeLists.txt | 126 ---------------- LLama.GenerateSolution/CMakeLists.txt.in | 126 ---------------- .../GenerateSolution.csproj | 14 -- LLama.GenerateSolution/GenerateSolution.sln | 25 ---- LLama.GenerateSolution/Program.cs | 137 ------------------ 5 files changed, 428 deletions(-) delete mode 100644 CMakeLists.txt delete mode 100644 LLama.GenerateSolution/CMakeLists.txt.in delete mode 100644 LLama.GenerateSolution/GenerateSolution.csproj delete mode 100644 LLama.GenerateSolution/GenerateSolution.sln delete mode 100644 LLama.GenerateSolution/Program.cs diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index b84dc1de..00000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,126 +0,0 @@ -#---------------------------------------------------------------------------------------- -# INFO: -# - How to use: change the flags in the 'Set one of these ON and all others OFF' section -# - CUDA: it will use automatically the CUDA SDK version installed -# -#---------------------------------------------------------------------------------------- -cmake_minimum_required(VERSION 3.8) -project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) -if(NOT MSVC) - message(FATAL_ERROR "This CMake file only works with MSVC.") -endif(NOT MSVC) - -#--------- Set one of these ON and all others OFF -------------------> -option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) -option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF) -option(LLAMA_CUDA "CUDA (no AVX)" OFF) -#etc... add other setups -#<--------- Set one of these ON and all others OFF ------------------- - -# --------------- Don't change below this line ----------------------- - -# Variable Settings -if(LLAMA_CUDA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(LLAMA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" OFF) -elseif(LLAMA_CUDA) - option(LLAMA_AVX "llama: enable AVX" OFF) - option(LLAMA_AVX2 "llama: enable AVX2" OFF) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(OTHER_SETUPS) - #etc... 
-endif() - -# Fixed Settings -# general -option(BUILD_SHARED_LIBS "build shared libraries" ON) -option(LLAMA_STATIC "llama: static link libraries" OFF) -option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) -option(LLAMA_LTO "llama: enable link time optimization" OFF) -option(LLAMA_CCACHE "llama: use ccache if available" ON) - -# debug -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) -option(LLAMA_GPROF "llama: enable gprof" OFF) - -# build -option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) - -# sanitizers -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) - -option(LLAMA_AVX512 "llama: enable AVX512" OFF) -option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) -option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) -option(LLAMA_FMA "llama: enable FMA" OFF) -# in MSVC F16C is implied with AVX2/AVX512 -if (NOT MSVC) - option(LLAMA_F16C "llama: enable F16C" OFF) -endif() - -if (WIN32) - set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") -endif() - -# 3rd party libs -option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_BLAS "llama: use BLAS" OFF) -set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") -#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) -option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) -option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") -option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) -set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") -set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING - "llama: max. 
batch size for using peer access") -option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) -option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) -option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -option(LLAMA_VULKAN "llama: use Vulkan" OFF) -option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) -option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) -option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) -option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) -option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) -option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) -option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) -option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) -option(LLAMA_KOMPUTE "llama: use Kompute" OFF) -option(LLAMA_MPI "llama: use MPI" OFF) -option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) -option(LLAMA_SYCL "llama: use SYCL" OFF) -option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) -option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) - -option(LLAMA_BUILD_TESTS "llama: build tests" OFF) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) -option(LLAMA_BUILD_SERVER "llama: build server example" OFF) - -# add perf arguments -option(LLAMA_PERF "llama: enable perf" OFF) - -include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) - -include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) - -include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) - -include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) - -include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) - -include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) - -include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) - -add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/CMakeLists.txt.in b/LLama.GenerateSolution/CMakeLists.txt.in deleted file mode 100644 index b84dc1de..00000000 --- a/LLama.GenerateSolution/CMakeLists.txt.in +++ /dev/null @@ -1,126 +0,0 @@ -#---------------------------------------------------------------------------------------- -# INFO: -# - How to use: change the flags in the 'Set one of these ON and all others OFF' section -# - CUDA: it will use automatically the CUDA SDK version installed -# -#---------------------------------------------------------------------------------------- -cmake_minimum_required(VERSION 3.8) -project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) -if(NOT MSVC) - message(FATAL_ERROR "This CMake file only works with MSVC.") -endif(NOT MSVC) - -#--------- Set one of these ON and all others OFF -------------------> -option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) -option(LLAMA_AVX2 "AVX2 (no 
CUDA)" OFF) -option(LLAMA_CUDA "CUDA (no AVX)" OFF) -#etc... add other setups -#<--------- Set one of these ON and all others OFF ------------------- - -# --------------- Don't change below this line ----------------------- - -# Variable Settings -if(LLAMA_CUDA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(LLAMA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" OFF) -elseif(LLAMA_CUDA) - option(LLAMA_AVX "llama: enable AVX" OFF) - option(LLAMA_AVX2 "llama: enable AVX2" OFF) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(OTHER_SETUPS) - #etc... -endif() - -# Fixed Settings -# general -option(BUILD_SHARED_LIBS "build shared libraries" ON) -option(LLAMA_STATIC "llama: static link libraries" OFF) -option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) -option(LLAMA_LTO "llama: enable link time optimization" OFF) -option(LLAMA_CCACHE "llama: use ccache if available" ON) - -# debug -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) -option(LLAMA_GPROF "llama: enable gprof" OFF) - -# build -option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) - -# sanitizers -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) - -option(LLAMA_AVX512 "llama: enable AVX512" OFF) -option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) -option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) -option(LLAMA_FMA "llama: enable FMA" OFF) -# in MSVC F16C is implied with AVX2/AVX512 -if (NOT MSVC) - option(LLAMA_F16C "llama: enable F16C" OFF) -endif() - -if (WIN32) - set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") -endif() - -# 3rd party libs -option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_BLAS "llama: use BLAS" OFF) -set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") -#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) -option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) -option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") -option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) -set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") -set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING - "llama: max. 
batch size for using peer access") -option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) -option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) -option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -option(LLAMA_VULKAN "llama: use Vulkan" OFF) -option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) -option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) -option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) -option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) -option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) -option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) -option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) -option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) -option(LLAMA_KOMPUTE "llama: use Kompute" OFF) -option(LLAMA_MPI "llama: use MPI" OFF) -option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) -option(LLAMA_SYCL "llama: use SYCL" OFF) -option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) -option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) - -option(LLAMA_BUILD_TESTS "llama: build tests" OFF) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) -option(LLAMA_BUILD_SERVER "llama: build server example" OFF) - -# add perf arguments -option(LLAMA_PERF "llama: enable perf" OFF) - -include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) - -include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) - -include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) - -include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) - -include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) - -include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) - -include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) - -add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/GenerateSolution.csproj b/LLama.GenerateSolution/GenerateSolution.csproj deleted file mode 100644 index f28f91ba..00000000 --- a/LLama.GenerateSolution/GenerateSolution.csproj +++ /dev/null @@ -1,14 +0,0 @@ - - - - Exe - net7.0 - enable - enable - - - - - - - diff --git a/LLama.GenerateSolution/GenerateSolution.sln b/LLama.GenerateSolution/GenerateSolution.sln deleted file mode 100644 index 74c9e8e1..00000000 --- a/LLama.GenerateSolution/GenerateSolution.sln +++ /dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.8.34525.116 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GenerateSolution", "GenerateSolution.csproj", "{89306FE9-4428-4C70-AF58-0AF871BED56B}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = 
preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.Build.0 = Debug|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.ActiveCfg = Release|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {32128714-30D0-4015-9184-24F609AAE564} - EndGlobalSection -EndGlobal diff --git a/LLama.GenerateSolution/Program.cs b/LLama.GenerateSolution/Program.cs deleted file mode 100644 index ebf082b1..00000000 --- a/LLama.GenerateSolution/Program.cs +++ /dev/null @@ -1,137 +0,0 @@ -using Spectre.Console; -using System; -using System.Diagnostics; -using System.Text; -using static System.Runtime.InteropServices.JavaScript.JSType; - -namespace GenerateSolution -{ - internal class Program - { - static void Main(string[] args) - { - System.Console.InputEncoding = Encoding.Unicode; - System.Console.OutputEncoding = Encoding.Unicode; - - // Check if we can accept key strokes - if (!AnsiConsole.Profile.Capabilities.Interactive) - { - AnsiConsole.MarkupLine("[red]Environment does not support interaction.[/]"); - return; - } - - var options = AskOptions(); - var cmakePath = AskCMakePath(); - if(string.IsNullOrEmpty(cmakePath) == true) - { - cmakePath = "C:\\Program Files\\CMake\\bin\\cmake.exe"; - } - AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", cmakePath); - - string cmakeListsPath = @"..\..\..\..\CMakeLists.txt"; - - //cmake [] -B [-S ] - //TODO: get the chosen arguments from above (hardcoded values below) - //TODO: edit the CMakeList.txt.in template and create the CMakeLists.txt with the chosen options - cmakeListsPath += " -G \"Visual Studio 17 2022\" -A x64 -B ..\\..\\..\\..\\ -S ..\\..\\..\\..\\"; - - ProcessStartInfo startInfo = new ProcessStartInfo - { - FileName = cmakePath, - Arguments = cmakeListsPath, - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true, - }; - - try - { - bool bSuccess = false; - string lastError = ""; - AnsiConsole.Progress() - .AutoClear(false) - .Columns(new ProgressColumn[] - { - new TaskDescriptionColumn(), - new SpinnerColumn(Spinner.Known.Ascii), - }) - .Start(ctx => - { - var cmakeTask = ctx.AddTask("Generating VS Solution", autoStart: false).IsIndeterminate(); - cmakeTask.StartTask(); - using (Process process = new Process()) - { - process.StartInfo = startInfo; - process.Start(); - string output = process.StandardOutput.ReadToEnd(); - lastError = process.StandardError.ReadToEnd(); - process.WaitForExit(); - cmakeTask.StopTask(); - if (process.ExitCode == 0) - { - bSuccess = true; - } - } - }); - - if (bSuccess == true) - { - AnsiConsole.WriteLine("VS solution generated successfully."); - } - else - { - AnsiConsole.WriteLine($"Error running CMake configuration: {lastError}"); - } - } - catch (Exception ex) - { - AnsiConsole.WriteLine("[red]ERROR[/] " + ex.Message); - } - - Console.ReadLine(); - } - - public static string AskCMakePath() - { - return AnsiConsole.Prompt( - new TextPrompt("What's your [green]CMake path[/] (default: C:\\Program Files\\CMake\\bin\\cmake.exe)?") - .AllowEmpty()); - } - - public static List AskOptions() - 
{ - var options = AnsiConsole.Prompt( - new MultiSelectionPrompt() - .PageSize(10) - .Title("Select the preferred [green]options[/]?") - .MoreChoicesText("[grey](Move up and down to reveal more options)[/]") - .InstructionsText("[grey](Press [blue][/] to toggle an option, [green][/] to accept)[/]") - .AddChoiceGroup("Avx", new[] - { - "Avx2", "Avx512" - }) - .AddChoiceGroup("Cuda", new[] - { - "Cuda" - }) - .AddChoices(new[] - { - "x64", - }) - .AddChoiceGroup("Visual Studio", new[] - { - "Visual Studio 16 2019", - "Visual Studio 17 2022" - }) - ); - - if (options.Count > 0) - { - AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", string.Join(",",options)); - } - - return options; - } - } -} From 5a196ec6f9a3fac03b0b57bdb97f1c5deb98d3d8 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 07:56:58 +0200 Subject: [PATCH 06/14] Reapply "Automatic Solution Generator - Work in progress" This reverts commit ad2c81d9574df2be5f6213ee2136a1b19afd1236. --- CMakeLists.txt | 126 ++++++++++++++++ LLama.GenerateSolution/CMakeLists.txt.in | 126 ++++++++++++++++ .../GenerateSolution.csproj | 14 ++ LLama.GenerateSolution/GenerateSolution.sln | 25 ++++ LLama.GenerateSolution/Program.cs | 137 ++++++++++++++++++ 5 files changed, 428 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 LLama.GenerateSolution/CMakeLists.txt.in create mode 100644 LLama.GenerateSolution/GenerateSolution.csproj create mode 100644 LLama.GenerateSolution/GenerateSolution.sln create mode 100644 LLama.GenerateSolution/Program.cs diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..b84dc1de --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,126 @@ +#---------------------------------------------------------------------------------------- +# INFO: +# - How to use: change the flags in the 'Set one of these ON and all others OFF' section +# - CUDA: it will use automatically the CUDA SDK version installed +# +#---------------------------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.8) +project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) +if(NOT MSVC) + message(FATAL_ERROR "This CMake file only works with MSVC.") +endif(NOT MSVC) + +#--------- Set one of these ON and all others OFF -------------------> +option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) +option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF) +option(LLAMA_CUDA "CUDA (no AVX)" OFF) +#etc... add other setups +#<--------- Set one of these ON and all others OFF ------------------- + +# --------------- Don't change below this line ----------------------- + +# Variable Settings +if(LLAMA_CUDA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(LLAMA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" OFF) +elseif(LLAMA_CUDA) + option(LLAMA_AVX "llama: enable AVX" OFF) + option(LLAMA_AVX2 "llama: enable AVX2" OFF) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(OTHER_SETUPS) + #etc... 
+endif() + +# Fixed Settings +# general +option(BUILD_SHARED_LIBS "build shared libraries" ON) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_CCACHE "llama: use ccache if available" ON) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" OFF) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" OFF) +endif() + +if (WIN32) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") +#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) +option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) +option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") +option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "llama: max. 
batch size for using peer access") +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_VULKAN "llama: use Vulkan" OFF) +option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) +option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) +option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) +option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) +option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) +option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) +option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) +option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) +option(LLAMA_KOMPUTE "llama: use Kompute" OFF) +option(LLAMA_MPI "llama: use MPI" OFF) +option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) +option(LLAMA_SYCL "llama: use SYCL" OFF) +option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) + +option(LLAMA_BUILD_TESTS "llama: build tests" OFF) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) + +# add perf arguments +option(LLAMA_PERF "llama: enable perf" OFF) + +include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) + +include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) + +include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) + +include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) + +include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) + +include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) + +include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) + +add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/CMakeLists.txt.in b/LLama.GenerateSolution/CMakeLists.txt.in new file mode 100644 index 00000000..b84dc1de --- /dev/null +++ b/LLama.GenerateSolution/CMakeLists.txt.in @@ -0,0 +1,126 @@ +#---------------------------------------------------------------------------------------- +# INFO: +# - How to use: change the flags in the 'Set one of these ON and all others OFF' section +# - CUDA: it will use automatically the CUDA SDK version installed +# +#---------------------------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.8) +project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) +if(NOT MSVC) + message(FATAL_ERROR "This CMake file only works with MSVC.") +endif(NOT MSVC) + +#--------- Set one of these ON and all others OFF -------------------> +option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) +option(LLAMA_AVX2 "AVX2 (no CUDA)" 
OFF) +option(LLAMA_CUDA "CUDA (no AVX)" OFF) +#etc... add other setups +#<--------- Set one of these ON and all others OFF ------------------- + +# --------------- Don't change below this line ----------------------- + +# Variable Settings +if(LLAMA_CUDA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(LLAMA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" OFF) +elseif(LLAMA_CUDA) + option(LLAMA_AVX "llama: enable AVX" OFF) + option(LLAMA_AVX2 "llama: enable AVX2" OFF) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(OTHER_SETUPS) + #etc... +endif() + +# Fixed Settings +# general +option(BUILD_SHARED_LIBS "build shared libraries" ON) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_CCACHE "llama: use ccache if available" ON) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" OFF) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" OFF) +endif() + +if (WIN32) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") +#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) +option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) +option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") +option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "llama: max. 
batch size for using peer access") +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_VULKAN "llama: use Vulkan" OFF) +option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) +option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) +option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) +option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) +option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) +option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) +option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) +option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) +option(LLAMA_KOMPUTE "llama: use Kompute" OFF) +option(LLAMA_MPI "llama: use MPI" OFF) +option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) +option(LLAMA_SYCL "llama: use SYCL" OFF) +option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) + +option(LLAMA_BUILD_TESTS "llama: build tests" OFF) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) + +# add perf arguments +option(LLAMA_PERF "llama: enable perf" OFF) + +include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) + +include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) + +include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) + +include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) + +include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) + +include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) + +include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) + +add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/GenerateSolution.csproj b/LLama.GenerateSolution/GenerateSolution.csproj new file mode 100644 index 00000000..f28f91ba --- /dev/null +++ b/LLama.GenerateSolution/GenerateSolution.csproj @@ -0,0 +1,14 @@ + + + + Exe + net7.0 + enable + enable + + + + + + + diff --git a/LLama.GenerateSolution/GenerateSolution.sln b/LLama.GenerateSolution/GenerateSolution.sln new file mode 100644 index 00000000..74c9e8e1 --- /dev/null +++ b/LLama.GenerateSolution/GenerateSolution.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.8.34525.116 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GenerateSolution", "GenerateSolution.csproj", "{89306FE9-4428-4C70-AF58-0AF871BED56B}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + 
Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {32128714-30D0-4015-9184-24F609AAE564} + EndGlobalSection +EndGlobal diff --git a/LLama.GenerateSolution/Program.cs b/LLama.GenerateSolution/Program.cs new file mode 100644 index 00000000..ebf082b1 --- /dev/null +++ b/LLama.GenerateSolution/Program.cs @@ -0,0 +1,137 @@ +using Spectre.Console; +using System; +using System.Diagnostics; +using System.Text; +using static System.Runtime.InteropServices.JavaScript.JSType; + +namespace GenerateSolution +{ + internal class Program + { + static void Main(string[] args) + { + System.Console.InputEncoding = Encoding.Unicode; + System.Console.OutputEncoding = Encoding.Unicode; + + // Check if we can accept key strokes + if (!AnsiConsole.Profile.Capabilities.Interactive) + { + AnsiConsole.MarkupLine("[red]Environment does not support interaction.[/]"); + return; + } + + var options = AskOptions(); + var cmakePath = AskCMakePath(); + if(string.IsNullOrEmpty(cmakePath) == true) + { + cmakePath = "C:\\Program Files\\CMake\\bin\\cmake.exe"; + } + AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", cmakePath); + + string cmakeListsPath = @"..\..\..\..\CMakeLists.txt"; + + //cmake [] -B [-S ] + //TODO: get the chosen arguments from above (hardcoded values below) + //TODO: edit the CMakeList.txt.in template and create the CMakeLists.txt with the chosen options + cmakeListsPath += " -G \"Visual Studio 17 2022\" -A x64 -B ..\\..\\..\\..\\ -S ..\\..\\..\\..\\"; + + ProcessStartInfo startInfo = new ProcessStartInfo + { + FileName = cmakePath, + Arguments = cmakeListsPath, + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, + }; + + try + { + bool bSuccess = false; + string lastError = ""; + AnsiConsole.Progress() + .AutoClear(false) + .Columns(new ProgressColumn[] + { + new TaskDescriptionColumn(), + new SpinnerColumn(Spinner.Known.Ascii), + }) + .Start(ctx => + { + var cmakeTask = ctx.AddTask("Generating VS Solution", autoStart: false).IsIndeterminate(); + cmakeTask.StartTask(); + using (Process process = new Process()) + { + process.StartInfo = startInfo; + process.Start(); + string output = process.StandardOutput.ReadToEnd(); + lastError = process.StandardError.ReadToEnd(); + process.WaitForExit(); + cmakeTask.StopTask(); + if (process.ExitCode == 0) + { + bSuccess = true; + } + } + }); + + if (bSuccess == true) + { + AnsiConsole.WriteLine("VS solution generated successfully."); + } + else + { + AnsiConsole.WriteLine($"Error running CMake configuration: {lastError}"); + } + } + catch (Exception ex) + { + AnsiConsole.WriteLine("[red]ERROR[/] " + ex.Message); + } + + Console.ReadLine(); + } + + public static string AskCMakePath() + { + return AnsiConsole.Prompt( + new TextPrompt("What's your [green]CMake path[/] (default: C:\\Program Files\\CMake\\bin\\cmake.exe)?") + .AllowEmpty()); + } + + public static List AskOptions() + { + var options = 
AnsiConsole.Prompt( + new MultiSelectionPrompt() + .PageSize(10) + .Title("Select the preferred [green]options[/]?") + .MoreChoicesText("[grey](Move up and down to reveal more options)[/]") + .InstructionsText("[grey](Press [blue][/] to toggle an option, [green][/] to accept)[/]") + .AddChoiceGroup("Avx", new[] + { + "Avx2", "Avx512" + }) + .AddChoiceGroup("Cuda", new[] + { + "Cuda" + }) + .AddChoices(new[] + { + "x64", + }) + .AddChoiceGroup("Visual Studio", new[] + { + "Visual Studio 16 2019", + "Visual Studio 17 2022" + }) + ); + + if (options.Count > 0) + { + AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", string.Join(",",options)); + } + + return options; + } + } +} From 5a4c0d4637dd164850bad40ac98bbe7f8bab6559 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 07:57:09 +0200 Subject: [PATCH 07/14] Revert "Automatic Solution Generator - Work in progress" This reverts commit b1f3987fae88fa85a9655c12869b9505f58c8d3e. --- CMakeLists.txt | 126 ---------------- LLama.GenerateSolution/CMakeLists.txt.in | 126 ---------------- .../GenerateSolution.csproj | 14 -- LLama.GenerateSolution/GenerateSolution.sln | 25 ---- LLama.GenerateSolution/Program.cs | 137 ------------------ 5 files changed, 428 deletions(-) delete mode 100644 CMakeLists.txt delete mode 100644 LLama.GenerateSolution/CMakeLists.txt.in delete mode 100644 LLama.GenerateSolution/GenerateSolution.csproj delete mode 100644 LLama.GenerateSolution/GenerateSolution.sln delete mode 100644 LLama.GenerateSolution/Program.cs diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index b84dc1de..00000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,126 +0,0 @@ -#---------------------------------------------------------------------------------------- -# INFO: -# - How to use: change the flags in the 'Set one of these ON and all others OFF' section -# - CUDA: it will use automatically the CUDA SDK version installed -# -#---------------------------------------------------------------------------------------- -cmake_minimum_required(VERSION 3.8) -project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) -if(NOT MSVC) - message(FATAL_ERROR "This CMake file only works with MSVC.") -endif(NOT MSVC) - -#--------- Set one of these ON and all others OFF -------------------> -option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) -option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF) -option(LLAMA_CUDA "CUDA (no AVX)" OFF) -#etc... add other setups -#<--------- Set one of these ON and all others OFF ------------------- - -# --------------- Don't change below this line ----------------------- - -# Variable Settings -if(LLAMA_CUDA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(LLAMA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" OFF) -elseif(LLAMA_CUDA) - option(LLAMA_AVX "llama: enable AVX" OFF) - option(LLAMA_AVX2 "llama: enable AVX2" OFF) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(OTHER_SETUPS) - #etc... 
-endif() - -# Fixed Settings -# general -option(BUILD_SHARED_LIBS "build shared libraries" ON) -option(LLAMA_STATIC "llama: static link libraries" OFF) -option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) -option(LLAMA_LTO "llama: enable link time optimization" OFF) -option(LLAMA_CCACHE "llama: use ccache if available" ON) - -# debug -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) -option(LLAMA_GPROF "llama: enable gprof" OFF) - -# build -option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) - -# sanitizers -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) - -option(LLAMA_AVX512 "llama: enable AVX512" OFF) -option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) -option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) -option(LLAMA_FMA "llama: enable FMA" OFF) -# in MSVC F16C is implied with AVX2/AVX512 -if (NOT MSVC) - option(LLAMA_F16C "llama: enable F16C" OFF) -endif() - -if (WIN32) - set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") -endif() - -# 3rd party libs -option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_BLAS "llama: use BLAS" OFF) -set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") -#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) -option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) -option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") -option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) -set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") -set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING - "llama: max. 
batch size for using peer access") -option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) -option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) -option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -option(LLAMA_VULKAN "llama: use Vulkan" OFF) -option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) -option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) -option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) -option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) -option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) -option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) -option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) -option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) -option(LLAMA_KOMPUTE "llama: use Kompute" OFF) -option(LLAMA_MPI "llama: use MPI" OFF) -option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) -option(LLAMA_SYCL "llama: use SYCL" OFF) -option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) -option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) - -option(LLAMA_BUILD_TESTS "llama: build tests" OFF) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) -option(LLAMA_BUILD_SERVER "llama: build server example" OFF) - -# add perf arguments -option(LLAMA_PERF "llama: enable perf" OFF) - -include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) - -include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) - -include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) - -include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) - -include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) - -include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) - -include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) - -add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/CMakeLists.txt.in b/LLama.GenerateSolution/CMakeLists.txt.in deleted file mode 100644 index b84dc1de..00000000 --- a/LLama.GenerateSolution/CMakeLists.txt.in +++ /dev/null @@ -1,126 +0,0 @@ -#---------------------------------------------------------------------------------------- -# INFO: -# - How to use: change the flags in the 'Set one of these ON and all others OFF' section -# - CUDA: it will use automatically the CUDA SDK version installed -# -#---------------------------------------------------------------------------------------- -cmake_minimum_required(VERSION 3.8) -project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) -if(NOT MSVC) - message(FATAL_ERROR "This CMake file only works with MSVC.") -endif(NOT MSVC) - -#--------- Set one of these ON and all others OFF -------------------> -option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) -option(LLAMA_AVX2 "AVX2 (no 
CUDA)" OFF) -option(LLAMA_CUDA "CUDA (no AVX)" OFF) -#etc... add other setups -#<--------- Set one of these ON and all others OFF ------------------- - -# --------------- Don't change below this line ----------------------- - -# Variable Settings -if(LLAMA_CUDA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(LLAMA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" OFF) -elseif(LLAMA_CUDA) - option(LLAMA_AVX "llama: enable AVX" OFF) - option(LLAMA_AVX2 "llama: enable AVX2" OFF) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(OTHER_SETUPS) - #etc... -endif() - -# Fixed Settings -# general -option(BUILD_SHARED_LIBS "build shared libraries" ON) -option(LLAMA_STATIC "llama: static link libraries" OFF) -option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) -option(LLAMA_LTO "llama: enable link time optimization" OFF) -option(LLAMA_CCACHE "llama: use ccache if available" ON) - -# debug -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) -option(LLAMA_GPROF "llama: enable gprof" OFF) - -# build -option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) - -# sanitizers -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) - -option(LLAMA_AVX512 "llama: enable AVX512" OFF) -option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) -option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) -option(LLAMA_FMA "llama: enable FMA" OFF) -# in MSVC F16C is implied with AVX2/AVX512 -if (NOT MSVC) - option(LLAMA_F16C "llama: enable F16C" OFF) -endif() - -if (WIN32) - set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") -endif() - -# 3rd party libs -option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_BLAS "llama: use BLAS" OFF) -set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") -#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) -option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) -option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") -option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) -set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") -set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING - "llama: max. 
batch size for using peer access") -option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) -option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) -option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -option(LLAMA_VULKAN "llama: use Vulkan" OFF) -option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) -option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) -option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) -option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) -option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) -option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) -option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) -option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) -option(LLAMA_KOMPUTE "llama: use Kompute" OFF) -option(LLAMA_MPI "llama: use MPI" OFF) -option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) -option(LLAMA_SYCL "llama: use SYCL" OFF) -option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) -option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) - -option(LLAMA_BUILD_TESTS "llama: build tests" OFF) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) -option(LLAMA_BUILD_SERVER "llama: build server example" OFF) - -# add perf arguments -option(LLAMA_PERF "llama: enable perf" OFF) - -include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) - -include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) - -include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) - -include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) - -include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) - -include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) - -include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) - -add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/GenerateSolution.csproj b/LLama.GenerateSolution/GenerateSolution.csproj deleted file mode 100644 index f28f91ba..00000000 --- a/LLama.GenerateSolution/GenerateSolution.csproj +++ /dev/null @@ -1,14 +0,0 @@ - - - - Exe - net7.0 - enable - enable - - - - - - - diff --git a/LLama.GenerateSolution/GenerateSolution.sln b/LLama.GenerateSolution/GenerateSolution.sln deleted file mode 100644 index 74c9e8e1..00000000 --- a/LLama.GenerateSolution/GenerateSolution.sln +++ /dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.8.34525.116 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GenerateSolution", "GenerateSolution.csproj", "{89306FE9-4428-4C70-AF58-0AF871BED56B}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = 
preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.Build.0 = Debug|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.ActiveCfg = Release|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {32128714-30D0-4015-9184-24F609AAE564} - EndGlobalSection -EndGlobal diff --git a/LLama.GenerateSolution/Program.cs b/LLama.GenerateSolution/Program.cs deleted file mode 100644 index ebf082b1..00000000 --- a/LLama.GenerateSolution/Program.cs +++ /dev/null @@ -1,137 +0,0 @@ -using Spectre.Console; -using System; -using System.Diagnostics; -using System.Text; -using static System.Runtime.InteropServices.JavaScript.JSType; - -namespace GenerateSolution -{ - internal class Program - { - static void Main(string[] args) - { - System.Console.InputEncoding = Encoding.Unicode; - System.Console.OutputEncoding = Encoding.Unicode; - - // Check if we can accept key strokes - if (!AnsiConsole.Profile.Capabilities.Interactive) - { - AnsiConsole.MarkupLine("[red]Environment does not support interaction.[/]"); - return; - } - - var options = AskOptions(); - var cmakePath = AskCMakePath(); - if(string.IsNullOrEmpty(cmakePath) == true) - { - cmakePath = "C:\\Program Files\\CMake\\bin\\cmake.exe"; - } - AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", cmakePath); - - string cmakeListsPath = @"..\..\..\..\CMakeLists.txt"; - - //cmake [] -B [-S ] - //TODO: get the chosen arguments from above (hardcoded values below) - //TODO: edit the CMakeList.txt.in template and create the CMakeLists.txt with the chosen options - cmakeListsPath += " -G \"Visual Studio 17 2022\" -A x64 -B ..\\..\\..\\..\\ -S ..\\..\\..\\..\\"; - - ProcessStartInfo startInfo = new ProcessStartInfo - { - FileName = cmakePath, - Arguments = cmakeListsPath, - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true, - }; - - try - { - bool bSuccess = false; - string lastError = ""; - AnsiConsole.Progress() - .AutoClear(false) - .Columns(new ProgressColumn[] - { - new TaskDescriptionColumn(), - new SpinnerColumn(Spinner.Known.Ascii), - }) - .Start(ctx => - { - var cmakeTask = ctx.AddTask("Generating VS Solution", autoStart: false).IsIndeterminate(); - cmakeTask.StartTask(); - using (Process process = new Process()) - { - process.StartInfo = startInfo; - process.Start(); - string output = process.StandardOutput.ReadToEnd(); - lastError = process.StandardError.ReadToEnd(); - process.WaitForExit(); - cmakeTask.StopTask(); - if (process.ExitCode == 0) - { - bSuccess = true; - } - } - }); - - if (bSuccess == true) - { - AnsiConsole.WriteLine("VS solution generated successfully."); - } - else - { - AnsiConsole.WriteLine($"Error running CMake configuration: {lastError}"); - } - } - catch (Exception ex) - { - AnsiConsole.WriteLine("[red]ERROR[/] " + ex.Message); - } - - Console.ReadLine(); - } - - public static string AskCMakePath() - { - return AnsiConsole.Prompt( - new TextPrompt("What's your [green]CMake path[/] (default: C:\\Program Files\\CMake\\bin\\cmake.exe)?") - .AllowEmpty()); - } - - public static List AskOptions() - 
{ - var options = AnsiConsole.Prompt( - new MultiSelectionPrompt() - .PageSize(10) - .Title("Select the preferred [green]options[/]?") - .MoreChoicesText("[grey](Move up and down to reveal more options)[/]") - .InstructionsText("[grey](Press [blue][/] to toggle an option, [green][/] to accept)[/]") - .AddChoiceGroup("Avx", new[] - { - "Avx2", "Avx512" - }) - .AddChoiceGroup("Cuda", new[] - { - "Cuda" - }) - .AddChoices(new[] - { - "x64", - }) - .AddChoiceGroup("Visual Studio", new[] - { - "Visual Studio 16 2019", - "Visual Studio 17 2022" - }) - ); - - if (options.Count > 0) - { - AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", string.Join(",",options)); - } - - return options; - } - } -} From 8ea82bcc2855abaca3fc7a0e7c8cb7bb152585a1 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 07:57:12 +0200 Subject: [PATCH 08/14] Revert "Embeddings correction" This reverts commit 3ded2dd74d2200522787a1ac2f0484b3251182c4. --- LLama/LLamaEmbedder.cs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs index c29b6b25..f60f3cd5 100644 --- a/LLama/LLamaEmbedder.cs +++ b/LLama/LLamaEmbedder.cs @@ -97,15 +97,18 @@ namespace LLama private float[] GetEmbeddingsArray() { - var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle); - if (embeddings == null || embeddings.Length == 0) + unsafe { - embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero); - if (embeddings == null || embeddings.Length == 0) + var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle); + + if (embeddings == null) + embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero); + + if (embeddings == null) return Array.Empty(); - } - return embeddings.ToArray(); + return new Span(embeddings, Context.EmbeddingSize).ToArray(); + } } private static void Normalize(Span embeddings) @@ -116,6 +119,7 @@ namespace LLama lengthSqr += value * value; var length = (float)Math.Sqrt(lengthSqr); + // Do not divide by length if it is zero if (length <= float.Epsilon) return; From 6bd269da60cc3bbb56d6d2e0a1a1b1eadbaf3b91 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 07:57:15 +0200 Subject: [PATCH 09/14] Revert "Simplifying image handling" This reverts commit f264024666d6ff0557215f9b17d19a4ca8b3056c. --- .../Examples/LlavaInteractiveModeExecute.cs | 2 +- LLama/Abstractions/ILLamaExecutor.cs | 44 ++++++++++++++++++- LLama/LLamaStatelessExecutor.cs | 4 +- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 507f041b..8cfa7376 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -102,7 +102,7 @@ namespace LLama.Examples.Examples // foreach (var image in imagePaths) { - ex.Images.Add(File.ReadAllBytes(image)); + ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, image)); } } diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index d6c8d2ce..977cbc5e 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -27,7 +27,7 @@ namespace LLama.Abstractions /// /// List of images: Image filen path, uri or image byte array. See ImageData. /// - public List Images { get; } + public List Images { get; } /// /// Asynchronously infers a response from the model. 
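For readers following the series, a minimal usage sketch of the ImageData wrapper restored by this revert (the class itself is re-added in the hunk below); the executor instance "ex" and the file path are placeholders, assuming an InteractiveExecutor with a LLaVA clip model loaded as in the LlavaInteractiveModeExecute example:

    // Attach images to an executor through the ImageData wrapper.
    // ImagePath, ImageBytes and ImageURL are the three DataType values defined below.
    ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, "sample.jpg"));
    ex.Images.Add(new ImageData(ImageData.DataType.ImageBytes, File.ReadAllBytes("sample.jpg")));
    ex.Images.Add(new ImageData(ImageData.DataType.ImageURL, "https://example.com/sample.jpg"));

Note that PATCH 10/14 later reverts Images back to a plain list of byte arrays, so this shape only applies to the intermediate state of the series.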
@@ -38,4 +38,46 @@ namespace LLama.Abstractions /// IAsyncEnumerable InferAsync(string text, IInferenceParams? inferenceParams = null, CancellationToken token = default); } + + /// + /// Holds image data + /// + public class ImageData + { + /// + /// constructor + /// + /// + /// + public ImageData(DataType type, object data) { Type = type; Data = data; } + + /// + /// the possible types of image data + /// + public enum DataType + { + /// + /// file path + /// + ImagePath, + /// + /// byte array + /// + ImageBytes, + /// + /// uri + /// + ImageURL + } + + /// + /// the type of this image data + /// + public DataType Type { get; set; } + + /// + /// the image data (string, byte array or uri) + /// + public object? Data { get; set; } + } } diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index f9d6ca5b..9d2f8c78 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -34,7 +34,7 @@ namespace LLama public LLavaWeights? ClipModel { get; } /// - public List Images { get; set; } + public List Images { get; set; } /// /// The context used by the executor when running the inference. @@ -49,7 +49,7 @@ namespace LLama /// public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null) { - Images = new List(); + Images = new List(); _weights = weights; _params = @params; _logger = logger; From 156d7bb4636646c3d595d697d70d0f2a6a61f3fa Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 07:57:17 +0200 Subject: [PATCH 10/14] Revert "Standardizing Image Data implementation" This reverts commit b2423fe6e9fd468a3c435b3e714dd05bac4c54d9. --- .../Examples/LlavaInteractiveModeExecute.cs | 31 ++++++------- LLama/Abstractions/ILLamaExecutor.cs | 46 +------------------ LLama/LLamaStatelessExecutor.cs | 8 ++-- 3 files changed, 20 insertions(+), 65 deletions(-) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 8cfa7376..112fe23f 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -1,8 +1,7 @@ using System.Text.RegularExpressions; -using LLama.Batched; using LLama.Common; using Spectre.Console; -using LLama.Abstractions; +using LLama.Native; namespace LLama.Examples.Examples { @@ -19,12 +18,8 @@ namespace LLama.Examples.Examples var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n"; - var parameters = new ModelParams(modelPath) - { - ContextSize = 4096, - Seed = 1337, - GpuLayerCount = 10 - }; + var parameters = new ModelParams(modelPath); + using var model = LLamaWeights.LoadFromFile(parameters); using var context = model.CreateContext(parameters); @@ -47,16 +42,16 @@ namespace LLama.Examples.Examples var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); var imageCount = imageMatches.Count(); var hasImages = imageCount > 0; - byte[][] imageBytes = null; if (hasImages) { var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); - var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value); + var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList(); + List imageBytes; try { - imageBytes = imagePaths.Select(File.ReadAllBytes).ToArray(); + imageBytes = imagePaths.Select(File.ReadAllBytes).ToList(); } catch (IOException exception) { @@ -69,15 +64,17 @@ namespace LLama.Examples.Examples break; } + // Each 
prompt with images we clear cache + // When the prompt contains images we clear KV_CACHE to restart conversation + // See: + // https://github.com/ggerganov/llama.cpp/discussions/3620 + ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 ); int index = 0; foreach (var path in imagePathsWithCurlyBraces) { // First image replace to tag "); - else - prompt = prompt.Replace(path, ""); + prompt = prompt.Replace(path, index++ == 0 ? "" : ""); } @@ -102,7 +99,7 @@ namespace LLama.Examples.Examples // foreach (var image in imagePaths) { - ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, image)); + ex.Images.Add(await File.ReadAllBytesAsync(image)); } } @@ -118,7 +115,7 @@ namespace LLama.Examples.Examples // let the user finish with exit // - if (prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) + if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) break; } diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index 977cbc5e..574a27d8 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -25,9 +25,9 @@ namespace LLama.Abstractions public LLavaWeights? ClipModel { get; } /// - /// List of images: Image filen path, uri or image byte array. See ImageData. + /// List of images: List of images in byte array format. /// - public List Images { get; } + public List Images { get; } /// /// Asynchronously infers a response from the model. @@ -38,46 +38,4 @@ namespace LLama.Abstractions /// IAsyncEnumerable InferAsync(string text, IInferenceParams? inferenceParams = null, CancellationToken token = default); } - - /// - /// Holds image data - /// - public class ImageData - { - /// - /// constructor - /// - /// - /// - public ImageData(DataType type, object data) { Type = type; Data = data; } - - /// - /// the possible types of image data - /// - public enum DataType - { - /// - /// file path - /// - ImagePath, - /// - /// byte array - /// - ImageBytes, - /// - /// uri - /// - ImageURL - } - - /// - /// the type of this image data - /// - public DataType Type { get; set; } - - /// - /// the image data (string, byte array or uri) - /// - public object? Data { get; set; } - } } diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index 9d2f8c78..a3c52a02 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -34,7 +34,7 @@ namespace LLama public LLavaWeights? ClipModel { get; } /// - public List Images { get; set; } + public List Images { get; set; } /// /// The context used by the executor when running the inference. @@ -49,7 +49,7 @@ namespace LLama /// public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? 
logger = null) { - Images = new List(); + Images = new List(); _weights = weights; _params = @params; _logger = logger; @@ -90,7 +90,7 @@ namespace LLama lastTokens.Add(0); // Tokenize the prompt - var tokens = Context.Tokenize(prompt).ToList(); + var tokens = Context.Tokenize(prompt, special: true).ToList(); lastTokens.AddRange(tokens); // Evaluate the prompt, in chunks smaller than the max batch size @@ -124,7 +124,7 @@ namespace LLama } // Check if this is the EOS token - if (id == _weights.EndOfSentenceToken) + if (id == _weights.Tokens.EOS) break; // Decode this token into text From ab8dd0dfc7604249b70cf73334245e953377949f Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 08:06:40 +0200 Subject: [PATCH 11/14] Correcting non-standard way of working with PromptExecutionSettings The extension of PromptExecutionSettings is not only for ChatCompletion, but also for text completion and text embedding. --- .../Examples/SemanticKernelPrompt.cs | 4 +-- .../LLamaSharpChatCompletion.cs | 12 ++++---- .../ChatRequestSettings.cs | 30 +++++++++++++------ .../ChatRequestSettingsConverter.cs | 10 +++---- LLama.SemanticKernel/ExtensionMethods.cs | 7 ++--- .../LLamaSharpTextCompletion.cs | 5 ++-- .../ChatRequestSettingsConverterTests.cs | 15 +++++----- .../ChatRequestSettingsTests.cs | 16 +++++----- .../SemanticKernel/ExtensionMethodsTests.cs | 2 +- 9 files changed, 56 insertions(+), 45 deletions(-) rename LLama.SemanticKernel/{ChatCompletion => }/ChatRequestSettings.cs (76%) rename LLama.SemanticKernel/{ChatCompletion => }/ChatRequestSettingsConverter.cs (88%) diff --git a/LLama.Examples/Examples/SemanticKernelPrompt.cs b/LLama.Examples/Examples/SemanticKernelPrompt.cs index fdf58b3a..38002d3d 100644 --- a/LLama.Examples/Examples/SemanticKernelPrompt.cs +++ b/LLama.Examples/Examples/SemanticKernelPrompt.cs @@ -1,9 +1,9 @@ using LLama.Common; -using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; using LLamaSharp.SemanticKernel.TextCompletion; using Microsoft.SemanticKernel.TextGeneration; using Microsoft.Extensions.DependencyInjection; +using LLamaSharp.SemanticKernel; namespace LLama.Examples.Examples { @@ -31,7 +31,7 @@ namespace LLama.Examples.Examples One line TLDR with the fewest words."; - ChatRequestSettings settings = new() { MaxTokens = 100 }; + LLamaSharpPromptExecutionSettings settings = new() { MaxTokens = 100 }; var summarize = kernel.CreateFunctionFromPrompt(prompt, settings); string text1 = @" diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs index 7bcbaf7b..26ecdccc 100644 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs +++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs @@ -17,7 +17,7 @@ namespace LLamaSharp.SemanticKernel.ChatCompletion; public sealed class LLamaSharpChatCompletion : IChatCompletionService { private readonly ILLamaExecutor _model; - private ChatRequestSettings defaultRequestSettings; + private LLamaSharpPromptExecutionSettings defaultRequestSettings; private readonly IHistoryTransform historyTransform; private readonly ITextStreamTransform outputTransform; @@ -25,9 +25,9 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService public IReadOnlyDictionary Attributes => this._attributes; - static ChatRequestSettings GetDefaultSettings() + static LLamaSharpPromptExecutionSettings GetDefaultSettings() { - return new ChatRequestSettings + return new LLamaSharpPromptExecutionSettings { 
MaxTokens = 256, Temperature = 0, @@ -37,7 +37,7 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService } public LLamaSharpChatCompletion(ILLamaExecutor model, - ChatRequestSettings? defaultRequestSettings = default, + LLamaSharpPromptExecutionSettings? defaultRequestSettings = default, IHistoryTransform? historyTransform = null, ITextStreamTransform? outputTransform = null) { @@ -65,7 +65,7 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService public async Task> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { var settings = executionSettings != null - ? ChatRequestSettings.FromRequestSettings(executionSettings) + ? LLamaSharpPromptExecutionSettings.FromRequestSettings(executionSettings) : defaultRequestSettings; var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory()); @@ -86,7 +86,7 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService public async IAsyncEnumerable GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { var settings = executionSettings != null - ? ChatRequestSettings.FromRequestSettings(executionSettings) + ? LLamaSharpPromptExecutionSettings.FromRequestSettings(executionSettings) : defaultRequestSettings; var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory()); diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs b/LLama.SemanticKernel/ChatRequestSettings.cs similarity index 76% rename from LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs rename to LLama.SemanticKernel/ChatRequestSettings.cs index ac22e1fc..87dda39e 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs +++ b/LLama.SemanticKernel/ChatRequestSettings.cs @@ -1,10 +1,22 @@ -using Microsoft.SemanticKernel; + +/* Unmerged change from project 'LLamaSharp.SemanticKernel (netstandard2.0)' +Before: +using Microsoft.SemanticKernel; +After: +using LLamaSharp; +using LLamaSharp.SemanticKernel; +using LLamaSharp.SemanticKernel; +using LLamaSharp.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel; +*/ +using LLamaSharp.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel; using System.Text.Json; using System.Text.Json.Serialization; -namespace LLamaSharp.SemanticKernel.ChatCompletion; +namespace LLamaSharp.SemanticKernel; -public class ChatRequestSettings : PromptExecutionSettings +public class LLamaSharpPromptExecutionSettings : PromptExecutionSettings { /// /// Temperature controls the randomness of the completion. @@ -68,30 +80,30 @@ public class ChatRequestSettings : PromptExecutionSettings /// Template configuration /// Default max tokens /// An instance of OpenAIRequestSettings - public static ChatRequestSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? defaultMaxTokens = null) + public static LLamaSharpPromptExecutionSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? 
defaultMaxTokens = null) { if (requestSettings is null) { - return new ChatRequestSettings() + return new LLamaSharpPromptExecutionSettings() { MaxTokens = defaultMaxTokens }; } - if (requestSettings is ChatRequestSettings requestSettingsChatRequestSettings) + if (requestSettings is LLamaSharpPromptExecutionSettings requestSettingsChatRequestSettings) { return requestSettingsChatRequestSettings; } var json = JsonSerializer.Serialize(requestSettings); - var chatRequestSettings = JsonSerializer.Deserialize(json, s_options); + var chatRequestSettings = JsonSerializer.Deserialize(json, s_options); if (chatRequestSettings is not null) { return chatRequestSettings; } - throw new ArgumentException($"Invalid request settings, cannot convert to {nameof(ChatRequestSettings)}", nameof(requestSettings)); + throw new ArgumentException($"Invalid request settings, cannot convert to {nameof(LLamaSharpPromptExecutionSettings)}", nameof(requestSettings)); } private static readonly JsonSerializerOptions s_options = CreateOptions(); @@ -105,7 +117,7 @@ public class ChatRequestSettings : PromptExecutionSettings AllowTrailingCommas = true, PropertyNameCaseInsensitive = true, ReadCommentHandling = JsonCommentHandling.Skip, - Converters = { new ChatRequestSettingsConverter() } + Converters = { new LLamaSharpPromptExecutionSettingsConverter() } }; return options; diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs b/LLama.SemanticKernel/ChatRequestSettingsConverter.cs similarity index 88% rename from LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs rename to LLama.SemanticKernel/ChatRequestSettingsConverter.cs index e320ea3f..36ca9c6c 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs +++ b/LLama.SemanticKernel/ChatRequestSettingsConverter.cs @@ -3,17 +3,17 @@ using System.Collections.Generic; using System.Text.Json; using System.Text.Json.Serialization; -namespace LLamaSharp.SemanticKernel.ChatCompletion; +namespace LLamaSharp.SemanticKernel; /// /// JSON converter for /// -public class ChatRequestSettingsConverter : JsonConverter +public class LLamaSharpPromptExecutionSettingsConverter : JsonConverter { /// - public override ChatRequestSettings? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + public override LLamaSharpPromptExecutionSettings? 
Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) { - var requestSettings = new ChatRequestSettings(); + var requestSettings = new LLamaSharpPromptExecutionSettings(); while (reader.Read() && reader.TokenType != JsonTokenType.EndObject) { @@ -77,7 +77,7 @@ public class ChatRequestSettingsConverter : JsonConverter } /// - public override void Write(Utf8JsonWriter writer, ChatRequestSettings value, JsonSerializerOptions options) + public override void Write(Utf8JsonWriter writer, LLamaSharpPromptExecutionSettings value, JsonSerializerOptions options) { writer.WriteStartObject(); diff --git a/LLama.SemanticKernel/ExtensionMethods.cs b/LLama.SemanticKernel/ExtensionMethods.cs index 85f9064c..086999aa 100644 --- a/LLama.SemanticKernel/ExtensionMethods.cs +++ b/LLama.SemanticKernel/ExtensionMethods.cs @@ -1,5 +1,4 @@ -using LLamaSharp.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.ChatCompletion; namespace LLamaSharp.SemanticKernel; public static class ExtensionMethods @@ -23,11 +22,11 @@ public static class ExtensionMethods } /// - /// Convert ChatRequestSettings to LLamaSharp InferenceParams + /// Convert LLamaSharpPromptExecutionSettings to LLamaSharp InferenceParams /// /// /// - internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this ChatRequestSettings requestSettings) + internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this LLamaSharpPromptExecutionSettings requestSettings) { if (requestSettings is null) { diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs index 08ec33e1..31e07b2b 100644 --- a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs +++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs @@ -1,5 +1,4 @@ using LLama.Abstractions; -using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.Services; using Microsoft.SemanticKernel.TextGeneration; @@ -24,7 +23,7 @@ public sealed class LLamaSharpTextCompletion : ITextGenerationService /// public async Task> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { - var settings = ChatRequestSettings.FromRequestSettings(executionSettings); + var settings = LLamaSharpPromptExecutionSettings.FromRequestSettings(executionSettings); var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken); var sb = new StringBuilder(); await foreach (var token in result) @@ -37,7 +36,7 @@ public sealed class LLamaSharpTextCompletion : ITextGenerationService /// public async IAsyncEnumerable GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? 
kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - var settings = ChatRequestSettings.FromRequestSettings(executionSettings); + var settings = LLamaSharpPromptExecutionSettings.FromRequestSettings(executionSettings); var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken); await foreach (var token in result) { diff --git a/LLama.Unittest/SemanticKernel/ChatRequestSettingsConverterTests.cs b/LLama.Unittest/SemanticKernel/ChatRequestSettingsConverterTests.cs index 4190e852..4828a407 100644 --- a/LLama.Unittest/SemanticKernel/ChatRequestSettingsConverterTests.cs +++ b/LLama.Unittest/SemanticKernel/ChatRequestSettingsConverterTests.cs @@ -1,4 +1,5 @@ -using LLamaSharp.SemanticKernel.ChatCompletion; +using LLamaSharp.SemanticKernel; +using LLamaSharp.SemanticKernel.ChatCompletion; using System.Text.Json; namespace LLama.Unittest.SemanticKernel @@ -10,11 +11,11 @@ namespace LLama.Unittest.SemanticKernel { // Arrange var options = new JsonSerializerOptions(); - options.Converters.Add(new ChatRequestSettingsConverter()); + options.Converters.Add(new LLamaSharpPromptExecutionSettingsConverter()); var json = "{}"; // Act - var requestSettings = JsonSerializer.Deserialize(json, options); + var requestSettings = JsonSerializer.Deserialize(json, options); // Assert Assert.NotNull(requestSettings); @@ -36,7 +37,7 @@ namespace LLama.Unittest.SemanticKernel // Arrange var options = new JsonSerializerOptions(); options.AllowTrailingCommas = true; - options.Converters.Add(new ChatRequestSettingsConverter()); + options.Converters.Add(new LLamaSharpPromptExecutionSettingsConverter()); var json = @"{ ""frequency_penalty"": 0.5, ""max_tokens"": 250, @@ -49,7 +50,7 @@ namespace LLama.Unittest.SemanticKernel }"; // Act - var requestSettings = JsonSerializer.Deserialize(json, options); + var requestSettings = JsonSerializer.Deserialize(json, options); // Assert Assert.NotNull(requestSettings); @@ -73,7 +74,7 @@ namespace LLama.Unittest.SemanticKernel // Arrange var options = new JsonSerializerOptions(); options.AllowTrailingCommas = true; - options.Converters.Add(new ChatRequestSettingsConverter()); + options.Converters.Add(new LLamaSharpPromptExecutionSettingsConverter()); var json = @"{ ""FrequencyPenalty"": 0.5, ""MaxTokens"": 250, @@ -86,7 +87,7 @@ namespace LLama.Unittest.SemanticKernel }"; // Act - var requestSettings = JsonSerializer.Deserialize(json, options); + var requestSettings = JsonSerializer.Deserialize(json, options); // Assert Assert.NotNull(requestSettings); diff --git a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs index ef5d9670..d75a8d4b 100644 --- a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs +++ b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs @@ -1,4 +1,4 @@ -using LLamaSharp.SemanticKernel.ChatCompletion; +using LLamaSharp.SemanticKernel; using Microsoft.SemanticKernel; namespace LLama.Unittest.SemanticKernel @@ -10,7 +10,7 @@ namespace LLama.Unittest.SemanticKernel { // Arrange // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(null, null); + var requestSettings = LLamaSharpPromptExecutionSettings.FromRequestSettings(null, null); // Assert Assert.NotNull(requestSettings); @@ -31,7 +31,7 @@ namespace LLama.Unittest.SemanticKernel { // Arrange // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(null, 200); + var requestSettings = 
LLamaSharpPromptExecutionSettings.FromRequestSettings(null, 200); // Assert Assert.NotNull(requestSettings); @@ -51,7 +51,7 @@ namespace LLama.Unittest.SemanticKernel public void ChatRequestSettings_FromExistingRequestSettings() { // Arrange - var originalRequestSettings = new ChatRequestSettings() + var originalRequestSettings = new LLamaSharpPromptExecutionSettings() { FrequencyPenalty = 0.5, MaxTokens = 100, @@ -64,7 +64,7 @@ namespace LLama.Unittest.SemanticKernel }; // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(originalRequestSettings); + var requestSettings = LLamaSharpPromptExecutionSettings.FromRequestSettings(originalRequestSettings); // Assert Assert.NotNull(requestSettings); @@ -81,7 +81,7 @@ namespace LLama.Unittest.SemanticKernel }; // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(originalRequestSettings); + var requestSettings = LLamaSharpPromptExecutionSettings.FromRequestSettings(originalRequestSettings); // Assert Assert.NotNull(requestSettings); @@ -109,7 +109,7 @@ namespace LLama.Unittest.SemanticKernel }; // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(originalRequestSettings); + var requestSettings = LLamaSharpPromptExecutionSettings.FromRequestSettings(originalRequestSettings); // Assert Assert.NotNull(requestSettings); @@ -148,7 +148,7 @@ namespace LLama.Unittest.SemanticKernel }; // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(originalRequestSettings); + var requestSettings = LLamaSharpPromptExecutionSettings.FromRequestSettings(originalRequestSettings); // Assert Assert.NotNull(requestSettings); diff --git a/LLama.Unittest/SemanticKernel/ExtensionMethodsTests.cs b/LLama.Unittest/SemanticKernel/ExtensionMethodsTests.cs index dfcef182..574611fc 100644 --- a/LLama.Unittest/SemanticKernel/ExtensionMethodsTests.cs +++ b/LLama.Unittest/SemanticKernel/ExtensionMethodsTests.cs @@ -37,7 +37,7 @@ namespace LLamaSharp.SemanticKernel.Tests public void ToLLamaSharpInferenceParams_StateUnderTest_ExpectedBehavior() { // Arrange - var requestSettings = new ChatRequestSettings(); + var requestSettings = new LLamaSharpPromptExecutionSettings(); // Act var result = ExtensionMethods.ToLLamaSharpInferenceParams( From 59a0afdb778b77b5ed1930108e4d154f0c5ac9ac Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 08:24:02 +0200 Subject: [PATCH 12/14] Renaming files to correspond to class names --- ...hatRequestSettings.cs => LLamaSharpPromptExecutionSettings.cs} | 0 ...Converter.cs => LLamaSharpPromptExecutionSettingsConverter.cs} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename LLama.SemanticKernel/{ChatRequestSettings.cs => LLamaSharpPromptExecutionSettings.cs} (100%) rename LLama.SemanticKernel/{ChatRequestSettingsConverter.cs => LLamaSharpPromptExecutionSettingsConverter.cs} (100%) diff --git a/LLama.SemanticKernel/ChatRequestSettings.cs b/LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs similarity index 100% rename from LLama.SemanticKernel/ChatRequestSettings.cs rename to LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs diff --git a/LLama.SemanticKernel/ChatRequestSettingsConverter.cs b/LLama.SemanticKernel/LLamaSharpPromptExecutionSettingsConverter.cs similarity index 100% rename from LLama.SemanticKernel/ChatRequestSettingsConverter.cs rename to LLama.SemanticKernel/LLamaSharpPromptExecutionSettingsConverter.cs From 2aa96b206f88f1a9a715e6d59e371d72c9d03e31 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Sat, 27 Apr 2024 09:39:40 +0200 Subject: 
[PATCH 13/14] Adding Response Format - Correcting non-standard way of working with PromptExecutionSettings can be used downstream to post-process the messages based on the requested format --- LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs b/LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs index 87dda39e..5e8a6669 100644 --- a/LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs +++ b/LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs @@ -74,6 +74,12 @@ public class LLamaSharpPromptExecutionSettings : PromptExecutionSettings [JsonPropertyName("token_selection_biases")] public IDictionary TokenSelectionBiases { get; set; } = new Dictionary(); + /// + /// Indicates the format of the response which can be used downstream to post-process the messages. Handlebars: handlebars_object. JSON: json_object, etc. + /// + [JsonPropertyName("response_format")] + public string ResponseFormat { get; set; } = string.Empty; + /// /// Create a new settings object with the values from another settings object. /// From 54c01d4c2c295ba88e8beb70827cc8af323baaf2 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Tue, 30 Apr 2024 19:28:31 +0200 Subject: [PATCH 14/14] Making old code obsolete - SemanticKernel: Correcting working with PromptExecutionSettings --- .../ChatCompletion/ChatRequestSettings.cs | 114 ++++++++++++++++++ .../ChatRequestSettingsConverter.cs | 105 ++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs create mode 100644 LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs new file mode 100644 index 00000000..683f8c45 --- /dev/null +++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs @@ -0,0 +1,114 @@ +using Microsoft.SemanticKernel; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace LLamaSharp.SemanticKernel.ChatCompletion; + +[Obsolete("Use LLamaSharpPromptExecutionSettings instead")] +public class ChatRequestSettings : PromptExecutionSettings +{ + /// + /// Temperature controls the randomness of the completion. + /// The higher the temperature, the more random the completion. + /// + [JsonPropertyName("temperature")] + public double Temperature { get; set; } = 0; + + /// + /// TopP controls the diversity of the completion. + /// The higher the TopP, the more diverse the completion. + /// + [JsonPropertyName("top_p")] + public double TopP { get; set; } = 0; + + /// + /// Number between -2.0 and 2.0. Positive values penalize new tokens + /// based on whether they appear in the text so far, increasing the + /// model's likelihood to talk about new topics. + /// + [JsonPropertyName("presence_penalty")] + public double PresencePenalty { get; set; } = 0; + + /// + /// Number between -2.0 and 2.0. Positive values penalize new tokens + /// based on their existing frequency in the text so far, decreasing + /// the model's likelihood to repeat the same line verbatim. + /// + [JsonPropertyName("frequency_penalty")] + public double FrequencyPenalty { get; set; } = 0; + + /// + /// Sequences where the completion will stop generating further tokens. 
+    /// </summary>
+    [JsonPropertyName("stop_sequences")]
+    public IList<string> StopSequences { get; set; } = Array.Empty<string>();
+
+    /// <summary>
+    /// How many completions to generate for each prompt. Default is 1.
+    /// Note: Because this parameter generates many completions, it can quickly consume your token quota.
+    /// Use carefully and ensure that you have reasonable settings for max_tokens and stop.
+    /// </summary>
+    [JsonPropertyName("results_per_prompt")]
+    public int ResultsPerPrompt { get; set; } = 1;
+
+    /// <summary>
+    /// The maximum number of tokens to generate in the completion.
+    /// </summary>
+    [JsonPropertyName("max_tokens")]
+    public int? MaxTokens { get; set; }
+
+    /// <summary>
+    /// Modify the likelihood of specified tokens appearing in the completion.
+    /// </summary>
+    [JsonPropertyName("token_selection_biases")]
+    public IDictionary<int, int> TokenSelectionBiases { get; set; } = new Dictionary<int, int>();
+
+    /// <summary>
+    /// Create a new settings object with the values from another settings object.
+    /// </summary>
+    /// <param name="requestSettings">Template configuration</param>
+    /// <param name="defaultMaxTokens">Default max tokens</param>
+    /// <returns>An instance of ChatRequestSettings</returns>
+    public static ChatRequestSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? defaultMaxTokens = null)
+    {
+        if (requestSettings is null)
+        {
+            return new ChatRequestSettings()
+            {
+                MaxTokens = defaultMaxTokens
+            };
+        }
+
+        if (requestSettings is ChatRequestSettings requestSettingsChatRequestSettings)
+        {
+            return requestSettingsChatRequestSettings;
+        }
+
+        var json = JsonSerializer.Serialize(requestSettings);
+        var chatRequestSettings = JsonSerializer.Deserialize<ChatRequestSettings>(json, s_options);
+
+        if (chatRequestSettings is not null)
+        {
+            return chatRequestSettings;
+        }
+
+        throw new ArgumentException($"Invalid request settings, cannot convert to {nameof(ChatRequestSettings)}", nameof(requestSettings));
+    }
+
+    private static readonly JsonSerializerOptions s_options = CreateOptions();
+
+    private static JsonSerializerOptions CreateOptions()
+    {
+        JsonSerializerOptions options = new()
+        {
+            WriteIndented = true,
+            MaxDepth = 20,
+            AllowTrailingCommas = true,
+            PropertyNameCaseInsensitive = true,
+            ReadCommentHandling = JsonCommentHandling.Skip,
+            Converters = { new ChatRequestSettingsConverter() }
+        };
+
+        return options;
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs
new file mode 100644
index 00000000..15bc45cd
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs
@@ -0,0 +1,105 @@
+using System;
+using System.Collections.Generic;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace LLamaSharp.SemanticKernel.ChatCompletion;
+
+/// <summary>
+/// JSON converter for <see cref="ChatRequestSettings"/>
+/// </summary>
+[Obsolete("Use LLamaSharpPromptExecutionSettingsConverter instead")]
+public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
+{
+    /// <inheritdoc/>
+    public override ChatRequestSettings? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
+    {
+        var requestSettings = new ChatRequestSettings();
+
+        while (reader.Read() && reader.TokenType != JsonTokenType.EndObject)
+        {
+            if (reader.TokenType == JsonTokenType.PropertyName)
+            {
+                string? propertyName = reader.GetString();
+
+                if (propertyName is not null)
+                {
+                    // normalise property name to uppercase
+                    propertyName = propertyName.ToUpperInvariant();
+                }
+
+                reader.Read();
+
+                switch (propertyName)
+                {
+                    case "MODELID":
+                    case "MODEL_ID":
+                        requestSettings.ModelId = reader.GetString();
+                        break;
+                    case "TEMPERATURE":
+                        requestSettings.Temperature = reader.GetDouble();
+                        break;
+                    case "TOPP":
+                    case "TOP_P":
+                        requestSettings.TopP = reader.GetDouble();
+                        break;
+                    case "FREQUENCYPENALTY":
+                    case "FREQUENCY_PENALTY":
+                        requestSettings.FrequencyPenalty = reader.GetDouble();
+                        break;
+                    case "PRESENCEPENALTY":
+                    case "PRESENCE_PENALTY":
+                        requestSettings.PresencePenalty = reader.GetDouble();
+                        break;
+                    case "MAXTOKENS":
+                    case "MAX_TOKENS":
+                        requestSettings.MaxTokens = reader.GetInt32();
+                        break;
+                    case "STOPSEQUENCES":
+                    case "STOP_SEQUENCES":
+                        requestSettings.StopSequences = JsonSerializer.Deserialize<List<string>>(ref reader, options) ?? Array.Empty<string>();
+                        break;
+                    case "RESULTSPERPROMPT":
+                    case "RESULTS_PER_PROMPT":
+                        requestSettings.ResultsPerPrompt = reader.GetInt32();
+                        break;
+                    case "TOKENSELECTIONBIASES":
+                    case "TOKEN_SELECTION_BIASES":
+                        requestSettings.TokenSelectionBiases = JsonSerializer.Deserialize<Dictionary<int, int>>(ref reader, options) ?? new Dictionary<int, int>();
+                        break;
+                    default:
+                        reader.Skip();
+                        break;
+                }
+            }
+        }
+
+        return requestSettings;
+    }
+
+    /// <inheritdoc/>
+    public override void Write(Utf8JsonWriter writer, ChatRequestSettings value, JsonSerializerOptions options)
+    {
+        writer.WriteStartObject();
+
+        writer.WriteNumber("temperature", value.Temperature);
+        writer.WriteNumber("top_p", value.TopP);
+        writer.WriteNumber("frequency_penalty", value.FrequencyPenalty);
+        writer.WriteNumber("presence_penalty", value.PresencePenalty);
+        if (value.MaxTokens is null)
+        {
+            writer.WriteNull("max_tokens");
+        }
+        else
+        {
+            writer.WriteNumber("max_tokens", (decimal)value.MaxTokens);
+        }
+        writer.WritePropertyName("stop_sequences");
+        JsonSerializer.Serialize(writer, value.StopSequences, options);
+        writer.WriteNumber("results_per_prompt", value.ResultsPerPrompt);
+        writer.WritePropertyName("token_selection_biases");
+        JsonSerializer.Serialize(writer, value.TokenSelectionBiases, options);
+
+        writer.WriteEndObject();
+    }
+}
\ No newline at end of file
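
For illustration only, and not part of the patch series above: a minimal consumer of the settings introduced by these patches might look like the sketch below. It assumes the LLamaSharp.SemanticKernel namespace for LLamaSharpPromptExecutionSettings, and uses only the members visible in the diffs (FromRequestSettings, MaxTokens, and the new ResponseFormat property); the class name ResponseFormatDemo is made up for the example.

using System;
using LLamaSharp.SemanticKernel;   // assumed namespace of LLamaSharpPromptExecutionSettings

class ResponseFormatDemo
{
    static void Main()
    {
        // Passing null mirrors the unit test above: a default settings object is
        // created and the supplied value becomes MaxTokens.
        var settings = LLamaSharpPromptExecutionSettings.FromRequestSettings(null, 256);

        // ResponseFormat is the hint added in PATCH 13/14; downstream code can read it
        // to post-process the completion, for example as a JSON object.
        settings.ResponseFormat = "json_object";

        Console.WriteLine($"max_tokens={settings.MaxTokens}, response_format={settings.ResponseFormat}");
    }
}

The obsolete ChatRequestSettings and ChatRequestSettingsConverter re-added in PATCH 14/14 keep existing call sites compiling while the [Obsolete] attribute steers users toward the new type.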