From b2423fe6e9fd468a3c435b3e714dd05bac4c54d9 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Sun, 7 Apr 2024 19:47:39 +0200 Subject: [PATCH 01/14] Standardizing Image Data implementation --- .../Examples/LlavaInteractiveModeExecute.cs | 31 +++++++------ LLama/Abstractions/ILLamaExecutor.cs | 46 ++++++++++++++++++- LLama/LLamaStatelessExecutor.cs | 8 ++-- 3 files changed, 65 insertions(+), 20 deletions(-) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 112fe23f..8cfa7376 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -1,7 +1,8 @@ using System.Text.RegularExpressions; +using LLama.Batched; using LLama.Common; using Spectre.Console; -using LLama.Native; +using LLama.Abstractions; namespace LLama.Examples.Examples { @@ -18,8 +19,12 @@ namespace LLama.Examples.Examples var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n"; - var parameters = new ModelParams(modelPath); - + var parameters = new ModelParams(modelPath) + { + ContextSize = 4096, + Seed = 1337, + GpuLayerCount = 10 + }; using var model = LLamaWeights.LoadFromFile(parameters); using var context = model.CreateContext(parameters); @@ -42,16 +47,16 @@ namespace LLama.Examples.Examples var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); var imageCount = imageMatches.Count(); var hasImages = imageCount > 0; + byte[][] imageBytes = null; if (hasImages) { var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); - var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList(); + var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value); - List imageBytes; try { - imageBytes = imagePaths.Select(File.ReadAllBytes).ToList(); + imageBytes = imagePaths.Select(File.ReadAllBytes).ToArray(); } catch (IOException exception) { @@ -64,17 +69,15 @@ namespace LLama.Examples.Examples break; } - // Each prompt with images we clear cache - // When the prompt contains images we clear KV_CACHE to restart conversation - // See: - // https://github.com/ggerganov/llama.cpp/discussions/3620 - ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 ); int index = 0; foreach (var path in imagePathsWithCurlyBraces) { // First image replace to tag " : ""); + if (index++ == 0) + prompt = prompt.Replace(path, ""); + else + prompt = prompt.Replace(path, ""); } @@ -99,7 +102,7 @@ namespace LLama.Examples.Examples // foreach (var image in imagePaths) { - ex.Images.Add(await File.ReadAllBytesAsync(image)); + ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, image)); } } @@ -115,7 +118,7 @@ namespace LLama.Examples.Examples // let the user finish with exit // - if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) + if (prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) break; } diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index 574a27d8..977cbc5e 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -25,9 +25,9 @@ namespace LLama.Abstractions public LLavaWeights? ClipModel { get; } /// - /// List of images: List of images in byte array format. + /// List of images: Image filen path, uri or image byte array. See ImageData. 
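+ /// <example>
+ /// A minimal usage sketch for the ImageData type added below (the executor variable
+ /// and the file name are illustrative assumptions, not part of this patch):
+ /// <code>
+ /// executor.Images.Add(new ImageData(ImageData.DataType.ImagePath, "image.jpg"));
+ /// executor.Images.Add(new ImageData(ImageData.DataType.ImageBytes, File.ReadAllBytes("image.jpg")));
+ /// </code>
+ /// </example>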
/// - public List Images { get; } + public List Images { get; } /// /// Asynchronously infers a response from the model. @@ -38,4 +38,46 @@ namespace LLama.Abstractions /// IAsyncEnumerable InferAsync(string text, IInferenceParams? inferenceParams = null, CancellationToken token = default); } + + /// + /// Holds image data + /// + public class ImageData + { + /// + /// constructor + /// + /// + /// + public ImageData(DataType type, object data) { Type = type; Data = data; } + + /// + /// the possible types of image data + /// + public enum DataType + { + /// + /// file path + /// + ImagePath, + /// + /// byte array + /// + ImageBytes, + /// + /// uri + /// + ImageURL + } + + /// + /// the type of this image data + /// + public DataType Type { get; set; } + + /// + /// the image data (string, byte array or uri) + /// + public object? Data { get; set; } + } } diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index a3c52a02..9d2f8c78 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -34,7 +34,7 @@ namespace LLama public LLavaWeights? ClipModel { get; } /// - public List Images { get; set; } + public List Images { get; set; } /// /// The context used by the executor when running the inference. @@ -49,7 +49,7 @@ namespace LLama /// public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null) { - Images = new List(); + Images = new List(); _weights = weights; _params = @params; _logger = logger; @@ -90,7 +90,7 @@ namespace LLama lastTokens.Add(0); // Tokenize the prompt - var tokens = Context.Tokenize(prompt, special: true).ToList(); + var tokens = Context.Tokenize(prompt).ToList(); lastTokens.AddRange(tokens); // Evaluate the prompt, in chunks smaller than the max batch size @@ -124,7 +124,7 @@ namespace LLama } // Check if this is the EOS token - if (id == _weights.Tokens.EOS) + if (id == _weights.EndOfSentenceToken) break; // Decode this token into text From f264024666d6ff0557215f9b17d19a4ca8b3056c Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Mon, 8 Apr 2024 16:10:54 +0200 Subject: [PATCH 02/14] Simplifying image handling --- .../Examples/LlavaInteractiveModeExecute.cs | 2 +- LLama/Abstractions/ILLamaExecutor.cs | 44 +------------------ LLama/LLamaStatelessExecutor.cs | 4 +- 3 files changed, 4 insertions(+), 46 deletions(-) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 8cfa7376..507f041b 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -102,7 +102,7 @@ namespace LLama.Examples.Examples // foreach (var image in imagePaths) { - ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, image)); + ex.Images.Add(File.ReadAllBytes(image)); } } diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index 977cbc5e..d6c8d2ce 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -27,7 +27,7 @@ namespace LLama.Abstractions /// /// List of images: Image filen path, uri or image byte array. See ImageData. /// - public List Images { get; } + public List Images { get; } /// /// Asynchronously infers a response from the model. @@ -38,46 +38,4 @@ namespace LLama.Abstractions /// IAsyncEnumerable InferAsync(string text, IInferenceParams? 
inferenceParams = null, CancellationToken token = default); } - - /// - /// Holds image data - /// - public class ImageData - { - /// - /// constructor - /// - /// - /// - public ImageData(DataType type, object data) { Type = type; Data = data; } - - /// - /// the possible types of image data - /// - public enum DataType - { - /// - /// file path - /// - ImagePath, - /// - /// byte array - /// - ImageBytes, - /// - /// uri - /// - ImageURL - } - - /// - /// the type of this image data - /// - public DataType Type { get; set; } - - /// - /// the image data (string, byte array or uri) - /// - public object? Data { get; set; } - } } diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index 9d2f8c78..f9d6ca5b 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -34,7 +34,7 @@ namespace LLama public LLavaWeights? ClipModel { get; } /// - public List Images { get; set; } + public List Images { get; set; } /// /// The context used by the executor when running the inference. @@ -49,7 +49,7 @@ namespace LLama /// public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null) { - Images = new List(); + Images = new List(); _weights = weights; _params = @params; _logger = logger; From 3ded2dd74d2200522787a1ac2f0484b3251182c4 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Fri, 19 Apr 2024 08:40:43 +0200 Subject: [PATCH 03/14] Embeddings correction --- LLama/LLamaEmbedder.cs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs index f60f3cd5..c29b6b25 100644 --- a/LLama/LLamaEmbedder.cs +++ b/LLama/LLamaEmbedder.cs @@ -97,18 +97,15 @@ namespace LLama private float[] GetEmbeddingsArray() { - unsafe + var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle); + if (embeddings == null || embeddings.Length == 0) { - var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle); - - if (embeddings == null) - embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero); - - if (embeddings == null) + embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero); + if (embeddings == null || embeddings.Length == 0) return Array.Empty(); - - return new Span(embeddings, Context.EmbeddingSize).ToArray(); } + + return embeddings.ToArray(); } private static void Normalize(Span embeddings) @@ -119,7 +116,6 @@ namespace LLama lengthSqr += value * value; var length = (float)Math.Sqrt(lengthSqr); - // Do not divide by length if it is zero if (length <= float.Epsilon) return; From b1f3987fae88fa85a9655c12869b9505f58c8d3e Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Fri, 19 Apr 2024 10:55:36 +0200 Subject: [PATCH 04/14] Automatic Solution Generator - Work in progress --- CMakeLists.txt | 126 ++++++++++++++++ LLama.GenerateSolution/CMakeLists.txt.in | 126 ++++++++++++++++ .../GenerateSolution.csproj | 14 ++ LLama.GenerateSolution/GenerateSolution.sln | 25 ++++ LLama.GenerateSolution/Program.cs | 137 ++++++++++++++++++ 5 files changed, 428 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 LLama.GenerateSolution/CMakeLists.txt.in create mode 100644 LLama.GenerateSolution/GenerateSolution.csproj create mode 100644 LLama.GenerateSolution/GenerateSolution.sln create mode 100644 LLama.GenerateSolution/Program.cs diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..b84dc1de --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,126 @@ 
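+# Example configure step this file is intended for (an assumed invocation, based on the
+# generator tool added later in this series; adjust the build directory and the Visual
+# Studio generator to your setup):
+#   cmake -S . -B build -G "Visual Studio 17 2022" -A x64
+# CMake should then emit a LLamaSharpCpp solution wrapping llama.cpp and the C# projects below.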
+#---------------------------------------------------------------------------------------- +# INFO: +# - How to use: change the flags in the 'Set one of these ON and all others OFF' section +# - CUDA: it will use automatically the CUDA SDK version installed +# +#---------------------------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.8) +project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) +if(NOT MSVC) + message(FATAL_ERROR "This CMake file only works with MSVC.") +endif(NOT MSVC) + +#--------- Set one of these ON and all others OFF -------------------> +option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) +option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF) +option(LLAMA_CUDA "CUDA (no AVX)" OFF) +#etc... add other setups +#<--------- Set one of these ON and all others OFF ------------------- + +# --------------- Don't change below this line ----------------------- + +# Variable Settings +if(LLAMA_CUDA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(LLAMA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" OFF) +elseif(LLAMA_CUDA) + option(LLAMA_AVX "llama: enable AVX" OFF) + option(LLAMA_AVX2 "llama: enable AVX2" OFF) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(OTHER_SETUPS) + #etc... +endif() + +# Fixed Settings +# general +option(BUILD_SHARED_LIBS "build shared libraries" ON) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_CCACHE "llama: use ccache if available" ON) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" OFF) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" OFF) +endif() + +if (WIN32) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") +#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) +option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) +option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") +option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "llama: max. 
batch size for using peer access") +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_VULKAN "llama: use Vulkan" OFF) +option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) +option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) +option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) +option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) +option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) +option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) +option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) +option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) +option(LLAMA_KOMPUTE "llama: use Kompute" OFF) +option(LLAMA_MPI "llama: use MPI" OFF) +option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) +option(LLAMA_SYCL "llama: use SYCL" OFF) +option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) + +option(LLAMA_BUILD_TESTS "llama: build tests" OFF) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) + +# add perf arguments +option(LLAMA_PERF "llama: enable perf" OFF) + +include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) + +include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) + +include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) + +include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) + +include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) + +include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) + +include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) + +add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/CMakeLists.txt.in b/LLama.GenerateSolution/CMakeLists.txt.in new file mode 100644 index 00000000..b84dc1de --- /dev/null +++ b/LLama.GenerateSolution/CMakeLists.txt.in @@ -0,0 +1,126 @@ +#---------------------------------------------------------------------------------------- +# INFO: +# - How to use: change the flags in the 'Set one of these ON and all others OFF' section +# - CUDA: it will use automatically the CUDA SDK version installed +# +#---------------------------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.8) +project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) +if(NOT MSVC) + message(FATAL_ERROR "This CMake file only works with MSVC.") +endif(NOT MSVC) + +#--------- Set one of these ON and all others OFF -------------------> +option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) +option(LLAMA_AVX2 "AVX2 (no CUDA)" 
OFF) +option(LLAMA_CUDA "CUDA (no AVX)" OFF) +#etc... add other setups +#<--------- Set one of these ON and all others OFF ------------------- + +# --------------- Don't change below this line ----------------------- + +# Variable Settings +if(LLAMA_CUDA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(LLAMA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" OFF) +elseif(LLAMA_CUDA) + option(LLAMA_AVX "llama: enable AVX" OFF) + option(LLAMA_AVX2 "llama: enable AVX2" OFF) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(OTHER_SETUPS) + #etc... +endif() + +# Fixed Settings +# general +option(BUILD_SHARED_LIBS "build shared libraries" ON) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_CCACHE "llama: use ccache if available" ON) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" OFF) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" OFF) +endif() + +if (WIN32) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") +#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) +option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) +option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") +option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "llama: max. 
batch size for using peer access") +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_VULKAN "llama: use Vulkan" OFF) +option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) +option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) +option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) +option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) +option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) +option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) +option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) +option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) +option(LLAMA_KOMPUTE "llama: use Kompute" OFF) +option(LLAMA_MPI "llama: use MPI" OFF) +option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) +option(LLAMA_SYCL "llama: use SYCL" OFF) +option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) + +option(LLAMA_BUILD_TESTS "llama: build tests" OFF) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) + +# add perf arguments +option(LLAMA_PERF "llama: enable perf" OFF) + +include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) + +include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) + +include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) + +include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) + +include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) + +include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) + +include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) + +add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/GenerateSolution.csproj b/LLama.GenerateSolution/GenerateSolution.csproj new file mode 100644 index 00000000..f28f91ba --- /dev/null +++ b/LLama.GenerateSolution/GenerateSolution.csproj @@ -0,0 +1,14 @@ + + + + Exe + net7.0 + enable + enable + + + + + + + diff --git a/LLama.GenerateSolution/GenerateSolution.sln b/LLama.GenerateSolution/GenerateSolution.sln new file mode 100644 index 00000000..74c9e8e1 --- /dev/null +++ b/LLama.GenerateSolution/GenerateSolution.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.8.34525.116 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GenerateSolution", "GenerateSolution.csproj", "{89306FE9-4428-4C70-AF58-0AF871BED56B}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + 
Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {32128714-30D0-4015-9184-24F609AAE564} + EndGlobalSection +EndGlobal diff --git a/LLama.GenerateSolution/Program.cs b/LLama.GenerateSolution/Program.cs new file mode 100644 index 00000000..ebf082b1 --- /dev/null +++ b/LLama.GenerateSolution/Program.cs @@ -0,0 +1,137 @@ +using Spectre.Console; +using System; +using System.Diagnostics; +using System.Text; +using static System.Runtime.InteropServices.JavaScript.JSType; + +namespace GenerateSolution +{ + internal class Program + { + static void Main(string[] args) + { + System.Console.InputEncoding = Encoding.Unicode; + System.Console.OutputEncoding = Encoding.Unicode; + + // Check if we can accept key strokes + if (!AnsiConsole.Profile.Capabilities.Interactive) + { + AnsiConsole.MarkupLine("[red]Environment does not support interaction.[/]"); + return; + } + + var options = AskOptions(); + var cmakePath = AskCMakePath(); + if(string.IsNullOrEmpty(cmakePath) == true) + { + cmakePath = "C:\\Program Files\\CMake\\bin\\cmake.exe"; + } + AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", cmakePath); + + string cmakeListsPath = @"..\..\..\..\CMakeLists.txt"; + + //cmake [] -B [-S ] + //TODO: get the chosen arguments from above (hardcoded values below) + //TODO: edit the CMakeList.txt.in template and create the CMakeLists.txt with the chosen options + cmakeListsPath += " -G \"Visual Studio 17 2022\" -A x64 -B ..\\..\\..\\..\\ -S ..\\..\\..\\..\\"; + + ProcessStartInfo startInfo = new ProcessStartInfo + { + FileName = cmakePath, + Arguments = cmakeListsPath, + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, + }; + + try + { + bool bSuccess = false; + string lastError = ""; + AnsiConsole.Progress() + .AutoClear(false) + .Columns(new ProgressColumn[] + { + new TaskDescriptionColumn(), + new SpinnerColumn(Spinner.Known.Ascii), + }) + .Start(ctx => + { + var cmakeTask = ctx.AddTask("Generating VS Solution", autoStart: false).IsIndeterminate(); + cmakeTask.StartTask(); + using (Process process = new Process()) + { + process.StartInfo = startInfo; + process.Start(); + string output = process.StandardOutput.ReadToEnd(); + lastError = process.StandardError.ReadToEnd(); + process.WaitForExit(); + cmakeTask.StopTask(); + if (process.ExitCode == 0) + { + bSuccess = true; + } + } + }); + + if (bSuccess == true) + { + AnsiConsole.WriteLine("VS solution generated successfully."); + } + else + { + AnsiConsole.WriteLine($"Error running CMake configuration: {lastError}"); + } + } + catch (Exception ex) + { + AnsiConsole.WriteLine("[red]ERROR[/] " + ex.Message); + } + + Console.ReadLine(); + } + + public static string AskCMakePath() + { + return AnsiConsole.Prompt( + new TextPrompt("What's your [green]CMake path[/] (default: C:\\Program Files\\CMake\\bin\\cmake.exe)?") + .AllowEmpty()); + } + + public static List AskOptions() + { + var options = 
AnsiConsole.Prompt( + new MultiSelectionPrompt() + .PageSize(10) + .Title("Select the preferred [green]options[/]?") + .MoreChoicesText("[grey](Move up and down to reveal more options)[/]") + .InstructionsText("[grey](Press [blue][/] to toggle an option, [green][/] to accept)[/]") + .AddChoiceGroup("Avx", new[] + { + "Avx2", "Avx512" + }) + .AddChoiceGroup("Cuda", new[] + { + "Cuda" + }) + .AddChoices(new[] + { + "x64", + }) + .AddChoiceGroup("Visual Studio", new[] + { + "Visual Studio 16 2019", + "Visual Studio 17 2022" + }) + ); + + if (options.Count > 0) + { + AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", string.Join(",",options)); + } + + return options; + } + } +} From ad2c81d9574df2be5f6213ee2136a1b19afd1236 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Fri, 19 Apr 2024 17:16:52 +0200 Subject: [PATCH 05/14] Revert "Automatic Solution Generator - Work in progress" This reverts commit 9c91fac20f3ebde5d1f1bc6a9feacaaa61c4d087. --- CMakeLists.txt | 126 ---------------- LLama.GenerateSolution/CMakeLists.txt.in | 126 ---------------- .../GenerateSolution.csproj | 14 -- LLama.GenerateSolution/GenerateSolution.sln | 25 ---- LLama.GenerateSolution/Program.cs | 137 ------------------ 5 files changed, 428 deletions(-) delete mode 100644 CMakeLists.txt delete mode 100644 LLama.GenerateSolution/CMakeLists.txt.in delete mode 100644 LLama.GenerateSolution/GenerateSolution.csproj delete mode 100644 LLama.GenerateSolution/GenerateSolution.sln delete mode 100644 LLama.GenerateSolution/Program.cs diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index b84dc1de..00000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,126 +0,0 @@ -#---------------------------------------------------------------------------------------- -# INFO: -# - How to use: change the flags in the 'Set one of these ON and all others OFF' section -# - CUDA: it will use automatically the CUDA SDK version installed -# -#---------------------------------------------------------------------------------------- -cmake_minimum_required(VERSION 3.8) -project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) -if(NOT MSVC) - message(FATAL_ERROR "This CMake file only works with MSVC.") -endif(NOT MSVC) - -#--------- Set one of these ON and all others OFF -------------------> -option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) -option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF) -option(LLAMA_CUDA "CUDA (no AVX)" OFF) -#etc... add other setups -#<--------- Set one of these ON and all others OFF ------------------- - -# --------------- Don't change below this line ----------------------- - -# Variable Settings -if(LLAMA_CUDA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(LLAMA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" OFF) -elseif(LLAMA_CUDA) - option(LLAMA_AVX "llama: enable AVX" OFF) - option(LLAMA_AVX2 "llama: enable AVX2" OFF) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(OTHER_SETUPS) - #etc... 
-endif() - -# Fixed Settings -# general -option(BUILD_SHARED_LIBS "build shared libraries" ON) -option(LLAMA_STATIC "llama: static link libraries" OFF) -option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) -option(LLAMA_LTO "llama: enable link time optimization" OFF) -option(LLAMA_CCACHE "llama: use ccache if available" ON) - -# debug -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) -option(LLAMA_GPROF "llama: enable gprof" OFF) - -# build -option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) - -# sanitizers -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) - -option(LLAMA_AVX512 "llama: enable AVX512" OFF) -option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) -option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) -option(LLAMA_FMA "llama: enable FMA" OFF) -# in MSVC F16C is implied with AVX2/AVX512 -if (NOT MSVC) - option(LLAMA_F16C "llama: enable F16C" OFF) -endif() - -if (WIN32) - set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") -endif() - -# 3rd party libs -option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_BLAS "llama: use BLAS" OFF) -set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") -#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) -option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) -option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") -option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) -set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") -set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING - "llama: max. 
batch size for using peer access") -option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) -option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) -option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -option(LLAMA_VULKAN "llama: use Vulkan" OFF) -option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) -option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) -option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) -option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) -option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) -option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) -option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) -option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) -option(LLAMA_KOMPUTE "llama: use Kompute" OFF) -option(LLAMA_MPI "llama: use MPI" OFF) -option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) -option(LLAMA_SYCL "llama: use SYCL" OFF) -option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) -option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) - -option(LLAMA_BUILD_TESTS "llama: build tests" OFF) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) -option(LLAMA_BUILD_SERVER "llama: build server example" OFF) - -# add perf arguments -option(LLAMA_PERF "llama: enable perf" OFF) - -include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) - -include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) - -include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) - -include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) - -include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) - -include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) - -include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) - -add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/CMakeLists.txt.in b/LLama.GenerateSolution/CMakeLists.txt.in deleted file mode 100644 index b84dc1de..00000000 --- a/LLama.GenerateSolution/CMakeLists.txt.in +++ /dev/null @@ -1,126 +0,0 @@ -#---------------------------------------------------------------------------------------- -# INFO: -# - How to use: change the flags in the 'Set one of these ON and all others OFF' section -# - CUDA: it will use automatically the CUDA SDK version installed -# -#---------------------------------------------------------------------------------------- -cmake_minimum_required(VERSION 3.8) -project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) -if(NOT MSVC) - message(FATAL_ERROR "This CMake file only works with MSVC.") -endif(NOT MSVC) - -#--------- Set one of these ON and all others OFF -------------------> -option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) -option(LLAMA_AVX2 "AVX2 (no 
CUDA)" OFF) -option(LLAMA_CUDA "CUDA (no AVX)" OFF) -#etc... add other setups -#<--------- Set one of these ON and all others OFF ------------------- - -# --------------- Don't change below this line ----------------------- - -# Variable Settings -if(LLAMA_CUDA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(LLAMA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" OFF) -elseif(LLAMA_CUDA) - option(LLAMA_AVX "llama: enable AVX" OFF) - option(LLAMA_AVX2 "llama: enable AVX2" OFF) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(OTHER_SETUPS) - #etc... -endif() - -# Fixed Settings -# general -option(BUILD_SHARED_LIBS "build shared libraries" ON) -option(LLAMA_STATIC "llama: static link libraries" OFF) -option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) -option(LLAMA_LTO "llama: enable link time optimization" OFF) -option(LLAMA_CCACHE "llama: use ccache if available" ON) - -# debug -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) -option(LLAMA_GPROF "llama: enable gprof" OFF) - -# build -option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) - -# sanitizers -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) - -option(LLAMA_AVX512 "llama: enable AVX512" OFF) -option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) -option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) -option(LLAMA_FMA "llama: enable FMA" OFF) -# in MSVC F16C is implied with AVX2/AVX512 -if (NOT MSVC) - option(LLAMA_F16C "llama: enable F16C" OFF) -endif() - -if (WIN32) - set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") -endif() - -# 3rd party libs -option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_BLAS "llama: use BLAS" OFF) -set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") -#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) -option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) -option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") -option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) -set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") -set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING - "llama: max. 
batch size for using peer access") -option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) -option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) -option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -option(LLAMA_VULKAN "llama: use Vulkan" OFF) -option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) -option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) -option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) -option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) -option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) -option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) -option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) -option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) -option(LLAMA_KOMPUTE "llama: use Kompute" OFF) -option(LLAMA_MPI "llama: use MPI" OFF) -option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) -option(LLAMA_SYCL "llama: use SYCL" OFF) -option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) -option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) - -option(LLAMA_BUILD_TESTS "llama: build tests" OFF) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) -option(LLAMA_BUILD_SERVER "llama: build server example" OFF) - -# add perf arguments -option(LLAMA_PERF "llama: enable perf" OFF) - -include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) - -include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) - -include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) - -include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) - -include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) - -include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) - -include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) - -add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/GenerateSolution.csproj b/LLama.GenerateSolution/GenerateSolution.csproj deleted file mode 100644 index f28f91ba..00000000 --- a/LLama.GenerateSolution/GenerateSolution.csproj +++ /dev/null @@ -1,14 +0,0 @@ - - - - Exe - net7.0 - enable - enable - - - - - - - diff --git a/LLama.GenerateSolution/GenerateSolution.sln b/LLama.GenerateSolution/GenerateSolution.sln deleted file mode 100644 index 74c9e8e1..00000000 --- a/LLama.GenerateSolution/GenerateSolution.sln +++ /dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.8.34525.116 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GenerateSolution", "GenerateSolution.csproj", "{89306FE9-4428-4C70-AF58-0AF871BED56B}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = 
preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.Build.0 = Debug|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.ActiveCfg = Release|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {32128714-30D0-4015-9184-24F609AAE564} - EndGlobalSection -EndGlobal diff --git a/LLama.GenerateSolution/Program.cs b/LLama.GenerateSolution/Program.cs deleted file mode 100644 index ebf082b1..00000000 --- a/LLama.GenerateSolution/Program.cs +++ /dev/null @@ -1,137 +0,0 @@ -using Spectre.Console; -using System; -using System.Diagnostics; -using System.Text; -using static System.Runtime.InteropServices.JavaScript.JSType; - -namespace GenerateSolution -{ - internal class Program - { - static void Main(string[] args) - { - System.Console.InputEncoding = Encoding.Unicode; - System.Console.OutputEncoding = Encoding.Unicode; - - // Check if we can accept key strokes - if (!AnsiConsole.Profile.Capabilities.Interactive) - { - AnsiConsole.MarkupLine("[red]Environment does not support interaction.[/]"); - return; - } - - var options = AskOptions(); - var cmakePath = AskCMakePath(); - if(string.IsNullOrEmpty(cmakePath) == true) - { - cmakePath = "C:\\Program Files\\CMake\\bin\\cmake.exe"; - } - AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", cmakePath); - - string cmakeListsPath = @"..\..\..\..\CMakeLists.txt"; - - //cmake [] -B [-S ] - //TODO: get the chosen arguments from above (hardcoded values below) - //TODO: edit the CMakeList.txt.in template and create the CMakeLists.txt with the chosen options - cmakeListsPath += " -G \"Visual Studio 17 2022\" -A x64 -B ..\\..\\..\\..\\ -S ..\\..\\..\\..\\"; - - ProcessStartInfo startInfo = new ProcessStartInfo - { - FileName = cmakePath, - Arguments = cmakeListsPath, - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true, - }; - - try - { - bool bSuccess = false; - string lastError = ""; - AnsiConsole.Progress() - .AutoClear(false) - .Columns(new ProgressColumn[] - { - new TaskDescriptionColumn(), - new SpinnerColumn(Spinner.Known.Ascii), - }) - .Start(ctx => - { - var cmakeTask = ctx.AddTask("Generating VS Solution", autoStart: false).IsIndeterminate(); - cmakeTask.StartTask(); - using (Process process = new Process()) - { - process.StartInfo = startInfo; - process.Start(); - string output = process.StandardOutput.ReadToEnd(); - lastError = process.StandardError.ReadToEnd(); - process.WaitForExit(); - cmakeTask.StopTask(); - if (process.ExitCode == 0) - { - bSuccess = true; - } - } - }); - - if (bSuccess == true) - { - AnsiConsole.WriteLine("VS solution generated successfully."); - } - else - { - AnsiConsole.WriteLine($"Error running CMake configuration: {lastError}"); - } - } - catch (Exception ex) - { - AnsiConsole.WriteLine("[red]ERROR[/] " + ex.Message); - } - - Console.ReadLine(); - } - - public static string AskCMakePath() - { - return AnsiConsole.Prompt( - new TextPrompt("What's your [green]CMake path[/] (default: C:\\Program Files\\CMake\\bin\\cmake.exe)?") - .AllowEmpty()); - } - - public static List AskOptions() - 
{ - var options = AnsiConsole.Prompt( - new MultiSelectionPrompt() - .PageSize(10) - .Title("Select the preferred [green]options[/]?") - .MoreChoicesText("[grey](Move up and down to reveal more options)[/]") - .InstructionsText("[grey](Press [blue][/] to toggle an option, [green][/] to accept)[/]") - .AddChoiceGroup("Avx", new[] - { - "Avx2", "Avx512" - }) - .AddChoiceGroup("Cuda", new[] - { - "Cuda" - }) - .AddChoices(new[] - { - "x64", - }) - .AddChoiceGroup("Visual Studio", new[] - { - "Visual Studio 16 2019", - "Visual Studio 17 2022" - }) - ); - - if (options.Count > 0) - { - AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", string.Join(",",options)); - } - - return options; - } - } -} From 5a196ec6f9a3fac03b0b57bdb97f1c5deb98d3d8 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 07:56:58 +0200 Subject: [PATCH 06/14] Reapply "Automatic Solution Generator - Work in progress" This reverts commit ad2c81d9574df2be5f6213ee2136a1b19afd1236. --- CMakeLists.txt | 126 ++++++++++++++++ LLama.GenerateSolution/CMakeLists.txt.in | 126 ++++++++++++++++ .../GenerateSolution.csproj | 14 ++ LLama.GenerateSolution/GenerateSolution.sln | 25 ++++ LLama.GenerateSolution/Program.cs | 137 ++++++++++++++++++ 5 files changed, 428 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 LLama.GenerateSolution/CMakeLists.txt.in create mode 100644 LLama.GenerateSolution/GenerateSolution.csproj create mode 100644 LLama.GenerateSolution/GenerateSolution.sln create mode 100644 LLama.GenerateSolution/Program.cs diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..b84dc1de --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,126 @@ +#---------------------------------------------------------------------------------------- +# INFO: +# - How to use: change the flags in the 'Set one of these ON and all others OFF' section +# - CUDA: it will use automatically the CUDA SDK version installed +# +#---------------------------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.8) +project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) +if(NOT MSVC) + message(FATAL_ERROR "This CMake file only works with MSVC.") +endif(NOT MSVC) + +#--------- Set one of these ON and all others OFF -------------------> +option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) +option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF) +option(LLAMA_CUDA "CUDA (no AVX)" OFF) +#etc... add other setups +#<--------- Set one of these ON and all others OFF ------------------- + +# --------------- Don't change below this line ----------------------- + +# Variable Settings +if(LLAMA_CUDA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(LLAMA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" OFF) +elseif(LLAMA_CUDA) + option(LLAMA_AVX "llama: enable AVX" OFF) + option(LLAMA_AVX2 "llama: enable AVX2" OFF) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(OTHER_SETUPS) + #etc... 
+endif() + +# Fixed Settings +# general +option(BUILD_SHARED_LIBS "build shared libraries" ON) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_CCACHE "llama: use ccache if available" ON) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" OFF) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" OFF) +endif() + +if (WIN32) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") +#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) +option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) +option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") +option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "llama: max. 
batch size for using peer access") +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_VULKAN "llama: use Vulkan" OFF) +option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) +option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) +option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) +option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) +option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) +option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) +option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) +option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) +option(LLAMA_KOMPUTE "llama: use Kompute" OFF) +option(LLAMA_MPI "llama: use MPI" OFF) +option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) +option(LLAMA_SYCL "llama: use SYCL" OFF) +option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) + +option(LLAMA_BUILD_TESTS "llama: build tests" OFF) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) + +# add perf arguments +option(LLAMA_PERF "llama: enable perf" OFF) + +include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) + +include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) + +include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) + +include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) + +include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) + +include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) + +include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) + +add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/CMakeLists.txt.in b/LLama.GenerateSolution/CMakeLists.txt.in new file mode 100644 index 00000000..b84dc1de --- /dev/null +++ b/LLama.GenerateSolution/CMakeLists.txt.in @@ -0,0 +1,126 @@ +#---------------------------------------------------------------------------------------- +# INFO: +# - How to use: change the flags in the 'Set one of these ON and all others OFF' section +# - CUDA: it will use automatically the CUDA SDK version installed +# +#---------------------------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.8) +project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) +if(NOT MSVC) + message(FATAL_ERROR "This CMake file only works with MSVC.") +endif(NOT MSVC) + +#--------- Set one of these ON and all others OFF -------------------> +option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) +option(LLAMA_AVX2 "AVX2 (no CUDA)" 
OFF) +option(LLAMA_CUDA "CUDA (no AVX)" OFF) +#etc... add other setups +#<--------- Set one of these ON and all others OFF ------------------- + +# --------------- Don't change below this line ----------------------- + +# Variable Settings +if(LLAMA_CUDA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(LLAMA_AVX2) + option(LLAMA_AVX "llama: enable AVX" ON) + option(LLAMA_AVX2 "llama: enable AVX2" ON) + option(LLAMA_CUBLAS "llama: use CUDA" OFF) +elseif(LLAMA_CUDA) + option(LLAMA_AVX "llama: enable AVX" OFF) + option(LLAMA_AVX2 "llama: enable AVX2" OFF) + option(LLAMA_CUBLAS "llama: use CUDA" ON) +elseif(OTHER_SETUPS) + #etc... +endif() + +# Fixed Settings +# general +option(BUILD_SHARED_LIBS "build shared libraries" ON) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_CCACHE "llama: use ccache if available" ON) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" OFF) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" OFF) +endif() + +if (WIN32) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") +#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) +option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) +option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") +option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "llama: max. 
batch size for using peer access") +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_VULKAN "llama: use Vulkan" OFF) +option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) +option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) +option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) +option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) +option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) +option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) +option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) +option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) +option(LLAMA_KOMPUTE "llama: use Kompute" OFF) +option(LLAMA_MPI "llama: use MPI" OFF) +option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) +option(LLAMA_SYCL "llama: use SYCL" OFF) +option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) + +option(LLAMA_BUILD_TESTS "llama: build tests" OFF) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) + +# add perf arguments +option(LLAMA_PERF "llama: enable perf" OFF) + +include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) + +include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) + +include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) + +include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) + +include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) + +include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) + +include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) + +add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/GenerateSolution.csproj b/LLama.GenerateSolution/GenerateSolution.csproj new file mode 100644 index 00000000..f28f91ba --- /dev/null +++ b/LLama.GenerateSolution/GenerateSolution.csproj @@ -0,0 +1,14 @@ + + + + Exe + net7.0 + enable + enable + + + + + + + diff --git a/LLama.GenerateSolution/GenerateSolution.sln b/LLama.GenerateSolution/GenerateSolution.sln new file mode 100644 index 00000000..74c9e8e1 --- /dev/null +++ b/LLama.GenerateSolution/GenerateSolution.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.8.34525.116 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GenerateSolution", "GenerateSolution.csproj", "{89306FE9-4428-4C70-AF58-0AF871BED56B}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + 
Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {32128714-30D0-4015-9184-24F609AAE564} + EndGlobalSection +EndGlobal diff --git a/LLama.GenerateSolution/Program.cs b/LLama.GenerateSolution/Program.cs new file mode 100644 index 00000000..ebf082b1 --- /dev/null +++ b/LLama.GenerateSolution/Program.cs @@ -0,0 +1,137 @@ +using Spectre.Console; +using System; +using System.Diagnostics; +using System.Text; +using static System.Runtime.InteropServices.JavaScript.JSType; + +namespace GenerateSolution +{ + internal class Program + { + static void Main(string[] args) + { + System.Console.InputEncoding = Encoding.Unicode; + System.Console.OutputEncoding = Encoding.Unicode; + + // Check if we can accept key strokes + if (!AnsiConsole.Profile.Capabilities.Interactive) + { + AnsiConsole.MarkupLine("[red]Environment does not support interaction.[/]"); + return; + } + + var options = AskOptions(); + var cmakePath = AskCMakePath(); + if(string.IsNullOrEmpty(cmakePath) == true) + { + cmakePath = "C:\\Program Files\\CMake\\bin\\cmake.exe"; + } + AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", cmakePath); + + string cmakeListsPath = @"..\..\..\..\CMakeLists.txt"; + + //cmake [] -B [-S ] + //TODO: get the chosen arguments from above (hardcoded values below) + //TODO: edit the CMakeList.txt.in template and create the CMakeLists.txt with the chosen options + cmakeListsPath += " -G \"Visual Studio 17 2022\" -A x64 -B ..\\..\\..\\..\\ -S ..\\..\\..\\..\\"; + + ProcessStartInfo startInfo = new ProcessStartInfo + { + FileName = cmakePath, + Arguments = cmakeListsPath, + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, + }; + + try + { + bool bSuccess = false; + string lastError = ""; + AnsiConsole.Progress() + .AutoClear(false) + .Columns(new ProgressColumn[] + { + new TaskDescriptionColumn(), + new SpinnerColumn(Spinner.Known.Ascii), + }) + .Start(ctx => + { + var cmakeTask = ctx.AddTask("Generating VS Solution", autoStart: false).IsIndeterminate(); + cmakeTask.StartTask(); + using (Process process = new Process()) + { + process.StartInfo = startInfo; + process.Start(); + string output = process.StandardOutput.ReadToEnd(); + lastError = process.StandardError.ReadToEnd(); + process.WaitForExit(); + cmakeTask.StopTask(); + if (process.ExitCode == 0) + { + bSuccess = true; + } + } + }); + + if (bSuccess == true) + { + AnsiConsole.WriteLine("VS solution generated successfully."); + } + else + { + AnsiConsole.WriteLine($"Error running CMake configuration: {lastError}"); + } + } + catch (Exception ex) + { + AnsiConsole.WriteLine("[red]ERROR[/] " + ex.Message); + } + + Console.ReadLine(); + } + + public static string AskCMakePath() + { + return AnsiConsole.Prompt( + new TextPrompt("What's your [green]CMake path[/] (default: C:\\Program Files\\CMake\\bin\\cmake.exe)?") + .AllowEmpty()); + } + + public static List AskOptions() + { + var options = 
AnsiConsole.Prompt( + new MultiSelectionPrompt() + .PageSize(10) + .Title("Select the preferred [green]options[/]?") + .MoreChoicesText("[grey](Move up and down to reveal more options)[/]") + .InstructionsText("[grey](Press [blue][/] to toggle an option, [green][/] to accept)[/]") + .AddChoiceGroup("Avx", new[] + { + "Avx2", "Avx512" + }) + .AddChoiceGroup("Cuda", new[] + { + "Cuda" + }) + .AddChoices(new[] + { + "x64", + }) + .AddChoiceGroup("Visual Studio", new[] + { + "Visual Studio 16 2019", + "Visual Studio 17 2022" + }) + ); + + if (options.Count > 0) + { + AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", string.Join(",",options)); + } + + return options; + } + } +} From 5a4c0d4637dd164850bad40ac98bbe7f8bab6559 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 07:57:09 +0200 Subject: [PATCH 07/14] Revert "Automatic Solution Generator - Work in progress" This reverts commit b1f3987fae88fa85a9655c12869b9505f58c8d3e. --- CMakeLists.txt | 126 ---------------- LLama.GenerateSolution/CMakeLists.txt.in | 126 ---------------- .../GenerateSolution.csproj | 14 -- LLama.GenerateSolution/GenerateSolution.sln | 25 ---- LLama.GenerateSolution/Program.cs | 137 ------------------ 5 files changed, 428 deletions(-) delete mode 100644 CMakeLists.txt delete mode 100644 LLama.GenerateSolution/CMakeLists.txt.in delete mode 100644 LLama.GenerateSolution/GenerateSolution.csproj delete mode 100644 LLama.GenerateSolution/GenerateSolution.sln delete mode 100644 LLama.GenerateSolution/Program.cs diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index b84dc1de..00000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,126 +0,0 @@ -#---------------------------------------------------------------------------------------- -# INFO: -# - How to use: change the flags in the 'Set one of these ON and all others OFF' section -# - CUDA: it will use automatically the CUDA SDK version installed -# -#---------------------------------------------------------------------------------------- -cmake_minimum_required(VERSION 3.8) -project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) -if(NOT MSVC) - message(FATAL_ERROR "This CMake file only works with MSVC.") -endif(NOT MSVC) - -#--------- Set one of these ON and all others OFF -------------------> -option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) -option(LLAMA_AVX2 "AVX2 (no CUDA)" OFF) -option(LLAMA_CUDA "CUDA (no AVX)" OFF) -#etc... add other setups -#<--------- Set one of these ON and all others OFF ------------------- - -# --------------- Don't change below this line ----------------------- - -# Variable Settings -if(LLAMA_CUDA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(LLAMA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" OFF) -elseif(LLAMA_CUDA) - option(LLAMA_AVX "llama: enable AVX" OFF) - option(LLAMA_AVX2 "llama: enable AVX2" OFF) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(OTHER_SETUPS) - #etc... 
-endif() - -# Fixed Settings -# general -option(BUILD_SHARED_LIBS "build shared libraries" ON) -option(LLAMA_STATIC "llama: static link libraries" OFF) -option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) -option(LLAMA_LTO "llama: enable link time optimization" OFF) -option(LLAMA_CCACHE "llama: use ccache if available" ON) - -# debug -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) -option(LLAMA_GPROF "llama: enable gprof" OFF) - -# build -option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) - -# sanitizers -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) - -option(LLAMA_AVX512 "llama: enable AVX512" OFF) -option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) -option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) -option(LLAMA_FMA "llama: enable FMA" OFF) -# in MSVC F16C is implied with AVX2/AVX512 -if (NOT MSVC) - option(LLAMA_F16C "llama: enable F16C" OFF) -endif() - -if (WIN32) - set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") -endif() - -# 3rd party libs -option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_BLAS "llama: use BLAS" OFF) -set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") -#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) -option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) -option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") -option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) -set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") -set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING - "llama: max. 
batch size for using peer access") -option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) -option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) -option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -option(LLAMA_VULKAN "llama: use Vulkan" OFF) -option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) -option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) -option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) -option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) -option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) -option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) -option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) -option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) -option(LLAMA_KOMPUTE "llama: use Kompute" OFF) -option(LLAMA_MPI "llama: use MPI" OFF) -option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) -option(LLAMA_SYCL "llama: use SYCL" OFF) -option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) -option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) - -option(LLAMA_BUILD_TESTS "llama: build tests" OFF) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) -option(LLAMA_BUILD_SERVER "llama: build server example" OFF) - -# add perf arguments -option(LLAMA_PERF "llama: enable perf" OFF) - -include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) - -include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) - -include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) - -include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) - -include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) - -include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) - -include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) - -add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/CMakeLists.txt.in b/LLama.GenerateSolution/CMakeLists.txt.in deleted file mode 100644 index b84dc1de..00000000 --- a/LLama.GenerateSolution/CMakeLists.txt.in +++ /dev/null @@ -1,126 +0,0 @@ -#---------------------------------------------------------------------------------------- -# INFO: -# - How to use: change the flags in the 'Set one of these ON and all others OFF' section -# - CUDA: it will use automatically the CUDA SDK version installed -# -#---------------------------------------------------------------------------------------- -cmake_minimum_required(VERSION 3.8) -project(LLamaSharpCpp VERSION 0.10.0 LANGUAGES CXX CSharp) -if(NOT MSVC) - message(FATAL_ERROR "This CMake file only works with MSVC.") -endif(NOT MSVC) - -#--------- Set one of these ON and all others OFF -------------------> -option(LLAMA_CUDA_AVX2 "CUDA + AVX2" ON) -option(LLAMA_AVX2 "AVX2 (no 
CUDA)" OFF) -option(LLAMA_CUDA "CUDA (no AVX)" OFF) -#etc... add other setups -#<--------- Set one of these ON and all others OFF ------------------- - -# --------------- Don't change below this line ----------------------- - -# Variable Settings -if(LLAMA_CUDA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(LLAMA_AVX2) - option(LLAMA_AVX "llama: enable AVX" ON) - option(LLAMA_AVX2 "llama: enable AVX2" ON) - option(LLAMA_CUBLAS "llama: use CUDA" OFF) -elseif(LLAMA_CUDA) - option(LLAMA_AVX "llama: enable AVX" OFF) - option(LLAMA_AVX2 "llama: enable AVX2" OFF) - option(LLAMA_CUBLAS "llama: use CUDA" ON) -elseif(OTHER_SETUPS) - #etc... -endif() - -# Fixed Settings -# general -option(BUILD_SHARED_LIBS "build shared libraries" ON) -option(LLAMA_STATIC "llama: static link libraries" OFF) -option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) -option(LLAMA_LTO "llama: enable link time optimization" OFF) -option(LLAMA_CCACHE "llama: use ccache if available" ON) - -# debug -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) -option(LLAMA_GPROF "llama: enable gprof" OFF) - -# build -option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) - -# sanitizers -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) - -option(LLAMA_AVX512 "llama: enable AVX512" OFF) -option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) -option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) -option(LLAMA_FMA "llama: enable FMA" OFF) -# in MSVC F16C is implied with AVX2/AVX512 -if (NOT MSVC) - option(LLAMA_F16C "llama: enable F16C" OFF) -endif() - -if (WIN32) - set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") -endif() - -# 3rd party libs -option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_BLAS "llama: use BLAS" OFF) -set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") -#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF) -option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) -option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") -option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) -set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") -set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING - "llama: max. 
batch size for using peer access") -option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) -option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) -option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -option(LLAMA_VULKAN "llama: use Vulkan" OFF) -option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) -option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) -option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) -option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) -option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) -option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) -option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) -option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) -option(LLAMA_KOMPUTE "llama: use Kompute" OFF) -option(LLAMA_MPI "llama: use MPI" OFF) -option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) -option(LLAMA_SYCL "llama: use SYCL" OFF) -option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) -option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) - -option(LLAMA_BUILD_TESTS "llama: build tests" OFF) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ON) -option(LLAMA_BUILD_SERVER "llama: build server example" OFF) - -# add perf arguments -option(LLAMA_PERF "llama: enable perf" OFF) - -include_external_msproject(LLama.Unittest ./LLama.Unittest/LLama.Unittest.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BAC1CFA9-E6AC-4BD0-A548-A8066D3C467E) - -include_external_msproject(LLama.Examples ./LLama.Examples/LLama.Examples.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE BD1909AD-E1F8-476E-BC49-E394FF0470CE) - -include_external_msproject(LLamaSharp ./LLama/LLamaSharp.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE 01A12D68-DE95-425E-AEEE-2D099305036D) - -include_external_msproject(LLama.WebAPI ./LLama.WebAPI/LLama.WebAPI.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D3CEC57A-9027-4DA4-AAAC-612A1EB50ADF) - -include_external_msproject(LLama.Web ./LLama.Web/LLama.Web.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE C3531DB2-1B2B-433C-8DE6-3541E3620DB1) - -include_external_msproject(LLamaSharp.SemanticKernel ./LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE D98F93E3-B344-4F9D-86BB-FDBF6768B587) - -include_external_msproject(LLamaSharp.KernelMemory ./LLama.KernelMemory/LLamaSharp.KernelMemory.csproj GUID 9A19103F-16F7-4668-BE54-9A1E7A4F7556 TYPE E5589AE7-B86F-4343-A1CC-8E5D34596E52) - -add_subdirectory(./llama.cpp) \ No newline at end of file diff --git a/LLama.GenerateSolution/GenerateSolution.csproj b/LLama.GenerateSolution/GenerateSolution.csproj deleted file mode 100644 index f28f91ba..00000000 --- a/LLama.GenerateSolution/GenerateSolution.csproj +++ /dev/null @@ -1,14 +0,0 @@ - - - - Exe - net7.0 - enable - enable - - - - - - - diff --git a/LLama.GenerateSolution/GenerateSolution.sln b/LLama.GenerateSolution/GenerateSolution.sln deleted file mode 100644 index 74c9e8e1..00000000 --- a/LLama.GenerateSolution/GenerateSolution.sln +++ /dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.8.34525.116 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GenerateSolution", "GenerateSolution.csproj", "{89306FE9-4428-4C70-AF58-0AF871BED56B}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = 
preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Debug|Any CPU.Build.0 = Debug|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.ActiveCfg = Release|Any CPU - {89306FE9-4428-4C70-AF58-0AF871BED56B}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {32128714-30D0-4015-9184-24F609AAE564} - EndGlobalSection -EndGlobal diff --git a/LLama.GenerateSolution/Program.cs b/LLama.GenerateSolution/Program.cs deleted file mode 100644 index ebf082b1..00000000 --- a/LLama.GenerateSolution/Program.cs +++ /dev/null @@ -1,137 +0,0 @@ -using Spectre.Console; -using System; -using System.Diagnostics; -using System.Text; -using static System.Runtime.InteropServices.JavaScript.JSType; - -namespace GenerateSolution -{ - internal class Program - { - static void Main(string[] args) - { - System.Console.InputEncoding = Encoding.Unicode; - System.Console.OutputEncoding = Encoding.Unicode; - - // Check if we can accept key strokes - if (!AnsiConsole.Profile.Capabilities.Interactive) - { - AnsiConsole.MarkupLine("[red]Environment does not support interaction.[/]"); - return; - } - - var options = AskOptions(); - var cmakePath = AskCMakePath(); - if(string.IsNullOrEmpty(cmakePath) == true) - { - cmakePath = "C:\\Program Files\\CMake\\bin\\cmake.exe"; - } - AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", cmakePath); - - string cmakeListsPath = @"..\..\..\..\CMakeLists.txt"; - - //cmake [] -B [-S ] - //TODO: get the chosen arguments from above (hardcoded values below) - //TODO: edit the CMakeList.txt.in template and create the CMakeLists.txt with the chosen options - cmakeListsPath += " -G \"Visual Studio 17 2022\" -A x64 -B ..\\..\\..\\..\\ -S ..\\..\\..\\..\\"; - - ProcessStartInfo startInfo = new ProcessStartInfo - { - FileName = cmakePath, - Arguments = cmakeListsPath, - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true, - }; - - try - { - bool bSuccess = false; - string lastError = ""; - AnsiConsole.Progress() - .AutoClear(false) - .Columns(new ProgressColumn[] - { - new TaskDescriptionColumn(), - new SpinnerColumn(Spinner.Known.Ascii), - }) - .Start(ctx => - { - var cmakeTask = ctx.AddTask("Generating VS Solution", autoStart: false).IsIndeterminate(); - cmakeTask.StartTask(); - using (Process process = new Process()) - { - process.StartInfo = startInfo; - process.Start(); - string output = process.StandardOutput.ReadToEnd(); - lastError = process.StandardError.ReadToEnd(); - process.WaitForExit(); - cmakeTask.StopTask(); - if (process.ExitCode == 0) - { - bSuccess = true; - } - } - }); - - if (bSuccess == true) - { - AnsiConsole.WriteLine("VS solution generated successfully."); - } - else - { - AnsiConsole.WriteLine($"Error running CMake configuration: {lastError}"); - } - } - catch (Exception ex) - { - AnsiConsole.WriteLine("[red]ERROR[/] " + ex.Message); - } - - Console.ReadLine(); - } - - public static string AskCMakePath() - { - return AnsiConsole.Prompt( - new TextPrompt("What's your [green]CMake path[/] (default: C:\\Program Files\\CMake\\bin\\cmake.exe)?") - .AllowEmpty()); - } - - public static List AskOptions() - 
{ - var options = AnsiConsole.Prompt( - new MultiSelectionPrompt() - .PageSize(10) - .Title("Select the preferred [green]options[/]?") - .MoreChoicesText("[grey](Move up and down to reveal more options)[/]") - .InstructionsText("[grey](Press [blue][/] to toggle an option, [green][/] to accept)[/]") - .AddChoiceGroup("Avx", new[] - { - "Avx2", "Avx512" - }) - .AddChoiceGroup("Cuda", new[] - { - "Cuda" - }) - .AddChoices(new[] - { - "x64", - }) - .AddChoiceGroup("Visual Studio", new[] - { - "Visual Studio 16 2019", - "Visual Studio 17 2022" - }) - ); - - if (options.Count > 0) - { - AnsiConsole.MarkupLine("You have selected: [yellow]{0}[/]", string.Join(",",options)); - } - - return options; - } - } -} From 8ea82bcc2855abaca3fc7a0e7c8cb7bb152585a1 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 07:57:12 +0200 Subject: [PATCH 08/14] Revert "Embeddings correction" This reverts commit 3ded2dd74d2200522787a1ac2f0484b3251182c4. --- LLama/LLamaEmbedder.cs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs index c29b6b25..f60f3cd5 100644 --- a/LLama/LLamaEmbedder.cs +++ b/LLama/LLamaEmbedder.cs @@ -97,15 +97,18 @@ namespace LLama private float[] GetEmbeddingsArray() { - var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle); - if (embeddings == null || embeddings.Length == 0) + unsafe { - embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero); - if (embeddings == null || embeddings.Length == 0) + var embeddings = NativeApi.llama_get_embeddings(Context.NativeHandle); + + if (embeddings == null) + embeddings = NativeApi.llama_get_embeddings_seq(Context.NativeHandle, LLamaSeqId.Zero); + + if (embeddings == null) return Array.Empty(); - } - return embeddings.ToArray(); + return new Span(embeddings, Context.EmbeddingSize).ToArray(); + } } private static void Normalize(Span embeddings) @@ -116,6 +119,7 @@ namespace LLama lengthSqr += value * value; var length = (float)Math.Sqrt(lengthSqr); + // Do not divide by length if it is zero if (length <= float.Epsilon) return; From 6bd269da60cc3bbb56d6d2e0a1a1b1eadbaf3b91 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 07:57:15 +0200 Subject: [PATCH 09/14] Revert "Simplifying image handling" This reverts commit f264024666d6ff0557215f9b17d19a4ca8b3056c. --- .../Examples/LlavaInteractiveModeExecute.cs | 2 +- LLama/Abstractions/ILLamaExecutor.cs | 44 ++++++++++++++++++- LLama/LLamaStatelessExecutor.cs | 4 +- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 507f041b..8cfa7376 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -102,7 +102,7 @@ namespace LLama.Examples.Examples // foreach (var image in imagePaths) { - ex.Images.Add(File.ReadAllBytes(image)); + ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, image)); } } diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index d6c8d2ce..977cbc5e 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -27,7 +27,7 @@ namespace LLama.Abstractions /// /// List of images: Image filen path, uri or image byte array. See ImageData. /// - public List Images { get; } + public List Images { get; } /// /// Asynchronously infers a response from the model. 
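For readers following the series, a minimal usage sketch of the ImageData wrapper restored by this revert (the class itself is re-added in the hunk below); the executor instance "ex" and the file path are placeholders, assuming an InteractiveExecutor with a LLaVA clip model loaded as in the LlavaInteractiveModeExecute example:

    // Attach images to an executor through the ImageData wrapper.
    // ImagePath, ImageBytes and ImageURL are the three DataType values defined below.
    ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, "sample.jpg"));
    ex.Images.Add(new ImageData(ImageData.DataType.ImageBytes, File.ReadAllBytes("sample.jpg")));
    ex.Images.Add(new ImageData(ImageData.DataType.ImageURL, "https://example.com/sample.jpg"));

Note that PATCH 10/14 later reverts Images back to a plain list of byte arrays, so this shape only applies to the intermediate state of the series.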
@@ -38,4 +38,46 @@ namespace LLama.Abstractions /// IAsyncEnumerable InferAsync(string text, IInferenceParams? inferenceParams = null, CancellationToken token = default); } + + /// + /// Holds image data + /// + public class ImageData + { + /// + /// constructor + /// + /// + /// + public ImageData(DataType type, object data) { Type = type; Data = data; } + + /// + /// the possible types of image data + /// + public enum DataType + { + /// + /// file path + /// + ImagePath, + /// + /// byte array + /// + ImageBytes, + /// + /// uri + /// + ImageURL + } + + /// + /// the type of this image data + /// + public DataType Type { get; set; } + + /// + /// the image data (string, byte array or uri) + /// + public object? Data { get; set; } + } } diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index f9d6ca5b..9d2f8c78 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -34,7 +34,7 @@ namespace LLama public LLavaWeights? ClipModel { get; } /// - public List Images { get; set; } + public List Images { get; set; } /// /// The context used by the executor when running the inference. @@ -49,7 +49,7 @@ namespace LLama /// public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? logger = null) { - Images = new List(); + Images = new List(); _weights = weights; _params = @params; _logger = logger; From 156d7bb4636646c3d595d697d70d0f2a6a61f3fa Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 07:57:17 +0200 Subject: [PATCH 10/14] Revert "Standardizing Image Data implementation" This reverts commit b2423fe6e9fd468a3c435b3e714dd05bac4c54d9. --- .../Examples/LlavaInteractiveModeExecute.cs | 31 ++++++------- LLama/Abstractions/ILLamaExecutor.cs | 46 +------------------ LLama/LLamaStatelessExecutor.cs | 8 ++-- 3 files changed, 20 insertions(+), 65 deletions(-) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 8cfa7376..112fe23f 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -1,8 +1,7 @@ using System.Text.RegularExpressions; -using LLama.Batched; using LLama.Common; using Spectre.Console; -using LLama.Abstractions; +using LLama.Native; namespace LLama.Examples.Examples { @@ -19,12 +18,8 @@ namespace LLama.Examples.Examples var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n"; - var parameters = new ModelParams(modelPath) - { - ContextSize = 4096, - Seed = 1337, - GpuLayerCount = 10 - }; + var parameters = new ModelParams(modelPath); + using var model = LLamaWeights.LoadFromFile(parameters); using var context = model.CreateContext(parameters); @@ -47,16 +42,16 @@ namespace LLama.Examples.Examples var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); var imageCount = imageMatches.Count(); var hasImages = imageCount > 0; - byte[][] imageBytes = null; if (hasImages) { var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); - var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value); + var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList(); + List imageBytes; try { - imageBytes = imagePaths.Select(File.ReadAllBytes).ToArray(); + imageBytes = imagePaths.Select(File.ReadAllBytes).ToList(); } catch (IOException exception) { @@ -69,15 +64,17 @@ namespace LLama.Examples.Examples break; } + // Each 
prompt with images we clear cache + // When the prompt contains images we clear KV_CACHE to restart conversation + // See: + // https://github.com/ggerganov/llama.cpp/discussions/3620 + ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 ); int index = 0; foreach (var path in imagePathsWithCurlyBraces) { // First image replace to tag "); - else - prompt = prompt.Replace(path, ""); + prompt = prompt.Replace(path, index++ == 0 ? "" : ""); } @@ -102,7 +99,7 @@ namespace LLama.Examples.Examples // foreach (var image in imagePaths) { - ex.Images.Add(new ImageData(ImageData.DataType.ImagePath, image)); + ex.Images.Add(await File.ReadAllBytesAsync(image)); } } @@ -118,7 +115,7 @@ namespace LLama.Examples.Examples // let the user finish with exit // - if (prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) + if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) break; } diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index 977cbc5e..574a27d8 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -25,9 +25,9 @@ namespace LLama.Abstractions public LLavaWeights? ClipModel { get; } /// - /// List of images: Image filen path, uri or image byte array. See ImageData. + /// List of images: List of images in byte array format. /// - public List Images { get; } + public List Images { get; } /// /// Asynchronously infers a response from the model. @@ -38,46 +38,4 @@ namespace LLama.Abstractions /// IAsyncEnumerable InferAsync(string text, IInferenceParams? inferenceParams = null, CancellationToken token = default); } - - /// - /// Holds image data - /// - public class ImageData - { - /// - /// constructor - /// - /// - /// - public ImageData(DataType type, object data) { Type = type; Data = data; } - - /// - /// the possible types of image data - /// - public enum DataType - { - /// - /// file path - /// - ImagePath, - /// - /// byte array - /// - ImageBytes, - /// - /// uri - /// - ImageURL - } - - /// - /// the type of this image data - /// - public DataType Type { get; set; } - - /// - /// the image data (string, byte array or uri) - /// - public object? Data { get; set; } - } } diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index 9d2f8c78..a3c52a02 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -34,7 +34,7 @@ namespace LLama public LLavaWeights? ClipModel { get; } /// - public List Images { get; set; } + public List Images { get; set; } /// /// The context used by the executor when running the inference. @@ -49,7 +49,7 @@ namespace LLama /// public StatelessExecutor(LLamaWeights weights, IContextParams @params, ILogger? 
logger = null) { - Images = new List(); + Images = new List(); _weights = weights; _params = @params; _logger = logger; @@ -90,7 +90,7 @@ namespace LLama lastTokens.Add(0); // Tokenize the prompt - var tokens = Context.Tokenize(prompt).ToList(); + var tokens = Context.Tokenize(prompt, special: true).ToList(); lastTokens.AddRange(tokens); // Evaluate the prompt, in chunks smaller than the max batch size @@ -124,7 +124,7 @@ namespace LLama } // Check if this is the EOS token - if (id == _weights.EndOfSentenceToken) + if (id == _weights.Tokens.EOS) break; // Decode this token into text From ab8dd0dfc7604249b70cf73334245e953377949f Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 08:06:40 +0200 Subject: [PATCH 11/14] Correcting non-standard way of working with PromptExecutionSettings The extension of PromptExecutionSettings is not only for ChatCompletion, but also for text completion and text embedding. --- .../Examples/SemanticKernelPrompt.cs | 4 +-- .../LLamaSharpChatCompletion.cs | 12 ++++---- .../ChatRequestSettings.cs | 30 +++++++++++++------ .../ChatRequestSettingsConverter.cs | 10 +++---- LLama.SemanticKernel/ExtensionMethods.cs | 7 ++--- .../LLamaSharpTextCompletion.cs | 5 ++-- .../ChatRequestSettingsConverterTests.cs | 15 +++++----- .../ChatRequestSettingsTests.cs | 16 +++++----- .../SemanticKernel/ExtensionMethodsTests.cs | 2 +- 9 files changed, 56 insertions(+), 45 deletions(-) rename LLama.SemanticKernel/{ChatCompletion => }/ChatRequestSettings.cs (76%) rename LLama.SemanticKernel/{ChatCompletion => }/ChatRequestSettingsConverter.cs (88%) diff --git a/LLama.Examples/Examples/SemanticKernelPrompt.cs b/LLama.Examples/Examples/SemanticKernelPrompt.cs index fdf58b3a..38002d3d 100644 --- a/LLama.Examples/Examples/SemanticKernelPrompt.cs +++ b/LLama.Examples/Examples/SemanticKernelPrompt.cs @@ -1,9 +1,9 @@ using LLama.Common; -using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; using LLamaSharp.SemanticKernel.TextCompletion; using Microsoft.SemanticKernel.TextGeneration; using Microsoft.Extensions.DependencyInjection; +using LLamaSharp.SemanticKernel; namespace LLama.Examples.Examples { @@ -31,7 +31,7 @@ namespace LLama.Examples.Examples One line TLDR with the fewest words."; - ChatRequestSettings settings = new() { MaxTokens = 100 }; + LLamaSharpPromptExecutionSettings settings = new() { MaxTokens = 100 }; var summarize = kernel.CreateFunctionFromPrompt(prompt, settings); string text1 = @" diff --git a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs index 7bcbaf7b..26ecdccc 100644 --- a/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs +++ b/LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs @@ -17,7 +17,7 @@ namespace LLamaSharp.SemanticKernel.ChatCompletion; public sealed class LLamaSharpChatCompletion : IChatCompletionService { private readonly ILLamaExecutor _model; - private ChatRequestSettings defaultRequestSettings; + private LLamaSharpPromptExecutionSettings defaultRequestSettings; private readonly IHistoryTransform historyTransform; private readonly ITextStreamTransform outputTransform; @@ -25,9 +25,9 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService public IReadOnlyDictionary Attributes => this._attributes; - static ChatRequestSettings GetDefaultSettings() + static LLamaSharpPromptExecutionSettings GetDefaultSettings() { - return new ChatRequestSettings + return new LLamaSharpPromptExecutionSettings { 
MaxTokens = 256, Temperature = 0, @@ -37,7 +37,7 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService } public LLamaSharpChatCompletion(ILLamaExecutor model, - ChatRequestSettings? defaultRequestSettings = default, + LLamaSharpPromptExecutionSettings? defaultRequestSettings = default, IHistoryTransform? historyTransform = null, ITextStreamTransform? outputTransform = null) { @@ -65,7 +65,7 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService public async Task> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { var settings = executionSettings != null - ? ChatRequestSettings.FromRequestSettings(executionSettings) + ? LLamaSharpPromptExecutionSettings.FromRequestSettings(executionSettings) : defaultRequestSettings; var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory()); @@ -86,7 +86,7 @@ public sealed class LLamaSharpChatCompletion : IChatCompletionService public async IAsyncEnumerable GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { var settings = executionSettings != null - ? ChatRequestSettings.FromRequestSettings(executionSettings) + ? LLamaSharpPromptExecutionSettings.FromRequestSettings(executionSettings) : defaultRequestSettings; var prompt = historyTransform.HistoryToText(chatHistory.ToLLamaSharpChatHistory()); diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs b/LLama.SemanticKernel/ChatRequestSettings.cs similarity index 76% rename from LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs rename to LLama.SemanticKernel/ChatRequestSettings.cs index ac22e1fc..87dda39e 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs +++ b/LLama.SemanticKernel/ChatRequestSettings.cs @@ -1,10 +1,22 @@ -using Microsoft.SemanticKernel; + +/* Unmerged change from project 'LLamaSharp.SemanticKernel (netstandard2.0)' +Before: +using Microsoft.SemanticKernel; +After: +using LLamaSharp; +using LLamaSharp.SemanticKernel; +using LLamaSharp.SemanticKernel; +using LLamaSharp.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel; +*/ +using LLamaSharp.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel; using System.Text.Json; using System.Text.Json.Serialization; -namespace LLamaSharp.SemanticKernel.ChatCompletion; +namespace LLamaSharp.SemanticKernel; -public class ChatRequestSettings : PromptExecutionSettings +public class LLamaSharpPromptExecutionSettings : PromptExecutionSettings { /// /// Temperature controls the randomness of the completion. @@ -68,30 +80,30 @@ public class ChatRequestSettings : PromptExecutionSettings /// Template configuration /// Default max tokens /// An instance of OpenAIRequestSettings - public static ChatRequestSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? defaultMaxTokens = null) + public static LLamaSharpPromptExecutionSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? 
defaultMaxTokens = null) { if (requestSettings is null) { - return new ChatRequestSettings() + return new LLamaSharpPromptExecutionSettings() { MaxTokens = defaultMaxTokens }; } - if (requestSettings is ChatRequestSettings requestSettingsChatRequestSettings) + if (requestSettings is LLamaSharpPromptExecutionSettings requestSettingsChatRequestSettings) { return requestSettingsChatRequestSettings; } var json = JsonSerializer.Serialize(requestSettings); - var chatRequestSettings = JsonSerializer.Deserialize(json, s_options); + var chatRequestSettings = JsonSerializer.Deserialize(json, s_options); if (chatRequestSettings is not null) { return chatRequestSettings; } - throw new ArgumentException($"Invalid request settings, cannot convert to {nameof(ChatRequestSettings)}", nameof(requestSettings)); + throw new ArgumentException($"Invalid request settings, cannot convert to {nameof(LLamaSharpPromptExecutionSettings)}", nameof(requestSettings)); } private static readonly JsonSerializerOptions s_options = CreateOptions(); @@ -105,7 +117,7 @@ public class ChatRequestSettings : PromptExecutionSettings AllowTrailingCommas = true, PropertyNameCaseInsensitive = true, ReadCommentHandling = JsonCommentHandling.Skip, - Converters = { new ChatRequestSettingsConverter() } + Converters = { new LLamaSharpPromptExecutionSettingsConverter() } }; return options; diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs b/LLama.SemanticKernel/ChatRequestSettingsConverter.cs similarity index 88% rename from LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs rename to LLama.SemanticKernel/ChatRequestSettingsConverter.cs index e320ea3f..36ca9c6c 100644 --- a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs +++ b/LLama.SemanticKernel/ChatRequestSettingsConverter.cs @@ -3,17 +3,17 @@ using System.Collections.Generic; using System.Text.Json; using System.Text.Json.Serialization; -namespace LLamaSharp.SemanticKernel.ChatCompletion; +namespace LLamaSharp.SemanticKernel; /// /// JSON converter for /// -public class ChatRequestSettingsConverter : JsonConverter +public class LLamaSharpPromptExecutionSettingsConverter : JsonConverter { /// - public override ChatRequestSettings? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + public override LLamaSharpPromptExecutionSettings? 
Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) { - var requestSettings = new ChatRequestSettings(); + var requestSettings = new LLamaSharpPromptExecutionSettings(); while (reader.Read() && reader.TokenType != JsonTokenType.EndObject) { @@ -77,7 +77,7 @@ public class ChatRequestSettingsConverter : JsonConverter } /// - public override void Write(Utf8JsonWriter writer, ChatRequestSettings value, JsonSerializerOptions options) + public override void Write(Utf8JsonWriter writer, LLamaSharpPromptExecutionSettings value, JsonSerializerOptions options) { writer.WriteStartObject(); diff --git a/LLama.SemanticKernel/ExtensionMethods.cs b/LLama.SemanticKernel/ExtensionMethods.cs index 85f9064c..086999aa 100644 --- a/LLama.SemanticKernel/ExtensionMethods.cs +++ b/LLama.SemanticKernel/ExtensionMethods.cs @@ -1,5 +1,4 @@ -using LLamaSharp.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.ChatCompletion; namespace LLamaSharp.SemanticKernel; public static class ExtensionMethods @@ -23,11 +22,11 @@ public static class ExtensionMethods } /// - /// Convert ChatRequestSettings to LLamaSharp InferenceParams + /// Convert LLamaSharpPromptExecutionSettings to LLamaSharp InferenceParams /// /// /// - internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this ChatRequestSettings requestSettings) + internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this LLamaSharpPromptExecutionSettings requestSettings) { if (requestSettings is null) { diff --git a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs index 08ec33e1..31e07b2b 100644 --- a/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs +++ b/LLama.SemanticKernel/TextCompletion/LLamaSharpTextCompletion.cs @@ -1,5 +1,4 @@ using LLama.Abstractions; -using LLamaSharp.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.Services; using Microsoft.SemanticKernel.TextGeneration; @@ -24,7 +23,7 @@ public sealed class LLamaSharpTextCompletion : ITextGenerationService /// public async Task> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { - var settings = ChatRequestSettings.FromRequestSettings(executionSettings); + var settings = LLamaSharpPromptExecutionSettings.FromRequestSettings(executionSettings); var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken); var sb = new StringBuilder(); await foreach (var token in result) @@ -37,7 +36,7 @@ public sealed class LLamaSharpTextCompletion : ITextGenerationService /// public async IAsyncEnumerable GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? 
kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { - var settings = ChatRequestSettings.FromRequestSettings(executionSettings); + var settings = LLamaSharpPromptExecutionSettings.FromRequestSettings(executionSettings); var result = executor.InferAsync(prompt, settings?.ToLLamaSharpInferenceParams(), cancellationToken); await foreach (var token in result) { diff --git a/LLama.Unittest/SemanticKernel/ChatRequestSettingsConverterTests.cs b/LLama.Unittest/SemanticKernel/ChatRequestSettingsConverterTests.cs index 4190e852..4828a407 100644 --- a/LLama.Unittest/SemanticKernel/ChatRequestSettingsConverterTests.cs +++ b/LLama.Unittest/SemanticKernel/ChatRequestSettingsConverterTests.cs @@ -1,4 +1,5 @@ -using LLamaSharp.SemanticKernel.ChatCompletion; +using LLamaSharp.SemanticKernel; +using LLamaSharp.SemanticKernel.ChatCompletion; using System.Text.Json; namespace LLama.Unittest.SemanticKernel @@ -10,11 +11,11 @@ namespace LLama.Unittest.SemanticKernel { // Arrange var options = new JsonSerializerOptions(); - options.Converters.Add(new ChatRequestSettingsConverter()); + options.Converters.Add(new LLamaSharpPromptExecutionSettingsConverter()); var json = "{}"; // Act - var requestSettings = JsonSerializer.Deserialize(json, options); + var requestSettings = JsonSerializer.Deserialize(json, options); // Assert Assert.NotNull(requestSettings); @@ -36,7 +37,7 @@ namespace LLama.Unittest.SemanticKernel // Arrange var options = new JsonSerializerOptions(); options.AllowTrailingCommas = true; - options.Converters.Add(new ChatRequestSettingsConverter()); + options.Converters.Add(new LLamaSharpPromptExecutionSettingsConverter()); var json = @"{ ""frequency_penalty"": 0.5, ""max_tokens"": 250, @@ -49,7 +50,7 @@ namespace LLama.Unittest.SemanticKernel }"; // Act - var requestSettings = JsonSerializer.Deserialize(json, options); + var requestSettings = JsonSerializer.Deserialize(json, options); // Assert Assert.NotNull(requestSettings); @@ -73,7 +74,7 @@ namespace LLama.Unittest.SemanticKernel // Arrange var options = new JsonSerializerOptions(); options.AllowTrailingCommas = true; - options.Converters.Add(new ChatRequestSettingsConverter()); + options.Converters.Add(new LLamaSharpPromptExecutionSettingsConverter()); var json = @"{ ""FrequencyPenalty"": 0.5, ""MaxTokens"": 250, @@ -86,7 +87,7 @@ namespace LLama.Unittest.SemanticKernel }"; // Act - var requestSettings = JsonSerializer.Deserialize(json, options); + var requestSettings = JsonSerializer.Deserialize(json, options); // Assert Assert.NotNull(requestSettings); diff --git a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs index ef5d9670..d75a8d4b 100644 --- a/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs +++ b/LLama.Unittest/SemanticKernel/ChatRequestSettingsTests.cs @@ -1,4 +1,4 @@ -using LLamaSharp.SemanticKernel.ChatCompletion; +using LLamaSharp.SemanticKernel; using Microsoft.SemanticKernel; namespace LLama.Unittest.SemanticKernel @@ -10,7 +10,7 @@ namespace LLama.Unittest.SemanticKernel { // Arrange // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(null, null); + var requestSettings = LLamaSharpPromptExecutionSettings.FromRequestSettings(null, null); // Assert Assert.NotNull(requestSettings); @@ -31,7 +31,7 @@ namespace LLama.Unittest.SemanticKernel { // Arrange // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(null, 200); + var requestSettings = 
LLamaSharpPromptExecutionSettings.FromRequestSettings(null, 200); // Assert Assert.NotNull(requestSettings); @@ -51,7 +51,7 @@ namespace LLama.Unittest.SemanticKernel public void ChatRequestSettings_FromExistingRequestSettings() { // Arrange - var originalRequestSettings = new ChatRequestSettings() + var originalRequestSettings = new LLamaSharpPromptExecutionSettings() { FrequencyPenalty = 0.5, MaxTokens = 100, @@ -64,7 +64,7 @@ namespace LLama.Unittest.SemanticKernel }; // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(originalRequestSettings); + var requestSettings = LLamaSharpPromptExecutionSettings.FromRequestSettings(originalRequestSettings); // Assert Assert.NotNull(requestSettings); @@ -81,7 +81,7 @@ namespace LLama.Unittest.SemanticKernel }; // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(originalRequestSettings); + var requestSettings = LLamaSharpPromptExecutionSettings.FromRequestSettings(originalRequestSettings); // Assert Assert.NotNull(requestSettings); @@ -109,7 +109,7 @@ namespace LLama.Unittest.SemanticKernel }; // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(originalRequestSettings); + var requestSettings = LLamaSharpPromptExecutionSettings.FromRequestSettings(originalRequestSettings); // Assert Assert.NotNull(requestSettings); @@ -148,7 +148,7 @@ namespace LLama.Unittest.SemanticKernel }; // Act - var requestSettings = ChatRequestSettings.FromRequestSettings(originalRequestSettings); + var requestSettings = LLamaSharpPromptExecutionSettings.FromRequestSettings(originalRequestSettings); // Assert Assert.NotNull(requestSettings); diff --git a/LLama.Unittest/SemanticKernel/ExtensionMethodsTests.cs b/LLama.Unittest/SemanticKernel/ExtensionMethodsTests.cs index dfcef182..574611fc 100644 --- a/LLama.Unittest/SemanticKernel/ExtensionMethodsTests.cs +++ b/LLama.Unittest/SemanticKernel/ExtensionMethodsTests.cs @@ -37,7 +37,7 @@ namespace LLamaSharp.SemanticKernel.Tests public void ToLLamaSharpInferenceParams_StateUnderTest_ExpectedBehavior() { // Arrange - var requestSettings = new ChatRequestSettings(); + var requestSettings = new LLamaSharpPromptExecutionSettings(); // Act var result = ExtensionMethods.ToLLamaSharpInferenceParams( From 59a0afdb778b77b5ed1930108e4d154f0c5ac9ac Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Wed, 24 Apr 2024 08:24:02 +0200 Subject: [PATCH 12/14] Renaming files to correspond to class names --- ...hatRequestSettings.cs => LLamaSharpPromptExecutionSettings.cs} | 0 ...Converter.cs => LLamaSharpPromptExecutionSettingsConverter.cs} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename LLama.SemanticKernel/{ChatRequestSettings.cs => LLamaSharpPromptExecutionSettings.cs} (100%) rename LLama.SemanticKernel/{ChatRequestSettingsConverter.cs => LLamaSharpPromptExecutionSettingsConverter.cs} (100%) diff --git a/LLama.SemanticKernel/ChatRequestSettings.cs b/LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs similarity index 100% rename from LLama.SemanticKernel/ChatRequestSettings.cs rename to LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs diff --git a/LLama.SemanticKernel/ChatRequestSettingsConverter.cs b/LLama.SemanticKernel/LLamaSharpPromptExecutionSettingsConverter.cs similarity index 100% rename from LLama.SemanticKernel/ChatRequestSettingsConverter.cs rename to LLama.SemanticKernel/LLamaSharpPromptExecutionSettingsConverter.cs From 2aa96b206f88f1a9a715e6d59e371d72c9d03e31 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Sat, 27 Apr 2024 09:39:40 +0200 Subject: 
[PATCH 13/14] Adding Response Format - Correcting non-standard way of working with PromptExecutionSettings can be used downstream to post-process the messages based on the requested format --- LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs b/LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs index 87dda39e..5e8a6669 100644 --- a/LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs +++ b/LLama.SemanticKernel/LLamaSharpPromptExecutionSettings.cs @@ -74,6 +74,12 @@ public class LLamaSharpPromptExecutionSettings : PromptExecutionSettings [JsonPropertyName("token_selection_biases")] public IDictionary TokenSelectionBiases { get; set; } = new Dictionary(); + /// + /// Indicates the format of the response which can be used downstream to post-process the messages. Handlebars: handlebars_object. JSON: json_object, etc. + /// + [JsonPropertyName("response_format")] + public string ResponseFormat { get; set; } = string.Empty; + /// /// Create a new settings object with the values from another settings object. /// From 54c01d4c2c295ba88e8beb70827cc8af323baaf2 Mon Sep 17 00:00:00 2001 From: Zoli Somogyi Date: Tue, 30 Apr 2024 19:28:31 +0200 Subject: [PATCH 14/14] Making old code obsolete - SemanticKernel: Correcting working with PromptExecutionSettings --- .../ChatCompletion/ChatRequestSettings.cs | 114 ++++++++++++++++++ .../ChatRequestSettingsConverter.cs | 105 ++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs create mode 100644 LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs new file mode 100644 index 00000000..683f8c45 --- /dev/null +++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettings.cs @@ -0,0 +1,114 @@ +using Microsoft.SemanticKernel; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace LLamaSharp.SemanticKernel.ChatCompletion; + +[Obsolete("Use LLamaSharpPromptExecutionSettings instead")] +public class ChatRequestSettings : PromptExecutionSettings +{ + /// + /// Temperature controls the randomness of the completion. + /// The higher the temperature, the more random the completion. + /// + [JsonPropertyName("temperature")] + public double Temperature { get; set; } = 0; + + /// + /// TopP controls the diversity of the completion. + /// The higher the TopP, the more diverse the completion. + /// + [JsonPropertyName("top_p")] + public double TopP { get; set; } = 0; + + /// + /// Number between -2.0 and 2.0. Positive values penalize new tokens + /// based on whether they appear in the text so far, increasing the + /// model's likelihood to talk about new topics. + /// + [JsonPropertyName("presence_penalty")] + public double PresencePenalty { get; set; } = 0; + + /// + /// Number between -2.0 and 2.0. Positive values penalize new tokens + /// based on their existing frequency in the text so far, decreasing + /// the model's likelihood to repeat the same line verbatim. + /// + [JsonPropertyName("frequency_penalty")] + public double FrequencyPenalty { get; set; } = 0; + + /// + /// Sequences where the completion will stop generating further tokens. 
+    /// </summary>
+    [JsonPropertyName("stop_sequences")]
+    public IList<string> StopSequences { get; set; } = Array.Empty<string>();
+
+    /// <summary>
+    /// How many completions to generate for each prompt. Default is 1.
+    /// Note: Because this parameter generates many completions, it can quickly consume your token quota.
+    /// Use carefully and ensure that you have reasonable settings for max_tokens and stop.
+    /// </summary>
+    [JsonPropertyName("results_per_prompt")]
+    public int ResultsPerPrompt { get; set; } = 1;
+
+    /// <summary>
+    /// The maximum number of tokens to generate in the completion.
+    /// </summary>
+    [JsonPropertyName("max_tokens")]
+    public int? MaxTokens { get; set; }
+
+    /// <summary>
+    /// Modify the likelihood of specified tokens appearing in the completion.
+    /// </summary>
+    [JsonPropertyName("token_selection_biases")]
+    public IDictionary<int, int> TokenSelectionBiases { get; set; } = new Dictionary<int, int>();
+
+    /// <summary>
+    /// Create a new settings object with the values from another settings object.
+    /// </summary>
+    /// <param name="requestSettings">Template configuration</param>
+    /// <param name="defaultMaxTokens">Default max tokens</param>
+    /// <returns>An instance of ChatRequestSettings</returns>
+    public static ChatRequestSettings FromRequestSettings(PromptExecutionSettings? requestSettings, int? defaultMaxTokens = null)
+    {
+        if (requestSettings is null)
+        {
+            return new ChatRequestSettings()
+            {
+                MaxTokens = defaultMaxTokens
+            };
+        }
+
+        if (requestSettings is ChatRequestSettings requestSettingsChatRequestSettings)
+        {
+            return requestSettingsChatRequestSettings;
+        }
+
+        var json = JsonSerializer.Serialize(requestSettings);
+        var chatRequestSettings = JsonSerializer.Deserialize<ChatRequestSettings>(json, s_options);
+
+        if (chatRequestSettings is not null)
+        {
+            return chatRequestSettings;
+        }
+
+        throw new ArgumentException($"Invalid request settings, cannot convert to {nameof(ChatRequestSettings)}", nameof(requestSettings));
+    }
+
+    private static readonly JsonSerializerOptions s_options = CreateOptions();
+
+    private static JsonSerializerOptions CreateOptions()
+    {
+        JsonSerializerOptions options = new()
+        {
+            WriteIndented = true,
+            MaxDepth = 20,
+            AllowTrailingCommas = true,
+            PropertyNameCaseInsensitive = true,
+            ReadCommentHandling = JsonCommentHandling.Skip,
+            Converters = { new ChatRequestSettingsConverter() }
+        };
+
+        return options;
+    }
+}
diff --git a/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs
new file mode 100644
index 00000000..15bc45cd
--- /dev/null
+++ b/LLama.SemanticKernel/ChatCompletion/ChatRequestSettingsConverter.cs
@@ -0,0 +1,105 @@
+using System;
+using System.Collections.Generic;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace LLamaSharp.SemanticKernel.ChatCompletion;
+
+/// <summary>
+/// JSON converter for <see cref="ChatRequestSettings"/>
+/// </summary>
+[Obsolete("Use LLamaSharpPromptExecutionSettingsConverter instead")]
+public class ChatRequestSettingsConverter : JsonConverter<ChatRequestSettings>
+{
+    /// <inheritdoc/>
+    public override ChatRequestSettings? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
+    {
+        var requestSettings = new ChatRequestSettings();
+
+        while (reader.Read() && reader.TokenType != JsonTokenType.EndObject)
+        {
+            if (reader.TokenType == JsonTokenType.PropertyName)
+            {
+                string? propertyName = reader.GetString();
+
+                if (propertyName is not null)
+                {
+                    // normalise property name to uppercase
+                    propertyName = propertyName.ToUpperInvariant();
+                }
+
+                reader.Read();
+
+                switch (propertyName)
+                {
+                    case "MODELID":
+                    case "MODEL_ID":
+                        requestSettings.ModelId = reader.GetString();
+                        break;
+                    case "TEMPERATURE":
+                        requestSettings.Temperature = reader.GetDouble();
+                        break;
+                    case "TOPP":
+                    case "TOP_P":
+                        requestSettings.TopP = reader.GetDouble();
+                        break;
+                    case "FREQUENCYPENALTY":
+                    case "FREQUENCY_PENALTY":
+                        requestSettings.FrequencyPenalty = reader.GetDouble();
+                        break;
+                    case "PRESENCEPENALTY":
+                    case "PRESENCE_PENALTY":
+                        requestSettings.PresencePenalty = reader.GetDouble();
+                        break;
+                    case "MAXTOKENS":
+                    case "MAX_TOKENS":
+                        requestSettings.MaxTokens = reader.GetInt32();
+                        break;
+                    case "STOPSEQUENCES":
+                    case "STOP_SEQUENCES":
+                        requestSettings.StopSequences = JsonSerializer.Deserialize<List<string>>(ref reader, options) ?? Array.Empty<string>();
+                        break;
+                    case "RESULTSPERPROMPT":
+                    case "RESULTS_PER_PROMPT":
+                        requestSettings.ResultsPerPrompt = reader.GetInt32();
+                        break;
+                    case "TOKENSELECTIONBIASES":
+                    case "TOKEN_SELECTION_BIASES":
+                        requestSettings.TokenSelectionBiases = JsonSerializer.Deserialize<Dictionary<int, int>>(ref reader, options) ?? new Dictionary<int, int>();
+                        break;
+                    default:
+                        reader.Skip();
+                        break;
+                }
+            }
+        }
+
+        return requestSettings;
+    }
+
+    /// <inheritdoc/>
+    public override void Write(Utf8JsonWriter writer, ChatRequestSettings value, JsonSerializerOptions options)
+    {
+        writer.WriteStartObject();
+
+        writer.WriteNumber("temperature", value.Temperature);
+        writer.WriteNumber("top_p", value.TopP);
+        writer.WriteNumber("frequency_penalty", value.FrequencyPenalty);
+        writer.WriteNumber("presence_penalty", value.PresencePenalty);
+        if (value.MaxTokens is null)
+        {
+            writer.WriteNull("max_tokens");
+        }
+        else
+        {
+            writer.WriteNumber("max_tokens", (decimal)value.MaxTokens);
+        }
+        writer.WritePropertyName("stop_sequences");
+        JsonSerializer.Serialize(writer, value.StopSequences, options);
+        writer.WriteNumber("results_per_prompt", value.ResultsPerPrompt);
+        writer.WritePropertyName("token_selection_biases");
+        JsonSerializer.Serialize(writer, value.TokenSelectionBiases, options);
+
+        writer.WriteEndObject();
+    }
+}
\ No newline at end of file
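
For illustration only, and not part of the patch series above: a minimal consumer of the settings introduced by these patches might look like the sketch below. It assumes the LLamaSharp.SemanticKernel namespace for LLamaSharpPromptExecutionSettings, and uses only the members visible in the diffs (FromRequestSettings, MaxTokens, and the new ResponseFormat property); the class name ResponseFormatDemo is made up for the example.

using System;
using LLamaSharp.SemanticKernel;   // assumed namespace of LLamaSharpPromptExecutionSettings

class ResponseFormatDemo
{
    static void Main()
    {
        // Passing null mirrors the unit test above: a default settings object is
        // created and the supplied value becomes MaxTokens.
        var settings = LLamaSharpPromptExecutionSettings.FromRequestSettings(null, 256);

        // ResponseFormat is the hint added in PATCH 13/14; downstream code can read it
        // to post-process the completion, for example as a JSON object.
        settings.ResponseFormat = "json_object";

        Console.WriteLine($"max_tokens={settings.MaxTokens}, response_format={settings.ResponseFormat}");
    }
}

The obsolete ChatRequestSettings and ChatRequestSettingsConverter re-added in PATCH 14/14 keep existing call sites compiling while the [Obsolete] attribute steers users toward the new type.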