Commit 5fe721bdbe accidentally removed a number of changes that it shouldn't have. This commit restores them.

Originally from these PRs: - https://github.com/SciSharp/LLamaSharp/pull/263 - https://github.com/SciSharp/LLamaSharp/pull/259
2 years ago · 89fef05362
--- a/LLama.Web/Common/ModelOptions.cs
+++ b/LLama.Web/Common/ModelOptions.cs
@@ -20,7 +20,7 @@ namespace LLama.Web.Common
        /// <summary>
        /// Model context size (n_ctx)
        /// </summary>
        public uint ContextSize { get; set; } = 512;
        public uint? ContextSize { get; set; }

        /// <summary>
        /// the GPU that is used for scratch and small tensors
--- a/LLama/Abstractions/IContextParams.cs
+++ b/LLama/Abstractions/IContextParams.cs
@@ -11,7 +11,7 @@ public interface IContextParams
    /// <summary>
    /// Model context size (n_ctx)
    /// </summary>
    uint ContextSize { get; set; }
    uint? ContextSize { get; set; }

    /// <summary>
    /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -43,7 +43,7 @@ namespace LLama.Common
        /// <param name="data"></param>
        public FixedSizeQueue(int size, IEnumerable<T> data)
        {
 #if !NETSTANDARD2_0 
 #if NET6_0_OR_GREATER
            // Try to check the size without enumerating the entire IEnumerable. This may not be able to get the count,
            // in which case we'll have to check later
            if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
--- a/LLama/Common/ModelParams.cs
+++ b/LLama/Common/ModelParams.cs
@@ -13,92 +13,60 @@ namespace LLama.Common
    public record ModelParams
        : ILLamaParams
    {
        /// <summary>
        /// Model context size (n_ctx)
        /// </summary>
        public uint ContextSize { get; set; } = 512;
        /// <summary>
        /// the GPU that is used for scratch and small tensors
        /// </summary>
        /// <inheritdoc />
        public uint? ContextSize { get; set; }

        /// <inheritdoc />
        public int MainGpu { get; set; } = 0;

        /// <summary>
        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
        /// </summary>
        /// <inheritdoc />
        public int GpuLayerCount { get; set; } = 20;
        /// <summary>
        /// Seed for the random number generator (seed)
        /// </summary>

        /// <inheritdoc />
        public uint Seed { get; set; } = 0xFFFFFFFF;
        /// <summary>
        /// Use f16 instead of f32 for memory kv (memory_f16)
        /// </summary>

        /// <inheritdoc />
        public bool UseFp16Memory { get; set; } = true;
        /// <summary>
        /// Use mmap for faster loads (use_mmap)
        /// </summary>

        /// <inheritdoc />
        public bool UseMemorymap { get; set; } = true;
        /// <summary>
        /// Use mlock to keep model in memory (use_mlock)
        /// </summary>

        /// <inheritdoc />
        public bool UseMemoryLock { get; set; }
        /// <summary>
        /// Compute perplexity over the prompt (perplexity)
        /// </summary>

        /// <inheritdoc />
        public bool Perplexity { get; set; }
        /// <summary>
        /// Model path (model)
        /// </summary>

        /// <inheritdoc />
        public string ModelPath { get; set; }

        /// <summary>
        /// List of LoRAs to apply
        /// </summary>
        /// <inheritdoc />
        public AdapterCollection LoraAdapters { get; set; } = new();

        /// <summary>
        /// base model path for the lora adapter (lora_base)
        /// </summary>
        /// <inheritdoc />
        public string LoraBase { get; set; } = string.Empty;

        /// <summary>
        /// Number of threads (null = autodetect) (n_threads)
        /// </summary>
        /// <inheritdoc />
        public uint? Threads { get; set; }

        /// <summary>
        /// Number of threads to use for batch processing (null = autodetect) (n_threads)
        /// </summary>
        /// <inheritdoc />
        public uint? BatchThreads { get; set; }

        /// <summary>
        /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
        /// </summary>
        /// <inheritdoc />
        public uint BatchSize { get; set; } = 512;

        /// <summary>
        /// Whether to use embedding mode. (embedding) Note that if this is set to true, 
        /// The LLamaModel won't produce text response anymore.
        /// </summary>
        /// <inheritdoc />
        public bool EmbeddingMode { get; set; }

        /// <summary>
        /// how split tensors should be distributed across GPUs.
        /// </summary>
        /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
        /// <inheritdoc />
        [JsonConverter(typeof(TensorSplitsCollectionConverter))]
        public TensorSplitsCollection TensorSplits { get; set; } = new();

 		/// <summary>
 		/// RoPE base frequency
 		/// </summary>
 		public float? RopeFrequencyBase { get; set; }

 		/// <summary>
 		/// RoPE frequency scaling factor
 		/// </summary>
 		public float? RopeFrequencyScale { get; set; }
        /// <inheritdoc />
        public float? RopeFrequencyBase { get; set; }

        /// <inheritdoc />
        public float? RopeFrequencyScale { get; set; }

        /// <inheritdoc />
        public float? YarnExtrapolationFactor { get; set; }
@@ -118,20 +86,13 @@ namespace LLama.Common
        /// <inheritdoc />
        public RopeScalingType? YarnScalingType { get; set; }

        /// <summary>
        /// Use experimental mul_mat_q kernels
        /// </summary>
        /// <inheritdoc />
        public bool MulMatQ { get; set; }


        /// <summary>
        /// Load vocab only (no weights)
        /// </summary>
        /// <inheritdoc />
        public bool VocabOnly { get; set; }

        /// <summary>
        /// The encoding to use to convert text for the model
        /// </summary>
        /// <inheritdoc />
        [JsonConverter(typeof(EncodingConverter))]
        public Encoding Encoding { get; set; } = Encoding.UTF8;

--- a/LLama/Extensions/DictionaryExtensions.cs
+++ b/LLama/Extensions/DictionaryExtensions.cs
@@ -9,6 +9,8 @@ namespace LLama.Extensions
        {
            return GetValueOrDefaultImpl(dictionary, key, defaultValue);
        }
 #elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
    #error Target framework not supported!
 #endif

        internal static TValue GetValueOrDefaultImpl<TKey, TValue>(IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)
--- a/LLama/Extensions/EncodingExtensions.cs
+++ b/LLama/Extensions/EncodingExtensions.cs
@@ -15,6 +15,8 @@ internal static class EncodingExtensions
    {
        return GetCharCountImpl(encoding, bytes);
    }
 #elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
    #error Target framework not supported!
 #endif

    internal static int GetCharsImpl(Encoding encoding, ReadOnlySpan<byte> bytes, Span<char> output)
--- a/LLama/Extensions/IContextParamsExtensions.cs
+++ b/LLama/Extensions/IContextParamsExtensions.cs
@@ -21,7 +21,7 @@ namespace LLama.Extensions
        public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
        {
            result = NativeApi.llama_context_default_params();
            result.n_ctx = @params.ContextSize;
            result.n_ctx = @params.ContextSize ?? 0;
            result.n_batch = @params.BatchSize;
            result.seed = @params.Seed;
            result.f16_kv = @params.UseFp16Memory;
--- a/LLama/Extensions/IEnumerableExtensions.cs
+++ b/LLama/Extensions/IEnumerableExtensions.cs
@@ -10,6 +10,8 @@ namespace LLama.Extensions
        {
            return TakeLastImpl(source, count);
        }
 #elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
    #error Target framework not supported!
 #endif

        internal static IEnumerable<T> TakeLastImpl<T>(IEnumerable<T> source, int count)
--- a/LLama/Extensions/KeyValuePairExtensions.cs
+++ b/LLama/Extensions/KeyValuePairExtensions.cs
@@ -19,5 +19,7 @@ internal static class KeyValuePairExtensions
        first = pair.Key;
        second = pair.Value;
    }
 #elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
    #error Target framework not supported!
 #endif
 }
--- a/LLama/Extensions/ListExtensions.cs
+++ b/LLama/Extensions/ListExtensions.cs
@@ -5,7 +5,7 @@ namespace LLama.Extensions
 {
    internal static class ListExtensions
    {
 #if NETSTANDARD2_0
 #if !NET6_0_OR_GREATER
        public static void EnsureCapacity<T>(this List<T> list, int capacity)
        {
            if (list.Capacity < capacity)
--- a/LLama/Native/LLamaContextParams.cs
+++ b/LLama/Native/LLamaContextParams.cs
@@ -22,7 +22,7 @@ namespace LLama.Native
        public uint seed;

        /// <summary>
        /// text context
        /// text context, 0 = from model
        /// </summary>
        public uint n_ctx;