diff --git a/LLama.Web/Common/ModelOptions.cs b/LLama.Web/Common/ModelOptions.cs
index 182ace00..da840fe9 100644
--- a/LLama.Web/Common/ModelOptions.cs
+++ b/LLama.Web/Common/ModelOptions.cs
@@ -20,7 +20,7 @@ namespace LLama.Web.Common
         /// <summary>
         /// Model context size (n_ctx)
         /// </summary>
-        public uint ContextSize { get; set; } = 512;
+        public uint? ContextSize { get; set; }
 
         /// <summary>
         /// the GPU that is used for scratch and small tensors
diff --git a/LLama/Abstractions/IContextParams.cs b/LLama/Abstractions/IContextParams.cs
index 0f129217..da749852 100644
--- a/LLama/Abstractions/IContextParams.cs
+++ b/LLama/Abstractions/IContextParams.cs
@@ -11,7 +11,7 @@ public interface IContextParams
     /// <summary>
     /// Model context size (n_ctx)
     /// </summary>
-    uint ContextSize { get; set; }
+    uint? ContextSize { get; set; }
 
     /// <summary>
     /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
index d4577a47..37fb1cf5 100644
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -43,7 +43,7 @@ namespace LLama.Common
         /// </summary>
         public FixedSizeQueue(int size, IEnumerable<T> data)
         {
-#if !NETSTANDARD2_0
+#if NET6_0_OR_GREATER
             // Try to check the size without enumerating the entire IEnumerable. This may not be able to get the count,
             // in which case we'll have to check later
             if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs
index 8bf59fa5..6d19a8de 100644
--- a/LLama/Common/ModelParams.cs
+++ b/LLama/Common/ModelParams.cs
@@ -13,92 +13,60 @@ namespace LLama.Common
     public record ModelParams
         : ILLamaParams
     {
-        /// <summary>
-        /// Model context size (n_ctx)
-        /// </summary>
-        public uint ContextSize { get; set; } = 512;
-        /// <summary>
-        /// the GPU that is used for scratch and small tensors
-        /// </summary>
+        /// <inheritdoc />
+        public uint? ContextSize { get; set; }
+
+        /// <inheritdoc />
         public int MainGpu { get; set; } = 0;
-        /// <summary>
-        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
-        /// </summary>
+
+        /// <inheritdoc />
         public int GpuLayerCount { get; set; } = 20;
-        /// <summary>
-        /// Seed for the random number generator (seed)
-        /// </summary>
+
+        /// <inheritdoc />
         public uint Seed { get; set; } = 0xFFFFFFFF;
-        /// <summary>
-        /// Use f16 instead of f32 for memory kv (memory_f16)
-        /// </summary>
+
+        /// <inheritdoc />
         public bool UseFp16Memory { get; set; } = true;
-        /// <summary>
-        /// Use mmap for faster loads (use_mmap)
-        /// </summary>
+
+        /// <inheritdoc />
         public bool UseMemorymap { get; set; } = true;
-        /// <summary>
-        /// Use mlock to keep model in memory (use_mlock)
-        /// </summary>
+
+        /// <inheritdoc />
         public bool UseMemoryLock { get; set; }
-        /// <summary>
-        /// Compute perplexity over the prompt (perplexity)
-        /// </summary>
+
+        /// <inheritdoc />
         public bool Perplexity { get; set; }
-        /// <summary>
-        /// Model path (model)
-        /// </summary>
+
+        /// <inheritdoc />
         public string ModelPath { get; set; }
 
-        /// <summary>
-        /// List of LoRAs to apply
-        /// </summary>
+        /// <inheritdoc />
         public AdapterCollection LoraAdapters { get; set; } = new();
 
-        /// <summary>
-        /// base model path for the lora adapter (lora_base)
-        /// </summary>
+        /// <inheritdoc />
        public string LoraBase { get; set; } = string.Empty;
 
-        /// <summary>
-        /// Number of threads (null = autodetect) (n_threads)
-        /// </summary>
+        /// <inheritdoc />
         public uint? Threads { get; set; }
 
-        /// <summary>
-        /// Number of threads to use for batch processing (null = autodetect) (n_threads)
-        /// </summary>
+        /// <inheritdoc />
         public uint? BatchThreads { get; set; }
 
-        /// <summary>
-        /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
-        /// </summary>
+        /// <inheritdoc />
         public uint BatchSize { get; set; } = 512;
 
-        /// <summary>
-        /// Whether to use embedding mode. (embedding) Note that if this is set to true,
-        /// The LLamaModel won't produce text response anymore.
-        /// </summary>
+        /// <inheritdoc />
         public bool EmbeddingMode { get; set; }
 
-        /// <summary>
-        /// how split tensors should be distributed across GPUs.
-        /// </summary>
-        /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
+        /// <inheritdoc />
         [JsonConverter(typeof(TensorSplitsCollectionConverter))]
         public TensorSplitsCollection TensorSplits { get; set; } = new();
 
-        /// <summary>
-        /// RoPE base frequency
-        /// </summary>
-        public float? RopeFrequencyBase { get; set; }
-
-        /// <summary>
-        /// RoPE frequency scaling factor
-        /// </summary>
-        public float? RopeFrequencyScale { get; set; }
+        /// <inheritdoc />
+        public float? RopeFrequencyBase { get; set; }
+
+        /// <inheritdoc />
+        public float? RopeFrequencyScale { get; set; }
 
         /// <inheritdoc />
         public float? YarnExtrapolationFactor { get; set; }
@@ -118,20 +86,13 @@ namespace LLama.Common
         /// <inheritdoc />
         public RopeScalingType? YarnScalingType { get; set; }
 
-        /// <summary>
-        /// Use experimental mul_mat_q kernels
-        /// </summary>
+        /// <inheritdoc />
         public bool MulMatQ { get; set; }
-
-        /// <summary>
-        /// Load vocab only (no weights)
-        /// </summary>
+
+        /// <inheritdoc />
         public bool VocabOnly { get; set; }
 
-        /// <summary>
-        /// The encoding to use to convert text for the model
-        /// </summary>
+        /// <inheritdoc />
         [JsonConverter(typeof(EncodingConverter))]
         public Encoding Encoding { get; set; } = Encoding.UTF8;
diff --git a/LLama/Extensions/DictionaryExtensions.cs b/LLama/Extensions/DictionaryExtensions.cs
index a39ed7e8..b3643fae 100644
--- a/LLama/Extensions/DictionaryExtensions.cs
+++ b/LLama/Extensions/DictionaryExtensions.cs
@@ -9,6 +9,8 @@ namespace LLama.Extensions
         {
             return GetValueOrDefaultImpl(dictionary, key, defaultValue);
         }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+#error Target framework not supported!
 #endif
 
         internal static TValue GetValueOrDefaultImpl<TKey, TValue>(IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)
diff --git a/LLama/Extensions/EncodingExtensions.cs b/LLama/Extensions/EncodingExtensions.cs
index e88d83a7..3f2bd776 100644
--- a/LLama/Extensions/EncodingExtensions.cs
+++ b/LLama/Extensions/EncodingExtensions.cs
@@ -15,6 +15,8 @@ internal static class EncodingExtensions
     {
         return GetCharCountImpl(encoding, bytes);
     }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+#error Target framework not supported!
 #endif
 
     internal static int GetCharsImpl(Encoding encoding, ReadOnlySpan<byte> bytes, Span<char> output)
diff --git a/LLama/Extensions/IContextParamsExtensions.cs b/LLama/Extensions/IContextParamsExtensions.cs
index 16716b53..bb029c16 100644
--- a/LLama/Extensions/IContextParamsExtensions.cs
+++ b/LLama/Extensions/IContextParamsExtensions.cs
@@ -21,7 +21,7 @@ namespace LLama.Extensions
         public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
         {
             result = NativeApi.llama_context_default_params();
-            result.n_ctx = @params.ContextSize;
+            result.n_ctx = @params.ContextSize ?? 0;
             result.n_batch = @params.BatchSize;
             result.seed = @params.Seed;
             result.f16_kv = @params.UseFp16Memory;
diff --git a/LLama/Extensions/IEnumerableExtensions.cs b/LLama/Extensions/IEnumerableExtensions.cs
index 9e01feb8..3d1e2e81 100644
--- a/LLama/Extensions/IEnumerableExtensions.cs
+++ b/LLama/Extensions/IEnumerableExtensions.cs
@@ -10,6 +10,8 @@ namespace LLama.Extensions
         {
             return TakeLastImpl(source, count);
         }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+#error Target framework not supported!
 #endif
 
         internal static IEnumerable<T> TakeLastImpl<T>(IEnumerable<T> source, int count)
diff --git a/LLama/Extensions/KeyValuePairExtensions.cs b/LLama/Extensions/KeyValuePairExtensions.cs
index 6e12654d..595c49e8 100644
--- a/LLama/Extensions/KeyValuePairExtensions.cs
+++ b/LLama/Extensions/KeyValuePairExtensions.cs
@@ -19,5 +19,7 @@ internal static class KeyValuePairExtensions
         first = pair.Key;
         second = pair.Value;
     }
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+#error Target framework not supported!
 #endif
 }
\ No newline at end of file
diff --git a/LLama/Extensions/ListExtensions.cs b/LLama/Extensions/ListExtensions.cs
index 11a1d4f0..eb30a07a 100644
--- a/LLama/Extensions/ListExtensions.cs
+++ b/LLama/Extensions/ListExtensions.cs
@@ -5,7 +5,7 @@ namespace LLama.Extensions
 {
     internal static class ListExtensions
     {
-#if NETSTANDARD2_0
+#if !NET6_0_OR_GREATER
         public static void EnsureCapacity<T>(this List<T> list, int capacity)
         {
             if (list.Capacity < capacity)
diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs
index f1ba569d..c0f2afa2 100644
--- a/LLama/Native/LLamaContextParams.cs
+++ b/LLama/Native/LLamaContextParams.cs
@@ -22,7 +22,7 @@ namespace LLama.Native
         public uint seed;
 
         /// <summary>
-        /// text context
+        /// text context, 0 = from model
         /// </summary>
         public uint n_ctx;
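Taken together, these changes make `ContextSize` optional end to end: when it is left `null`, `ToLlamaContextParams` maps it to `n_ctx = 0`, which llama.cpp interprets as "take the context length from the model file". Below is a minimal sketch of the resulting usage, assuming the existing `ModelParams`/`LLamaWeights` loading API; the model path is a placeholder.

```csharp
using LLama;
using LLama.Common;

// Placeholder path; substitute a real GGUF model file.
var parameters = new ModelParams("models/llama-2-7b.Q4_K_M.gguf")
{
    // Leaving ContextSize unset (null) now means "use the context length
    // the model was trained with": it becomes n_ctx = 0 in the native
    // struct, and llama.cpp reads the size from the model itself.
    ContextSize = null,

    // An explicit size still behaves exactly as before:
    // ContextSize = 2048,
};

using var weights = LLamaWeights.LoadFromFile(parameters);
using var context = weights.CreateContext(parameters);
```

As a side note on the preprocessor changes: the repeated `#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER` / `#error Target framework not supported!` blocks turn an unsupported target framework into a compile-time failure rather than a silently missing polyfill.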