diff --git a/LLama.Web/Common/ModelOptions.cs b/LLama.Web/Common/ModelOptions.cs
index 182ace00..da840fe9 100644
--- a/LLama.Web/Common/ModelOptions.cs
+++ b/LLama.Web/Common/ModelOptions.cs
@@ -20,7 +20,7 @@ namespace LLama.Web.Common
/// <summary>
/// Model context size (n_ctx)
/// </summary>
- public uint ContextSize { get; set; } = 512;
+ public uint? ContextSize { get; set; }
/// <summary>
/// the GPU that is used for scratch and small tensors
diff --git a/LLama/Abstractions/IContextParams.cs b/LLama/Abstractions/IContextParams.cs
index 0f129217..da749852 100644
--- a/LLama/Abstractions/IContextParams.cs
+++ b/LLama/Abstractions/IContextParams.cs
@@ -11,7 +11,7 @@ public interface IContextParams
/// <summary>
/// Model context size (n_ctx)
/// </summary>
- uint ContextSize { get; set; }
+ uint? ContextSize { get; set; }
/// <summary>
/// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
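Making `ContextSize` nullable gives the interface a way to say "no explicit context size": `null` now means "defer to whatever the model file specifies". A minimal sketch of how a consumer might read the new property (the `DescribeContext` helper is hypothetical, for illustration only):

```csharp
using LLama.Abstractions;

// Hypothetical helper: shows what the nullable ContextSize communicates.
static string DescribeContext(IContextParams @params)
{
    return @params.ContextSize is uint size
        ? $"explicit context of {size} tokens"
        : "context length taken from the model file";
}
```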
diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
index d4577a47..37fb1cf5 100644
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -43,7 +43,7 @@ namespace LLama.Common
/// <param name="data"></param>
public FixedSizeQueue(int size, IEnumerable<T> data)
{
-#if !NETSTANDARD2_0
+#if NET6_0_OR_GREATER
// Try to check the size without enumerating the entire IEnumerable. This may not be able to get the count,
// in which case we'll have to check later
if (data.TryGetNonEnumeratedCount(out var dataCount) && dataCount > size)
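`Enumerable.TryGetNonEnumeratedCount` was only added in .NET 6, so the old `!NETSTANDARD2_0` condition was too broad: a netstandard2.1 build would have compiled this branch against an API that does not exist there. A standalone sketch of the corrected guard (the helper name is illustrative; the real constructor defers the size check to enumeration time on older targets):

```csharp
using System.Collections.Generic;
using System.Linq;

static bool? TryCheckOversized<T>(IEnumerable<T> data, int size)
{
#if NET6_0_OR_GREATER
    // Cheap path: succeeds for ICollection<T> etc. without enumerating.
    if (data.TryGetNonEnumeratedCount(out var count))
        return count > size;
#endif
    // Unknown without enumerating; the caller must check during enumeration.
    return null;
}
```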
diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs
index 8bf59fa5..6d19a8de 100644
--- a/LLama/Common/ModelParams.cs
+++ b/LLama/Common/ModelParams.cs
@@ -13,92 +13,60 @@ namespace LLama.Common
public record ModelParams
: ILLamaParams
{
- /// <summary>
- /// Model context size (n_ctx)
- /// </summary>
- public uint ContextSize { get; set; } = 512;
- /// <summary>
- /// the GPU that is used for scratch and small tensors
- /// </summary>
+ /// <inheritdoc />
+ public uint? ContextSize { get; set; }
+
+ /// <inheritdoc />
public int MainGpu { get; set; } = 0;
- /// <summary>
- /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
- /// </summary>
+ /// <inheritdoc />
public int GpuLayerCount { get; set; } = 20;
- /// <summary>
- /// Seed for the random number generator (seed)
- /// </summary>
+
+ /// <inheritdoc />
public uint Seed { get; set; } = 0xFFFFFFFF;
- /// <summary>
- /// Use f16 instead of f32 for memory kv (memory_f16)
- /// </summary>
+
+ /// <inheritdoc />
public bool UseFp16Memory { get; set; } = true;
- /// <summary>
- /// Use mmap for faster loads (use_mmap)
- /// </summary>
+
+ /// <inheritdoc />
public bool UseMemorymap { get; set; } = true;
- /// <summary>
- /// Use mlock to keep model in memory (use_mlock)
- /// </summary>
+
+ /// <inheritdoc />
public bool UseMemoryLock { get; set; }
- /// <summary>
- /// Compute perplexity over the prompt (perplexity)
- /// </summary>
+
+ /// <inheritdoc />
public bool Perplexity { get; set; }
- /// <summary>
- /// Model path (model)
- /// </summary>
+
+ /// <inheritdoc />
public string ModelPath { get; set; }
- /// <summary>
- /// List of LoRAs to apply
- /// </summary>
+ /// <inheritdoc />
public AdapterCollection LoraAdapters { get; set; } = new();
- /// <summary>
- /// base model path for the lora adapter (lora_base)
- /// </summary>
+ /// <inheritdoc />
public string LoraBase { get; set; } = string.Empty;
- /// <summary>
- /// Number of threads (null = autodetect) (n_threads)
- /// </summary>
+ /// <inheritdoc />
public uint? Threads { get; set; }
- /// <summary>
- /// Number of threads to use for batch processing (null = autodetect) (n_threads)
- /// </summary>
+ /// <inheritdoc />
public uint? BatchThreads { get; set; }
- /// <summary>
- /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
- /// </summary>
+ /// <inheritdoc />
public uint BatchSize { get; set; } = 512;
- /// <summary>
- /// Whether to use embedding mode. (embedding) Note that if this is set to true,
- /// The LLamaModel won't produce text response anymore.
- /// </summary>
+ /// <inheritdoc />
public bool EmbeddingMode { get; set; }
- /// <summary>
- /// how split tensors should be distributed across GPUs.
- /// </summary>
- /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
+ /// <inheritdoc />
[JsonConverter(typeof(TensorSplitsCollectionConverter))]
public TensorSplitsCollection TensorSplits { get; set; } = new();
- /// <summary>
- /// RoPE base frequency
- /// </summary>
- public float? RopeFrequencyBase { get; set; }
-
- /// <summary>
- /// RoPE frequency scaling factor
- /// </summary>
- public float? RopeFrequencyScale { get; set; }
+ /// <inheritdoc />
+ public float? RopeFrequencyBase { get; set; }
+ /// <inheritdoc />
+ public float? RopeFrequencyScale { get; set; }
/// </summary>
public float? YarnExtrapolationFactor { get; set; }
@@ -118,20 +86,13 @@ namespace LLama.Common
/// </summary>
public RopeScalingType? YarnScalingType { get; set; }
- /// <summary>
- /// Use experimental mul_mat_q kernels
- /// </summary>
+ /// <inheritdoc />
public bool MulMatQ { get; set; }
-
- /// <summary>
- /// Load vocab only (no weights)
- /// </summary>
+ /// <inheritdoc />
public bool VocabOnly { get; set; }
- /// <summary>
- /// The encoding to use to convert text for the model
- /// </summary>
+ /// <inheritdoc />
[JsonConverter(typeof(EncodingConverter))]
public Encoding Encoding { get; set; } = Encoding.UTF8;
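With the summaries now inherited from `ILLamaParams` via `<inheritdoc />`, the documentation lives in one place and cannot drift between the interface and this record. The only behavioural change is the nullable default. A short usage sketch (the path is a placeholder, and this assumes the record's existing single-argument constructor):

```csharp
using LLama.Common;

// ContextSize left unset (null): the context length now comes from the
// model file instead of the old hard-coded default of 512.
var fromModel = new ModelParams("models/example.gguf");

// Explicit sizing still behaves exactly as before.
var explicitSize = new ModelParams("models/example.gguf")
{
    ContextSize = 2048,
};
```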
diff --git a/LLama/Extensions/DictionaryExtensions.cs b/LLama/Extensions/DictionaryExtensions.cs
index a39ed7e8..b3643fae 100644
--- a/LLama/Extensions/DictionaryExtensions.cs
+++ b/LLama/Extensions/DictionaryExtensions.cs
@@ -9,6 +9,8 @@ namespace LLama.Extensions
{
return GetValueOrDefaultImpl(dictionary, key, defaultValue);
}
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+ #error Target framework not supported!
#endif
internal static TValue GetValueOrDefaultImpl<TKey, TValue>(IReadOnlyDictionary<TKey, TValue> dictionary, TKey key, TValue defaultValue)
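The `#elif ... #error` tail converts "unknown target framework" from a silent gap into a build failure: a future TFM that is neither netstandard2.0 (which gets the polyfill) nor net6.0+/netstandard2.1+ (which have the real API) can no longer compile without someone making a deliberate choice. The same guard is repeated in the extension classes below. The shape of the pattern, as an isolated sketch:

```csharp
internal static class PolyfillGuard
{
#if NETSTANDARD2_0
    // This target lacks the BCL method, so a polyfill is compiled in here.
    internal static bool UsesPolyfill => true;
#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
    // Neither polyfilled nor natively supported: refuse to build.
    #error Target framework not supported!
#endif
}
```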
diff --git a/LLama/Extensions/EncodingExtensions.cs b/LLama/Extensions/EncodingExtensions.cs
index e88d83a7..3f2bd776 100644
--- a/LLama/Extensions/EncodingExtensions.cs
+++ b/LLama/Extensions/EncodingExtensions.cs
@@ -15,6 +15,8 @@ internal static class EncodingExtensions
{
return GetCharCountImpl(encoding, bytes);
}
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+ #error Target framework not supported!
#endif
internal static int GetCharsImpl(Encoding encoding, ReadOnlySpan<byte> bytes, Span<char> output)
diff --git a/LLama/Extensions/IContextParamsExtensions.cs b/LLama/Extensions/IContextParamsExtensions.cs
index 16716b53..bb029c16 100644
--- a/LLama/Extensions/IContextParamsExtensions.cs
+++ b/LLama/Extensions/IContextParamsExtensions.cs
@@ -21,7 +21,7 @@ namespace LLama.Extensions
public static void ToLlamaContextParams(this IContextParams @params, out LLamaContextParams result)
{
result = NativeApi.llama_context_default_params();
- result.n_ctx = @params.ContextSize;
+ result.n_ctx = @params.ContextSize ?? 0;
result.n_batch = @params.BatchSize;
result.seed = @params.Seed;
result.f16_kv = @params.UseFp16Memory;
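This `?? 0` is where the managed `null` turns into the native convention: llama.cpp reads `n_ctx == 0` as "use the model's trained context length" (see the `LLamaContextParams` doc change at the end of this diff). A sketch of the round trip, assuming a loaded native backend and the extension shown above:

```csharp
using LLama.Common;
using LLama.Extensions;

var @params = new ModelParams("models/example.gguf"); // ContextSize == null
@params.ToLlamaContextParams(out var native);

// null was coalesced to 0, the native "take n_ctx from the model" sentinel.
System.Diagnostics.Debug.Assert(native.n_ctx == 0);
```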
diff --git a/LLama/Extensions/IEnumerableExtensions.cs b/LLama/Extensions/IEnumerableExtensions.cs
index 9e01feb8..3d1e2e81 100644
--- a/LLama/Extensions/IEnumerableExtensions.cs
+++ b/LLama/Extensions/IEnumerableExtensions.cs
@@ -10,6 +10,8 @@ namespace LLama.Extensions
{
return TakeLastImpl(source, count);
}
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+ #error Target framework not supported!
#endif
internal static IEnumerable<T> TakeLastImpl<T>(IEnumerable<T> source, int count)
diff --git a/LLama/Extensions/KeyValuePairExtensions.cs b/LLama/Extensions/KeyValuePairExtensions.cs
index 6e12654d..595c49e8 100644
--- a/LLama/Extensions/KeyValuePairExtensions.cs
+++ b/LLama/Extensions/KeyValuePairExtensions.cs
@@ -19,5 +19,7 @@ internal static class KeyValuePairExtensions
first = pair.Key;
second = pair.Value;
}
+#elif !NET6_0_OR_GREATER && !NETSTANDARD2_1_OR_GREATER
+ #error Target framework not supported!
#endif
}
\ No newline at end of file
diff --git a/LLama/Extensions/ListExtensions.cs b/LLama/Extensions/ListExtensions.cs
index 11a1d4f0..eb30a07a 100644
--- a/LLama/Extensions/ListExtensions.cs
+++ b/LLama/Extensions/ListExtensions.cs
@@ -5,7 +5,7 @@ namespace LLama.Extensions
{
internal static class ListExtensions
{
-#if NETSTANDARD2_0
+#if !NET6_0_OR_GREATER
public static void EnsureCapacity<T>(this List<T> list, int capacity)
{
if (list.Capacity < capacity)
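`List<T>.EnsureCapacity` only ships in .NET 6+, so gating the polyfill on `NETSTANDARD2_0` alone left netstandard2.1 builds without it; `!NET6_0_OR_GREATER` covers every target that lacks the real method. The complete polyfill, roughly as it stands after this change:

```csharp
using System.Collections.Generic;

internal static class ListExtensions
{
#if !NET6_0_OR_GREATER
    // Pre-grow the backing array so a known number of Adds won't reallocate.
    public static void EnsureCapacity<T>(this List<T> list, int capacity)
    {
        if (list.Capacity < capacity)
            list.Capacity = capacity;
    }
#endif
}
```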
diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs
index f1ba569d..c0f2afa2 100644
--- a/LLama/Native/LLamaContextParams.cs
+++ b/LLama/Native/LLamaContextParams.cs
@@ -22,7 +22,7 @@ namespace LLama.Native
public uint seed;
/// <summary>
- /// text context
+ /// text context, 0 = from model
/// </summary>
public uint n_ctx;