diff --git a/LLama.Unittest/LLamaEmbedderTests.cs b/LLama.Unittest/LLamaEmbedderTests.cs
index b8fede8f..4c8fb37f 100644
--- a/LLama.Unittest/LLamaEmbedderTests.cs
+++ b/LLama.Unittest/LLamaEmbedderTests.cs
@@ -9,7 +9,10 @@ public sealed class LLamaEmbedderTests

public LLamaEmbedderTests()
{
- var @params = new ModelParams(Constants.ModelPath);
+ var @params = new ModelParams(Constants.ModelPath)
+ {
+ EmbeddingMode = true,
+ };
using var weights = LLamaWeights.LoadFromFile(@params);
_embedder = new(weights, @params);
}
diff --git a/LLama/Abstractions/IContextParams.cs b/LLama/Abstractions/IContextParams.cs
index d09a6a7c..2f4e7fea 100644
--- a/LLama/Abstractions/IContextParams.cs
+++ b/LLama/Abstractions/IContextParams.cs
@@ -11,91 +11,91 @@ public interface IContextParams
/// <summary>
/// Model context size (n_ctx)
/// </summary>
- uint? ContextSize { get; set; }
+ uint? ContextSize { get; }

/// <summary>
/// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
/// </summary>
- uint BatchSize { get; set; }
+ uint BatchSize { get; }

/// <summary>
/// Seed for the random number generator (seed)
/// </summary>
- uint Seed { get; set; }
+ uint Seed { get; }

/// <summary>
/// Whether to use embedding mode. (embedding) Note that if this is set to true,
/// The LLamaModel won't produce text response anymore.
/// </summary>
- bool EmbeddingMode { get; set; }
+ bool EmbeddingMode { get; }

/// <summary>
/// RoPE base frequency (null to fetch from the model)
/// </summary>
- float? RopeFrequencyBase { get; set; }
+ float? RopeFrequencyBase { get; }

/// <summary>
/// RoPE frequency scaling factor (null to fetch from the model)
/// </summary>
- float? RopeFrequencyScale { get; set; }
+ float? RopeFrequencyScale { get; }

/// <summary>
/// The encoding to use for models
/// </summary>
- Encoding Encoding { get; set; }
+ Encoding Encoding { get; }

/// <summary>
/// Number of threads (null = autodetect) (n_threads)
/// </summary>
- uint? Threads { get; set; }
+ uint? Threads { get; }

/// <summary>
/// Number of threads to use for batch processing (null = autodetect) (n_threads)
/// </summary>
- uint? BatchThreads { get; set; }
+ uint? BatchThreads { get; }

/// <summary>
/// YaRN extrapolation mix factor (null = from model)
/// </summary>
- float? YarnExtrapolationFactor { get; set; }
+ float? YarnExtrapolationFactor { get; }

/// <summary>
/// YaRN magnitude scaling factor (null = from model)
/// </summary>
- float? YarnAttentionFactor { get; set; }
+ float? YarnAttentionFactor { get; }

/// <summary>
/// YaRN low correction dim (null = from model)
/// </summary>
- float? YarnBetaFast { get; set; }
+ float? YarnBetaFast { get; }

/// <summary>
/// YaRN high correction dim (null = from model)
/// </summary>
- float? YarnBetaSlow { get; set; }
+ float? YarnBetaSlow { get; }

/// <summary>
/// YaRN original context length (null = from model)
/// </summary>
- uint? YarnOriginalContext { get; set; }
+ uint? YarnOriginalContext { get; }

/// <summary>
/// YaRN scaling method to use.
/// </summary>
- RopeScalingType? YarnScalingType { get; set; }
+ RopeScalingType? YarnScalingType { get; }

/// <summary>
/// Override the type of the K cache
/// </summary>
- GGMLType? TypeK { get; set; }
+ GGMLType? TypeK { get; }

/// <summary>
/// Override the type of the V cache
/// </summary>
- GGMLType? TypeV { get; set; }
+ GGMLType? TypeV { get; }

/// <summary>
/// Whether to disable offloading the KQV cache to the GPU
/// </summary>
- bool NoKqvOffload { get; set; }
+ bool NoKqvOffload { get; }
}
\ No newline at end of file
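
With the setters gone, IContextParams becomes a read-only contract: library code can inspect the configuration but can no longer mutate it behind the caller's back. A minimal sketch (not part of this PR) of what an immutable implementation could look like, assuming RopeScalingType and GGMLType live in LLama.Native, which this diff does not show:

```csharp
using System.Text;
using LLama.Abstractions;
using LLama.Native; // assumed namespace for RopeScalingType and GGMLType

// Init-only properties satisfy the get-only interface members.
public sealed class FixedContextParams : IContextParams
{
    public uint? ContextSize { get; init; }
    public uint BatchSize { get; init; } = 512;
    public uint Seed { get; init; } = 1337;
    public bool EmbeddingMode { get; init; }
    public float? RopeFrequencyBase { get; init; }
    public float? RopeFrequencyScale { get; init; }
    public Encoding Encoding { get; init; } = Encoding.UTF8;
    public uint? Threads { get; init; }
    public uint? BatchThreads { get; init; }
    public float? YarnExtrapolationFactor { get; init; }
    public float? YarnAttentionFactor { get; init; }
    public float? YarnBetaFast { get; init; }
    public float? YarnBetaSlow { get; init; }
    public uint? YarnOriginalContext { get; init; }
    public RopeScalingType? YarnScalingType { get; init; }
    public GGMLType? TypeK { get; init; }
    public GGMLType? TypeV { get; init; }
    public bool NoKqvOffload { get; init; }
}
```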
diff --git a/LLama/Abstractions/IModelParams.cs b/LLama/Abstractions/IModelParams.cs
index 902a37d2..3ef41bec 100644
--- a/LLama/Abstractions/IModelParams.cs
+++ b/LLama/Abstractions/IModelParams.cs
@@ -18,37 +18,37 @@ namespace LLama.Abstractions
/// <summary>
/// the GPU that is used for scratch and small tensors
/// </summary>
- int MainGpu { get; set; }
+ int MainGpu { get; }

/// <summary>
/// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
/// </summary>
- int GpuLayerCount { get; set; }
+ int GpuLayerCount { get; }

/// <summary>
/// Use mmap for faster loads (use_mmap)
/// </summary>
- bool UseMemorymap { get; set; }
+ bool UseMemorymap { get; }

/// <summary>
/// Use mlock to keep model in memory (use_mlock)
/// </summary>
- bool UseMemoryLock { get; set; }
+ bool UseMemoryLock { get; }

/// <summary>
/// Model path (model)
/// </summary>
- string ModelPath { get; set; }
+ string ModelPath { get; }

/// <summary>
/// how split tensors should be distributed across GPUs
/// </summary>
- TensorSplitsCollection TensorSplits { get; set; }
+ TensorSplitsCollection TensorSplits { get; }

/// <summary>
/// Load vocab only (no weights)
/// </summary>
- bool VocabOnly { get; set; }
+ bool VocabOnly { get; }

/// <summary>
/// List of LoRA adapters to apply
@@ -58,7 +58,7 @@ namespace LLama.Abstractions
/// <summary>
/// base model path for the lora adapter (lora_base)
/// </summary>
- string LoraBase { get; set; }
+ string LoraBase { get; }

/// <summary>
/// Override specific metadata items in the model
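
IModelParams gets the same treatment, so code that used to tweak loading options through the interface has to move that configuration to construction time. A hedged before/after sketch, using the concrete ModelParams and LLamaWeights.LoadFromFile exactly as the updated test above does (the model path is a placeholder):

```csharp
// Before: callers could reach through the interface and mutate shared state.
// static void UseAllGpuLayers(IModelParams p) => p.GpuLayerCount = -1; // no longer compiles

// After: configure the concrete type up front, then hand out the interface.
var @params = new ModelParams("path/to/model.gguf")
{
    GpuLayerCount = -1, // negative still means "offload everything"; see the next hunk
    UseMemorymap = true,
};
using var weights = LLamaWeights.LoadFromFile(@params);
```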
diff --git a/LLama/Extensions/IModelParamsExtensions.cs b/LLama/Extensions/IModelParamsExtensions.cs
index 5883fb46..f7fadece 100644
--- a/LLama/Extensions/IModelParamsExtensions.cs
+++ b/LLama/Extensions/IModelParamsExtensions.cs
@@ -25,14 +25,12 @@ public static class IModelParamsExtensions
throw new NotSupportedException("'UseMemoryLock' is not supported (llama_mlock_supported() == false)");
if (@params.UseMemorymap && !NativeApi.llama_mmap_supported())
throw new NotSupportedException("'UseMemorymap' is not supported (llama_mmap_supported() == false)");
- if (@params.GpuLayerCount < 0)
- @params.GpuLayerCount = int.MaxValue;
var disposer = new GroupDisposable();
result = NativeApi.llama_model_default_params();
result.main_gpu = @params.MainGpu;
- result.n_gpu_layers = @params.GpuLayerCount;
+ result.n_gpu_layers = @params.GpuLayerCount < 0 ? int.MaxValue : @params.GpuLayerCount;
result.use_mlock = @params.UseMemoryLock;
result.use_mmap = @params.UseMemorymap;
result.vocab_only = @params.VocabOnly;
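
Folding the negative-count check into the assignment is what makes the read-only interface workable here, and it also removes a hidden side effect: the old code silently overwrote the caller's GpuLayerCount with int.MaxValue. A sketch of the observable difference, assuming this extension keeps its usual ToLlamaModelParams(this IModelParams, out LLamaModelParams) shape, which the hunk does not show:

```csharp
using System.Diagnostics;

var @params = new ModelParams("path/to/model.gguf") { GpuLayerCount = -1 };
using (@params.ToLlamaModelParams(out var native)) // hypothetical call site
{
    Debug.Assert(native.n_gpu_layers == int.MaxValue); // native struct gets "all layers"
    Debug.Assert(@params.GpuLayerCount == -1);         // managed object is left untouched
}
```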
diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs
index bccfd141..8dfc4aab 100644
--- a/LLama/LLamaEmbedder.cs
+++ b/LLama/LLamaEmbedder.cs
@@ -30,7 +30,9 @@ namespace LLama
///
public LLamaEmbedder(LLamaWeights weights, IContextParams @params, ILogger? logger = null)
{
- @params.EmbeddingMode = true;
+ if (!@params.EmbeddingMode)
+ throw new ArgumentException("EmbeddingMode must be true", nameof(@params));
+
Context = weights.CreateContext(@params, logger);
}
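
Taken together, the embedder contract is now explicit: embedding mode must be enabled when the params are built, otherwise the constructor rejects them instead of quietly flipping the flag. A minimal usage sketch mirroring the updated test (the model path is a placeholder):

```csharp
var @params = new ModelParams("path/to/model.gguf")
{
    EmbeddingMode = true, // required: LLamaEmbedder no longer turns this on for you
};
using var weights = LLamaWeights.LoadFromFile(@params);
var embedder = new LLamaEmbedder(weights, @params); // throws ArgumentException when EmbeddingMode is false
```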