diff --git a/LLama.Unittest/LLamaEmbedderTests.cs b/LLama.Unittest/LLamaEmbedderTests.cs
index b8fede8f..4c8fb37f 100644
--- a/LLama.Unittest/LLamaEmbedderTests.cs
+++ b/LLama.Unittest/LLamaEmbedderTests.cs
@@ -9,7 +9,10 @@ public sealed class LLamaEmbedderTests
 
     public LLamaEmbedderTests()
     {
-        var @params = new ModelParams(Constants.ModelPath);
+        var @params = new ModelParams(Constants.ModelPath)
+        {
+            EmbeddingMode = true,
+        };
         using var weights = LLamaWeights.LoadFromFile(@params);
         _embedder = new(weights, @params);
     }
diff --git a/LLama/Abstractions/IContextParams.cs b/LLama/Abstractions/IContextParams.cs
index d09a6a7c..2f4e7fea 100644
--- a/LLama/Abstractions/IContextParams.cs
+++ b/LLama/Abstractions/IContextParams.cs
@@ -11,91 +11,91 @@ public interface IContextParams
     /// <summary>
     /// Model context size (n_ctx)
     /// </summary>
-    uint? ContextSize { get; set; }
+    uint? ContextSize { get; }
 
     /// <summary>
     /// batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
     /// </summary>
-    uint BatchSize { get; set; }
+    uint BatchSize { get; }
 
     /// <summary>
     /// Seed for the random number generator (seed)
     /// </summary>
-    uint Seed { get; set; }
+    uint Seed { get; }
 
     /// <summary>
     /// Whether to use embedding mode. (embedding) Note that if this is set to true,
     /// The LLamaModel won't produce text response anymore.
     /// </summary>
-    bool EmbeddingMode { get; set; }
+    bool EmbeddingMode { get; }
 
     /// <summary>
     /// RoPE base frequency (null to fetch from the model)
     /// </summary>
-    float? RopeFrequencyBase { get; set; }
+    float? RopeFrequencyBase { get; }
 
     /// <summary>
     /// RoPE frequency scaling factor (null to fetch from the model)
     /// </summary>
-    float? RopeFrequencyScale { get; set; }
+    float? RopeFrequencyScale { get; }
 
     /// <summary>
     /// The encoding to use for models
     /// </summary>
-    Encoding Encoding { get; set; }
+    Encoding Encoding { get; }
 
     /// <summary>
     /// Number of threads (null = autodetect) (n_threads)
     /// </summary>
-    uint? Threads { get; set; }
+    uint? Threads { get; }
 
     /// <summary>
     /// Number of threads to use for batch processing (null = autodetect) (n_threads)
     /// </summary>
-    uint? BatchThreads { get; set; }
+    uint? BatchThreads { get; }
 
     /// <summary>
     /// YaRN extrapolation mix factor (null = from model)
     /// </summary>
-    float? YarnExtrapolationFactor { get; set; }
+    float? YarnExtrapolationFactor { get; }
 
     /// <summary>
     /// YaRN magnitude scaling factor (null = from model)
     /// </summary>
-    float? YarnAttentionFactor { get; set; }
+    float? YarnAttentionFactor { get; }
 
     /// <summary>
     /// YaRN low correction dim (null = from model)
     /// </summary>
-    float? YarnBetaFast { get; set; }
+    float? YarnBetaFast { get; }
 
     /// <summary>
     /// YaRN high correction dim (null = from model)
     /// </summary>
-    float? YarnBetaSlow { get; set; }
+    float? YarnBetaSlow { get; }
 
     /// <summary>
     /// YaRN original context length (null = from model)
     /// </summary>
-    uint? YarnOriginalContext { get; set; }
+    uint? YarnOriginalContext { get; }
 
     /// <summary>
     /// YaRN scaling method to use.
     /// </summary>
-    RopeScalingType? YarnScalingType { get; set; }
+    RopeScalingType? YarnScalingType { get; }
 
     /// <summary>
     /// Override the type of the K cache
     /// </summary>
-    GGMLType? TypeK { get; set; }
+    GGMLType? TypeK { get; }
 
     /// <summary>
     /// Override the type of the V cache
     /// </summary>
-    GGMLType? TypeV { get; set; }
+    GGMLType? TypeV { get; }
 
     /// <summary>
     /// Whether to disable offloading the KQV cache to the GPU
     /// </summary>
-    bool NoKqvOffload { get; set; }
+    bool NoKqvOffload { get; }
 }
\ No newline at end of file
diff --git a/LLama/Abstractions/IModelParams.cs b/LLama/Abstractions/IModelParams.cs
index 902a37d2..3ef41bec 100644
--- a/LLama/Abstractions/IModelParams.cs
+++ b/LLama/Abstractions/IModelParams.cs
@@ -18,37 +18,37 @@ namespace LLama.Abstractions
         /// <summary>
         /// the GPU that is used for scratch and small tensors
        /// </summary>
-        int MainGpu { get; set; }
+        int MainGpu { get; }
 
         /// <summary>
         /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
         /// </summary>
-        int GpuLayerCount { get; set; }
+        int GpuLayerCount { get; }
 
         /// <summary>
         /// Use mmap for faster loads (use_mmap)
         /// </summary>
-        bool UseMemorymap { get; set; }
+        bool UseMemorymap { get; }
 
         /// <summary>
         /// Use mlock to keep model in memory (use_mlock)
         /// </summary>
-        bool UseMemoryLock { get; set; }
+        bool UseMemoryLock { get; }
 
         /// <summary>
         /// Model path (model)
         /// </summary>
-        string ModelPath { get; set; }
+        string ModelPath { get; }
 
         /// <summary>
         /// how split tensors should be distributed across GPUs
         /// </summary>
-        TensorSplitsCollection TensorSplits { get; set; }
+        TensorSplitsCollection TensorSplits { get; }
 
         /// <summary>
         /// Load vocab only (no weights)
         /// </summary>
-        bool VocabOnly { get; set; }
+        bool VocabOnly { get; }
 
         /// <summary>
         /// List of LoRA adapters to apply
@@ -58,7 +58,7 @@ namespace LLama.Abstractions
         /// <summary>
         /// base model path for the lora adapter (lora_base)
         /// </summary>
-        string LoraBase { get; set; }
+        string LoraBase { get; }
 
         /// <summary>
         /// Override specific metadata items in the model
diff --git a/LLama/Extensions/IModelParamsExtensions.cs b/LLama/Extensions/IModelParamsExtensions.cs
index 5883fb46..f7fadece 100644
--- a/LLama/Extensions/IModelParamsExtensions.cs
+++ b/LLama/Extensions/IModelParamsExtensions.cs
@@ -25,14 +25,12 @@ public static class IModelParamsExtensions
             throw new NotSupportedException("'UseMemoryLock' is not supported (llama_mlock_supported() == false)");
         if (@params.UseMemorymap && !NativeApi.llama_mmap_supported())
             throw new NotSupportedException("'UseMemorymap' is not supported (llama_mmap_supported() == false)");
-        if (@params.GpuLayerCount < 0)
-            @params.GpuLayerCount = int.MaxValue;
 
         var disposer = new GroupDisposable();
 
         result = NativeApi.llama_model_default_params();
         result.main_gpu = @params.MainGpu;
-        result.n_gpu_layers = @params.GpuLayerCount;
+        result.n_gpu_layers = @params.GpuLayerCount < 0 ? int.MaxValue : @params.GpuLayerCount;
         result.use_mlock = @params.UseMemoryLock;
         result.use_mmap = @params.UseMemorymap;
         result.vocab_only = @params.VocabOnly;
diff --git a/LLama/LLamaEmbedder.cs b/LLama/LLamaEmbedder.cs
index bccfd141..8dfc4aab 100644
--- a/LLama/LLamaEmbedder.cs
+++ b/LLama/LLamaEmbedder.cs
@@ -30,7 +30,9 @@ namespace LLama
        /// </summary>
        public LLamaEmbedder(LLamaWeights weights, IContextParams @params, ILogger? logger = null)
        {
-            @params.EmbeddingMode = true;
+            if (!@params.EmbeddingMode)
+                throw new ArgumentException("EmbeddingMode must be true", nameof(@params));
+
            Context = weights.CreateContext(@params, logger);
        }
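
With `IContextParams` and `IModelParams` now exposing get-only properties, `LLamaEmbedder` can no longer flip `EmbeddingMode` on behind the caller's back; the caller must opt in when constructing the parameters, and the constructor throws otherwise. A minimal usage sketch against the updated API (the model path is a placeholder, and the `GetEmbeddings` call assumes the existing embedder API used by the tests):

```csharp
using LLama;
using LLama.Common;

// EmbeddingMode must be set up front now that the interface properties
// are read-only; the LLamaEmbedder constructor throws ArgumentException
// if it is false.
var @params = new ModelParams("models/model.gguf") // placeholder path
{
    EmbeddingMode = true,
};

using var weights = LLamaWeights.LoadFromFile(@params);
using var embedder = new LLamaEmbedder(weights, @params);

// Assumed embedder call, mirroring the existing test usage:
float[] embeddings = embedder.GetEmbeddings("Hello, world");
```

A related behavioural detail in `IModelParamsExtensions`: a negative `GpuLayerCount` is still interpreted as `int.MaxValue` (offload everything), but the translation now happens at the point of use instead of mutating the caller's parameter object.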