diff --git a/LLama/Abstractions/IModelParams.cs b/LLama/Abstractions/IModelParams.cs
index 3ed4a84f..4e4a2b4e 100644
--- a/LLama/Abstractions/IModelParams.cs
+++ b/LLama/Abstractions/IModelParams.cs
@@ -2,6 +2,9 @@
 
 namespace LLama.Abstractions
 {
+    /// <summary>
+    /// The parameters for initializing a LLama model.
+    /// </summary>
     public interface IModelParams
     {
         /// <summary>
diff --git a/LLama/ChatSession.cs b/LLama/ChatSession.cs
index 56ef47f5..5ed6a459 100644
--- a/LLama/ChatSession.cs
+++ b/LLama/ChatSession.cs
@@ -5,6 +5,7 @@ using System.IO;
 using System.Runtime.CompilerServices;
 using System.Text;
 using System.Threading;
+using System.Threading.Tasks;
 
 namespace LLama
 {
@@ -13,10 +14,12 @@ namespace LLama
     /// </summary>
     public class ChatSession
     {
-        private ILLamaExecutor _executor;
-        private ChatHistory _history;
-        private static readonly string _executorStateFilename = "ExecutorState.json";
-        private static readonly string _modelStateFilename = "ModelState.st";
+        private readonly ILLamaExecutor _executor;
+        private readonly ChatHistory _history;
+
+        private const string _executorStateFilename = "ExecutorState.json";
+        private const string _modelStateFilename = "ModelState.st";
+
         /// <summary>
         /// The executor for this session.
         /// </summary>
@@ -227,7 +230,7 @@ namespace LLama
         private async IAsyncEnumerable<string> ChatAsyncInternal(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
         {
             var results = _executor.InferAsync(prompt, inferenceParams, cancellationToken);
-            await foreach (var item in OutputTransform.TransformAsync(results))
+            await foreach (var item in OutputTransform.TransformAsync(results).WithCancellation(cancellationToken))
             {
                 yield return item;
             }
diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
index 28ae53fc..2c331e5a 100644
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -15,9 +15,20 @@ namespace LLama.Common
         private readonly int _maxSize;
         private readonly List<T> _storage;
 
+        /// <summary>
+        /// Number of items in this queue
+        /// </summary>
         public int Count => _storage.Count;
+
+        /// <summary>
+        /// Maximum number of items allowed in this queue
+        /// </summary>
         public int Capacity => _maxSize;
 
+        /// <summary>
+        /// Create a new queue
+        /// </summary>
+        /// <param name="size">the maximum number of items to store in this queue</param>
         public FixedSizeQueue(int size)
         {
             _maxSize = size;
diff --git a/LLama/Exceptions/RuntimeError.cs b/LLama/Exceptions/RuntimeError.cs
index 789f035a..6b839ff0 100644
--- a/LLama/Exceptions/RuntimeError.cs
+++ b/LLama/Exceptions/RuntimeError.cs
@@ -2,14 +2,16 @@
 
 namespace LLama.Exceptions
 {
-    public class RuntimeError: Exception
+    public class RuntimeError
+        : Exception
     {
         public RuntimeError()
         {
 
         }
 
-        public RuntimeError(string message): base(message)
+        public RuntimeError(string message)
+            : base(message)
         {
 
         }
diff --git a/LLama/LLamaContext.cs b/LLama/LLamaContext.cs
index 9705912f..d83baf3d 100644
--- a/LLama/LLamaContext.cs
+++ b/LLama/LLamaContext.cs
@@ -1,7 +1,6 @@
 using LLama.Exceptions;
 using LLama.Native;
 using System;
-using System.Buffers;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
@@ -421,7 +420,7 @@ namespace LLama
             // the list. Instead rent an array and copy the data into it. This avoids an allocation, but can't
             // avoid the copying.
-            var rented = ArrayPool<llama_token>.Shared.Rent(tokens.Count);
+            var rented = System.Buffers.ArrayPool<llama_token>.Shared.Rent(tokens.Count);
             try
             {
                 tokens.CopyTo(rented, 0);
@@ -429,7 +428,7 @@ namespace LLama
             }
             finally
             {
-                ArrayPool<llama_token>.Shared.Return(rented);
+                System.Buffers.ArrayPool<llama_token>.Shared.Return(rented);
             }
 #endif
         }
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index 7d9a4ed4..1a84ad2f 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -84,16 +84,16 @@ namespace LLama
         /// <inheritdoc />
         public override void SaveState(string filename)
         {
-            InstructExecutorState state = (InstructExecutorState)GetStateData();
-            using (FileStream fs = new FileStream(filename, FileMode.OpenOrCreate, FileAccess.Write))
+            var state = (InstructExecutorState)GetStateData();
+            using (var fs = new FileStream(filename, FileMode.OpenOrCreate, FileAccess.Write))
             {
-                JsonSerializer.Serialize<InstructExecutorState>(fs, state);
+                JsonSerializer.Serialize(fs, state);
             }
         }
         /// <inheritdoc />
         public override void LoadState(string filename)
         {
-            using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
+            using (var fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
             {
                 var state = JsonSerializer.Deserialize<InstructExecutorState>(fs);
                 LoadState(state);
diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs
index 4e8a02cf..74340c42 100644
--- a/LLama/Native/LLamaContextParams.cs
+++ b/LLama/Native/LLamaContextParams.cs
@@ -3,8 +3,16 @@ using System.Runtime.InteropServices;
 
 namespace LLama.Native
 {
+    /// <summary>
+    /// Called by llama.cpp with a progress value between 0 and 1
+    /// </summary>
+    /// <param name="progress"></param>
+    /// <param name="ctx"></param>
     public delegate void LlamaProgressCallback(float progress, IntPtr ctx);
 
+    /// <summary>
+    /// A C# representation of the llama.cpp `llama_context_params` struct
+    /// </summary>
     [StructLayout(LayoutKind.Sequential)]
     public struct LLamaContextParams
     {
@@ -48,7 +56,6 @@ namespace LLama.Native
         /// </summary>
         public nint tensor_split;
 
-
         /// <summary>
         /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
         /// RoPE base frequency
@@ -71,7 +78,6 @@ namespace LLama.Native
         /// </summary>
         public IntPtr progress_callback_user_data;
 
-
         /// <summary>
         /// if true, reduce VRAM usage at the cost of performance
         /// </summary>
diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs
index d9afb6ee..665ad59f 100644
--- a/LLama/Native/SafeLlamaModelHandle.cs
+++ b/LLama/Native/SafeLlamaModelHandle.cs
@@ -1,5 +1,4 @@
 using System;
-using System.Diagnostics;
 using System.Text;
 using LLama.Exceptions;
 
diff --git a/LLama/OldVersion/ChatSession.cs b/LLama/OldVersion/ChatSession.cs
index 42f589b3..f8409d30 100644
--- a/LLama/OldVersion/ChatSession.cs
+++ b/LLama/OldVersion/ChatSession.cs
@@ -3,6 +3,7 @@ using System.Collections.Generic;
 using System.IO;
 
 #pragma warning disable
+// ReSharper disable all
 
 namespace LLama.OldVersion
 {
diff --git a/LLama/OldVersion/IChatModel.cs b/LLama/OldVersion/IChatModel.cs
index 697e05cd..de32fc09 100644
--- a/LLama/OldVersion/IChatModel.cs
+++ b/LLama/OldVersion/IChatModel.cs
@@ -2,6 +2,7 @@
 using System.Collections.Generic;
 
 #pragma warning disable
+// ReSharper disable all
 
 namespace LLama.OldVersion
 {
diff --git a/LLama/OldVersion/LLamaEmbedder.cs b/LLama/OldVersion/LLamaEmbedder.cs
index 04a8abc9..7b6aedb6 100644
--- a/LLama/OldVersion/LLamaEmbedder.cs
+++ b/LLama/OldVersion/LLamaEmbedder.cs
@@ -3,6 +3,7 @@ using System;
 using LLama.Exceptions;
 
 #pragma warning disable
+// ReSharper disable all
 
 namespace LLama.OldVersion
 {
diff --git a/LLama/OldVersion/LLamaModel.cs b/LLama/OldVersion/LLamaModel.cs
index 4a1a0b2c..ec528ec4 100644
--- a/LLama/OldVersion/LLamaModel.cs
+++ b/LLama/OldVersion/LLamaModel.cs
@@ -10,6 +10,7 @@ using System.Text;
 using LLama.Common;
 
 #pragma warning disable
+// ReSharper disable all
 
 namespace LLama.OldVersion
 {
@@ -32,7 +33,6 @@ namespace LLama.OldVersion
         bool _is_interacting;
         bool _is_antiprompt;
         bool _input_echo;
-        bool _verbose;
 
         // HACK - because session saving incurs a non-negligible delay, for now skip re-saving session
         // if we loaded a session with at least 75% similarity. It's currently just used to speed up the
@@ -45,17 +45,8 @@ namespace LLama.OldVersion
         List<llama_token> _embed;
 
         public string Name { get; set; }
-        public bool Verbose
-        {
-            get
-            {
-                return _verbose;
-            }
-            set
-            {
-                _verbose = value;
-            }
-        }
+        public bool Verbose { get; set; }
+
         public SafeLLamaContextHandle NativeHandle => _ctx;
 
         /// <summary>
@@ -178,7 +169,7 @@
         {
             Name = name;
             _params = @params;
-            _verbose = verbose;
+            Verbose = verbose;
             _ctx = Utils.llama_init_from_gpt_params(ref _params);
 
             // Add a space in front of the first character to match OG llama tokenizer behavior
@@ -514,7 +505,7 @@
             }
             if (_is_interacting)
             {
-                if (_verbose)
+                if (Verbose)
                 {
                     LLamaDefaultLogger.Default.Warn("In interacting when calling the model, automatically changed it.");
                 }
@@ -625,7 +616,7 @@
                 NativeApi.llama_save_session_file(_ctx, _path_session, _session_tokens.ToArray(), (ulong)_session_tokens.Count);
             }
 
-            llama_token id = 0;
+            llama_token id;
 
             {
                 var n_vocab = NativeApi.llama_n_vocab(_ctx);
diff --git a/LLama/OldVersion/LLamaParams.cs b/LLama/OldVersion/LLamaParams.cs
index f58daa0c..2fa512ad 100644
--- a/LLama/OldVersion/LLamaParams.cs
+++ b/LLama/OldVersion/LLamaParams.cs
@@ -2,6 +2,7 @@
 using System.Collections.Generic;
 
 #pragma warning disable
+// ReSharper disable all
 
 namespace LLama.OldVersion
 {
@@ -62,7 +63,7 @@
 
         public LLamaParams(int seed = 0, int n_threads = -1, int n_predict = -1,
             int n_ctx = 512, int n_batch = 512, int n_keep = 0, int n_gpu_layers = -1,
-            Dictionary<llama_token, float> logit_bias = null, int top_k = 40, float top_p = 0.95f,
+            Dictionary<llama_token, float>? logit_bias = null, int top_k = 40, float top_p = 0.95f,
             float tfs_z = 1.00f, float typical_p = 1.00f, float temp = 0.80f, float repeat_penalty = 1.10f,
             int repeat_last_n = 64, float frequency_penalty = 0.00f, float presence_penalty = 0.00f,
             int mirostat = 0, float mirostat_tau = 5.00f, float mirostat_eta = 0.10f,
diff --git a/LLama/OldVersion/LLamaTypes.cs b/LLama/OldVersion/LLamaTypes.cs
index c6823f56..0cc4ed59 100644
--- a/LLama/OldVersion/LLamaTypes.cs
+++ b/LLama/OldVersion/LLamaTypes.cs
@@ -2,6 +2,7 @@
 using System.Collections.Generic;
 
 #pragma warning disable
+// ReSharper disable all
 
 namespace LLama.OldVersion
 {
diff --git a/LLama/OldVersion/Utils.cs b/LLama/OldVersion/Utils.cs
index ec85ca30..5aa7876f 100644
--- a/LLama/OldVersion/Utils.cs
+++ b/LLama/OldVersion/Utils.cs
@@ -8,6 +8,7 @@ using System.Runtime.InteropServices;
 using System.IO;
 
 #pragma warning disable
+// ReSharper disable all
 
 namespace LLama.OldVersion
 {
@@ -56,7 +57,7 @@
             return res.Take(n).ToList();
         }
 
-        public unsafe static Span<float> llama_get_logits(SafeLLamaContextHandle ctx, int length)
+        public static unsafe Span<float> llama_get_logits(SafeLLamaContextHandle ctx, int length)
         {
             var logits = NativeApi.llama_get_logits(ctx);
             return new Span<float>(logits, length);
         }
@@ -67,21 +68,24 @@
 #if NET6_0_OR_GREATER
             return Marshal.PtrToStringUTF8(ptr);
 #else
-            byte* tp = (byte*)ptr.ToPointer();
-            List<byte> bytes = new();
-            while (true)
+            unsafe
             {
-                byte c = *tp++;
-                if (c == '\0')
+                byte* tp = (byte*)ptr.ToPointer();
+                List<byte> bytes = new();
+                while (true)
                 {
-                    break;
-                }
-                else
-                {
-                    bytes.Add(c);
+                    byte c = *tp++;
+                    if (c == '\0')
+                    {
+                        break;
+                    }
+                    else
+                    {
+                        bytes.Add(c);
+                    }
                 }
+                return Encoding.UTF8.GetString(bytes.ToArray());
             }
-            return Encoding.UTF8.GetString(bytes.ToArray());
 #endif
         }
diff --git a/LLama/Utils.cs b/LLama/Utils.cs
index bfce9f3b..f3584c81 100644
--- a/LLama/Utils.cs
+++ b/LLama/Utils.cs
@@ -10,9 +10,15 @@ namespace LLama
 {
     using llama_token = Int32;
 
+    /// <summary>
+    /// Assorted llama utilities
+    /// </summary>
     public static class Utils
    {
+        [Obsolete("Use LLamaWeights.LoadFromFile and LLamaWeights.CreateContext instead")]
+        #pragma warning disable CS1591 // Missing XML comment for publicly visible type or member
         public static SafeLLamaContextHandle InitLLamaContextFromModelParams(IModelParams @params)
+        #pragma warning restore CS1591 // Missing XML comment for publicly visible type or member
         {
             using var weights = LLamaWeights.LoadFromFile(@params);
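
Note on the [Obsolete] attribute added to Utils.InitLLamaContextFromModelParams above: its message points callers at the LLamaWeights API instead. Below is a minimal sketch of that replacement pattern, assuming a hypothetical model path ("model.gguf") and assuming CreateContext accepts the same IModelParams-style object as the loader; check the current LLamaSharp API surface before relying on it.

    using LLama;
    using LLama.Common;

    // "model.gguf" is a hypothetical path used purely for illustration.
    var @params = new ModelParams("model.gguf");

    // Load the weights once, then create a context over them.
    // Both objects own native resources, so dispose them (here via `using`).
    using var weights = LLamaWeights.LoadFromFile(@params);
    using var context = weights.CreateContext(@params);

This mirrors the body of the obsoleted helper (which itself starts with LLamaWeights.LoadFromFile), but lets callers reuse one set of weights across several contexts instead of reloading the model each time.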