diff --git a/LLama/Abstractions/IModelParams.cs b/LLama/Abstractions/IModelParams.cs
index 3ed4a84f..4e4a2b4e 100644
--- a/LLama/Abstractions/IModelParams.cs
+++ b/LLama/Abstractions/IModelParams.cs
@@ -2,6 +2,9 @@
namespace LLama.Abstractions
{
+ /// <summary>
+ /// The parameters for initializing a LLama model.
+ /// </summary>
public interface IModelParams
{
/// <summary>
diff --git a/LLama/ChatSession.cs b/LLama/ChatSession.cs
index 56ef47f5..5ed6a459 100644
--- a/LLama/ChatSession.cs
+++ b/LLama/ChatSession.cs
@@ -5,6 +5,7 @@ using System.IO;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading;
+using System.Threading.Tasks;
namespace LLama
{
@@ -13,10 +14,12 @@ namespace LLama
/// </summary>
public class ChatSession
{
- private ILLamaExecutor _executor;
- private ChatHistory _history;
- private static readonly string _executorStateFilename = "ExecutorState.json";
- private static readonly string _modelStateFilename = "ModelState.st";
+ private readonly ILLamaExecutor _executor;
+ private readonly ChatHistory _history;
+
+ private const string _executorStateFilename = "ExecutorState.json";
+ private const string _modelStateFilename = "ModelState.st";
+
/// <summary>
/// The executor for this session.
/// </summary>
@@ -227,7 +230,7 @@ namespace LLama
private async IAsyncEnumerable<string> ChatAsyncInternal(string prompt, IInferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
var results = _executor.InferAsync(prompt, inferenceParams, cancellationToken);
- await foreach (var item in OutputTransform.TransformAsync(results))
+ await foreach (var item in OutputTransform.TransformAsync(results).WithCancellation(cancellationToken))
{
yield return item;
}
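Note: chaining `.WithCancellation(cancellationToken)` means the `await foreach` loop itself observes the token, rather than relying solely on the producer passed to `InferAsync`. A minimal standalone sketch of the pattern (not LLamaSharp code):

```csharp
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;

class WithCancellationDemo
{
    // Toy producer: yields forever, honouring whatever token the
    // enumerator is given via [EnumeratorCancellation].
    static async IAsyncEnumerable<int> Produce(
        [EnumeratorCancellation] CancellationToken token = default)
    {
        for (var i = 0; ; i++)
        {
            await Task.Delay(100, token); // throws once the token is cancelled
            yield return i;
        }
    }

    static async Task Main()
    {
        using var cts = new CancellationTokenSource(millisecondsDelay: 450);
        try
        {
            // WithCancellation hands the token to GetAsyncEnumerator, so the
            // loop stops even though Produce() was called without a token.
            await foreach (var item in Produce().WithCancellation(cts.Token))
                System.Console.WriteLine(item);
        }
        catch (OperationCanceledException)
        {
            System.Console.WriteLine("cancelled");
        }
    }
}
```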
diff --git a/LLama/Common/FixedSizeQueue.cs b/LLama/Common/FixedSizeQueue.cs
index 28ae53fc..2c331e5a 100644
--- a/LLama/Common/FixedSizeQueue.cs
+++ b/LLama/Common/FixedSizeQueue.cs
@@ -15,9 +15,20 @@ namespace LLama.Common
private readonly int _maxSize;
private readonly List<T> _storage;
+ /// <summary>
+ /// Number of items in this queue
+ /// </summary>
public int Count => _storage.Count;
+
+ /// <summary>
+ /// Maximum number of items allowed in this queue
+ /// </summary>
public int Capacity => _maxSize;
+ /// <summary>
+ /// Create a new queue
+ /// </summary>
+ /// <param name="size">the maximum number of items to store in this queue</param>
public FixedSizeQueue(int size)
{
_maxSize = size;
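The members documented above imply a queue that evicts its oldest element once it is full. A minimal sketch of that behaviour; the `Enqueue` method is an assumption for illustration, since only the constructor, `Count` and `Capacity` appear in this hunk:

```csharp
using System.Collections.Generic;

// Illustrative stand-in, not the LLamaSharp class.
class BoundedQueue<T>
{
    private readonly int _maxSize;
    private readonly List<T> _storage = new();

    public int Count => _storage.Count;
    public int Capacity => _maxSize;

    public BoundedQueue(int size) => _maxSize = size;

    // Assumed behaviour: adding beyond Capacity drops the oldest item.
    public void Enqueue(T item)
    {
        _storage.Add(item);
        if (_storage.Count > _maxSize)
            _storage.RemoveAt(0);
    }
}
```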
diff --git a/LLama/Exceptions/RuntimeError.cs b/LLama/Exceptions/RuntimeError.cs
index 789f035a..6b839ff0 100644
--- a/LLama/Exceptions/RuntimeError.cs
+++ b/LLama/Exceptions/RuntimeError.cs
@@ -2,14 +2,16 @@
namespace LLama.Exceptions
{
- public class RuntimeError: Exception
+ public class RuntimeError
+ : Exception
{
public RuntimeError()
{
}
- public RuntimeError(string message): base(message)
+ public RuntimeError(string message)
+ : base(message)
{
}
diff --git a/LLama/LLamaContext.cs b/LLama/LLamaContext.cs
index 9705912f..d83baf3d 100644
--- a/LLama/LLamaContext.cs
+++ b/LLama/LLamaContext.cs
@@ -1,7 +1,6 @@
using LLama.Exceptions;
using LLama.Native;
using System;
-using System.Buffers;
using System.Collections.Generic;
using System.Linq;
using System.Text;
@@ -421,7 +420,7 @@ namespace LLama
// the list. Instead rent an array and copy the data into it. This avoids an allocation, but can't
// avoid the copying.
- var rented = ArrayPool<llama_token>.Shared.Rent(tokens.Count);
+ var rented = System.Buffers.ArrayPool<llama_token>.Shared.Rent(tokens.Count);
try
{
tokens.CopyTo(rented, 0);
@@ -429,7 +428,7 @@ namespace LLama
}
finally
{
- ArrayPool<llama_token>.Shared.Return(rented);
+ System.Buffers.ArrayPool<llama_token>.Shared.Return(rented);
}
#endif
}
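The two hunks above fully qualify `ArrayPool` because the `using System.Buffers;` directive was removed at the top of the file. The rent/copy/return-in-`finally` pattern the surrounding comment describes looks like this in isolation (a sketch, not the LLamaSharp method):

```csharp
using System.Buffers;
using System.Collections.Generic;

static class PoolExample
{
    // Rent a buffer, copy into it, and always return it in a finally
    // block so the array goes back to the pool even if the work throws.
    public static int SumViaPool(List<int> tokens)
    {
        var rented = ArrayPool<int>.Shared.Rent(tokens.Count);
        try
        {
            tokens.CopyTo(rented, 0);
            var sum = 0;
            // Rent may hand back a larger array; only read tokens.Count items.
            for (var i = 0; i < tokens.Count; i++)
                sum += rented[i];
            return sum;
        }
        finally
        {
            ArrayPool<int>.Shared.Return(rented);
        }
    }
}
```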
diff --git a/LLama/LLamaInstructExecutor.cs b/LLama/LLamaInstructExecutor.cs
index 7d9a4ed4..1a84ad2f 100644
--- a/LLama/LLamaInstructExecutor.cs
+++ b/LLama/LLamaInstructExecutor.cs
@@ -84,16 +84,16 @@ namespace LLama
/// <inheritdoc />
public override void SaveState(string filename)
{
- InstructExecutorState state = (InstructExecutorState)GetStateData();
- using (FileStream fs = new FileStream(filename, FileMode.OpenOrCreate, FileAccess.Write))
+ var state = (InstructExecutorState)GetStateData();
+ using (var fs = new FileStream(filename, FileMode.OpenOrCreate, FileAccess.Write))
{
- JsonSerializer.Serialize<InstructExecutorState>(fs, state);
+ JsonSerializer.Serialize(fs, state);
}
}
/// <inheritdoc />
public override void LoadState(string filename)
{
- using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
+ using (var fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
{
var state = JsonSerializer.Deserialize<InstructExecutorState>(fs);
LoadState(state);
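These methods round-trip executor state through `System.Text.Json`. A self-contained sketch of the same save/load pattern, using a hypothetical stand-in for `InstructExecutorState` (the synchronous `Stream` overloads need System.Text.Json 6.0 or later):

```csharp
using System.IO;
using System.Text.Json;

// Hypothetical stand-in; the real state type lives in LLamaSharp.
record ExecutorState(int PastTokensCount, string? LastOutput);

static class StatePersistence
{
    public static void Save(string filename, ExecutorState state)
    {
        using var fs = new FileStream(filename, FileMode.OpenOrCreate, FileAccess.Write);
        JsonSerializer.Serialize(fs, state); // type argument inferred from state
    }

    public static ExecutorState? Load(string filename)
    {
        using var fs = new FileStream(filename, FileMode.Open, FileAccess.Read);
        return JsonSerializer.Deserialize<ExecutorState>(fs);
    }
}
```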
diff --git a/LLama/Native/LLamaContextParams.cs b/LLama/Native/LLamaContextParams.cs
index 4e8a02cf..74340c42 100644
--- a/LLama/Native/LLamaContextParams.cs
+++ b/LLama/Native/LLamaContextParams.cs
@@ -3,8 +3,16 @@ using System.Runtime.InteropServices;
namespace LLama.Native
{
+ /// <summary>
+ /// Called by llama.cpp with a progress value between 0 and 1
+ /// </summary>
+ /// <param name="progress"></param>
+ /// <param name="ctx"></param>
public delegate void LlamaProgressCallback(float progress, IntPtr ctx);
+ /// <summary>
+ /// A C# representation of the llama.cpp `llama_context_params` struct
+ /// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct LLamaContextParams
{
@@ -48,7 +56,6 @@ namespace LLama.Native
/// </summary>
public nint tensor_split;
-
/// <summary>
/// ref: https://github.com/ggerganov/llama.cpp/pull/2054
/// RoPE base frequency
@@ -71,7 +78,6 @@ namespace LLama.Native
/// </summary>
public IntPtr progress_callback_user_data;
-
/// <summary>
/// if true, reduce VRAM usage at the cost of performance
/// </summary>
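`[StructLayout(LayoutKind.Sequential)]` is what lets this struct stand in for the native `llama_context_params`: fields are kept in declaration order so the managed and C layouts line up. A toy example of the technique, using an invented C struct rather than the llama.cpp one:

```csharp
using System.Runtime.InteropServices;

// Suppose the C side declared:
//   struct point_params { int32_t x; int32_t y; float scale; };
// Sequential layout keeps the managed fields in the same order, so this
// struct can be passed across the P/Invoke boundary by value.
[StructLayout(LayoutKind.Sequential)]
struct PointParams
{
    public int x;
    public int y;
    public float scale;
}
```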
diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs
index d9afb6ee..665ad59f 100644
--- a/LLama/Native/SafeLlamaModelHandle.cs
+++ b/LLama/Native/SafeLlamaModelHandle.cs
@@ -1,5 +1,4 @@
using System;
-using System.Diagnostics;
using System.Text;
using LLama.Exceptions;
diff --git a/LLama/OldVersion/ChatSession.cs b/LLama/OldVersion/ChatSession.cs
index 42f589b3..f8409d30 100644
--- a/LLama/OldVersion/ChatSession.cs
+++ b/LLama/OldVersion/ChatSession.cs
@@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.IO;
#pragma warning disable
+// ReSharper disable all
namespace LLama.OldVersion
{
diff --git a/LLama/OldVersion/IChatModel.cs b/LLama/OldVersion/IChatModel.cs
index 697e05cd..de32fc09 100644
--- a/LLama/OldVersion/IChatModel.cs
+++ b/LLama/OldVersion/IChatModel.cs
@@ -2,6 +2,7 @@
using System.Collections.Generic;
#pragma warning disable
+// ReSharper disable all
namespace LLama.OldVersion
{
diff --git a/LLama/OldVersion/LLamaEmbedder.cs b/LLama/OldVersion/LLamaEmbedder.cs
index 04a8abc9..7b6aedb6 100644
--- a/LLama/OldVersion/LLamaEmbedder.cs
+++ b/LLama/OldVersion/LLamaEmbedder.cs
@@ -3,6 +3,7 @@ using System;
using LLama.Exceptions;
#pragma warning disable
+// ReSharper disable all
namespace LLama.OldVersion
{
diff --git a/LLama/OldVersion/LLamaModel.cs b/LLama/OldVersion/LLamaModel.cs
index 4a1a0b2c..ec528ec4 100644
--- a/LLama/OldVersion/LLamaModel.cs
+++ b/LLama/OldVersion/LLamaModel.cs
@@ -10,6 +10,7 @@ using System.Text;
using LLama.Common;
#pragma warning disable
+// ReSharper disable all
namespace LLama.OldVersion
{
@@ -32,7 +33,6 @@ namespace LLama.OldVersion
bool _is_interacting;
bool _is_antiprompt;
bool _input_echo;
- bool _verbose;
// HACK - because session saving incurs a non-negligible delay, for now skip re-saving session
// if we loaded a session with at least 75% similarity. It's currently just used to speed up the
@@ -45,17 +45,8 @@ namespace LLama.OldVersion
List<llama_token> _embed;
public string Name { get; set; }
- public bool Verbose
- {
- get
- {
- return _verbose;
- }
- set
- {
- _verbose = value;
- }
- }
+ public bool Verbose { get; set; }
+
public SafeLLamaContextHandle NativeHandle => _ctx;
/// <summary>
@@ -178,7 +169,7 @@ namespace LLama.OldVersion
{
Name = name;
_params = @params;
- _verbose = verbose;
+ Verbose = verbose;
_ctx = Utils.llama_init_from_gpt_params(ref _params);
// Add a space in front of the first character to match OG llama tokenizer behavior
@@ -514,7 +505,7 @@ namespace LLama.OldVersion
}
if (_is_interacting)
{
- if (_verbose)
+ if (Verbose)
{
LLamaDefaultLogger.Default.Warn("In interacting when calling the model, automatically changed it.");
}
@@ -625,7 +616,7 @@ namespace LLama.OldVersion
NativeApi.llama_save_session_file(_ctx, _path_session, _session_tokens.ToArray(), (ulong)_session_tokens.Count);
}
- llama_token id = 0;
+ llama_token id;
{
var n_vocab = NativeApi.llama_n_vocab(_ctx);
diff --git a/LLama/OldVersion/LLamaParams.cs b/LLama/OldVersion/LLamaParams.cs
index f58daa0c..2fa512ad 100644
--- a/LLama/OldVersion/LLamaParams.cs
+++ b/LLama/OldVersion/LLamaParams.cs
@@ -2,6 +2,7 @@
using System.Collections.Generic;
#pragma warning disable
+// ReSharper disable all
namespace LLama.OldVersion
{
@@ -62,7 +63,7 @@ namespace LLama.OldVersion
public LLamaParams(int seed = 0, int n_threads = -1, int n_predict = -1,
int n_ctx = 512, int n_batch = 512, int n_keep = 0, int n_gpu_layers = -1,
- Dictionary<llama_token, float> logit_bias = null, int top_k = 40, float top_p = 0.95f,
+ Dictionary<llama_token, float>? logit_bias = null, int top_k = 40, float top_p = 0.95f,
float tfs_z = 1.00f, float typical_p = 1.00f, float temp = 0.80f, float repeat_penalty = 1.10f,
int repeat_last_n = 64, float frequency_penalty = 0.00f, float presence_penalty = 0.00f,
int mirostat = 0, float mirostat_tau = 5.00f, float mirostat_eta = 0.10f,
diff --git a/LLama/OldVersion/LLamaTypes.cs b/LLama/OldVersion/LLamaTypes.cs
index c6823f56..0cc4ed59 100644
--- a/LLama/OldVersion/LLamaTypes.cs
+++ b/LLama/OldVersion/LLamaTypes.cs
@@ -2,6 +2,7 @@
using System.Collections.Generic;
#pragma warning disable
+// ReSharper disable all
namespace LLama.OldVersion
{
diff --git a/LLama/OldVersion/Utils.cs b/LLama/OldVersion/Utils.cs
index ec85ca30..5aa7876f 100644
--- a/LLama/OldVersion/Utils.cs
+++ b/LLama/OldVersion/Utils.cs
@@ -8,6 +8,7 @@ using System.Runtime.InteropServices;
using System.IO;
#pragma warning disable
+// ReSharper disable all
namespace LLama.OldVersion
{
@@ -56,7 +57,7 @@ namespace LLama.OldVersion
return res.Take(n).ToList();
}
- public unsafe static Span<float> llama_get_logits(SafeLLamaContextHandle ctx, int length)
+ public static unsafe Span<float> llama_get_logits(SafeLLamaContextHandle ctx, int length)
{
var logits = NativeApi.llama_get_logits(ctx);
return new Span<float>(logits, length);
@@ -67,21 +68,24 @@ namespace LLama.OldVersion
#if NET6_0_OR_GREATER
return Marshal.PtrToStringUTF8(ptr);
#else
- byte* tp = (byte*)ptr.ToPointer();
- List<byte> bytes = new();
- while (true)
+ unsafe
{
- byte c = *tp++;
- if (c == '\0')
+ byte* tp = (byte*)ptr.ToPointer();
+ List<byte> bytes = new();
+ while (true)
{
- break;
- }
- else
- {
- bytes.Add(c);
+ byte c = *tp++;
+ if (c == '\0')
+ {
+ break;
+ }
+ else
+ {
+ bytes.Add(c);
+ }
}
+ return Encoding.UTF8.GetString(bytes.ToArray());
}
- return Encoding.UTF8.GetString(bytes.ToArray());
#endif
}
diff --git a/LLama/Utils.cs b/LLama/Utils.cs
index bfce9f3b..f3584c81 100644
--- a/LLama/Utils.cs
+++ b/LLama/Utils.cs
@@ -10,9 +10,15 @@ namespace LLama
{
using llama_token = Int32;
+ /// <summary>
+ /// Assorted llama utilities
+ /// </summary>
public static class Utils
{
+ [Obsolete("Use LLamaWeights.LoadFromFile and LLamaWeights.CreateContext instead")]
+ #pragma warning disable CS1591 // Missing XML comment for publicly visible type or member
public static SafeLLamaContextHandle InitLLamaContextFromModelParams(IModelParams @params)
+ #pragma warning restore CS1591 // Missing XML comment for publicly visible type or member
{
using var weights = LLamaWeights.LoadFromFile(@params);
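The `[Obsolete]` message points callers at `LLamaWeights`. Going by the method names in the attribute and the line above, replacement code might look like this sketch (exact signatures may differ):

```csharp
using LLama;
using LLama.Abstractions;

static class MigrationExample
{
    static void Run(IModelParams @params)
    {
        // Load the weights once...
        using var weights = LLamaWeights.LoadFromFile(@params);

        // ...then create a context over them, as the obsolete helper did internally.
        using var context = weights.CreateContext(@params);
    }
}
```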