Browse Source

Fixed a debugging slowdown by removing some things:

- Removed all `record struct` uses in native code
- Removed usage of `readonly` in native structs

Minor fix:
- Added sequential layout to `LLamaModelQuantizeParams`
tags/v0.7.0^2
Martin Evans 2 years ago
parent
commit
b6d242193e
8 changed files with 66 additions and 31 deletions
  1. +3
    -3
      LLama/Native/LLamaBeamView.cs
  2. +5
    -5
      LLama/Native/LLamaBeamsState.cs
  3. +3
    -3
      LLama/Native/LLamaGrammarElement.cs
  4. +2
    -0
      LLama/Native/LLamaModelQuantizeParams.cs
  5. +6
    -6
      LLama/Native/LLamaNativeBatch.cs
  6. +15
    -3
      LLama/Native/LLamaPos.cs
  7. +15
    -4
      LLama/Native/LLamaSeqId.cs
  8. +17
    -7
      LLama/Native/LLamaTokenData.cs

+ 3
- 3
LLama/Native/LLamaBeamView.cs View File

@@ -11,13 +11,13 @@ using llama_token = Int32;
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
public struct LLamaBeamView public struct LLamaBeamView
{ {
private readonly unsafe llama_token* tokens;
private readonly nint n_tokens;
private unsafe llama_token* tokens;
private nint n_tokens;


/// <summary> /// <summary>
/// Cumulative beam probability (renormalized relative to all beams) /// Cumulative beam probability (renormalized relative to all beams)
/// </summary> /// </summary>
public readonly float CumulativeProbability;
public float CumulativeProbability;


/// <summary> /// <summary>
/// Callback should set this to true when a beam is at end-of-beam. /// Callback should set this to true when a beam is at end-of-beam.


+ 5
- 5
LLama/Native/LLamaBeamsState.cs View File

@@ -9,27 +9,27 @@ namespace LLama.Native;
/// (e.g. beams[0]) as they will be removed (shifted) from all beams in all subsequent callbacks. /// (e.g. beams[0]) as they will be removed (shifted) from all beams in all subsequent callbacks.
/// </summary> /// </summary>
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
public readonly struct LLamaBeamsState
public struct LLamaBeamsState
{ {
/// <summary> /// <summary>
/// The state of each individual beam /// The state of each individual beam
/// </summary> /// </summary>
private readonly unsafe LLamaBeamView* beam_views;
private unsafe LLamaBeamView* beam_views;


/// <summary> /// <summary>
/// Number of elements in beam_views /// Number of elements in beam_views
/// </summary> /// </summary>
private readonly nint n_beams;
private nint n_beams;


/// <summary> /// <summary>
/// Current max length of prefix tokens shared by all beams. /// Current max length of prefix tokens shared by all beams.
/// </summary> /// </summary>
public readonly ulong CommonPrefixLength;
public ulong CommonPrefixLength;


/// <summary> /// <summary>
/// True iff this is the last callback invocation. /// True iff this is the last callback invocation.
/// </summary> /// </summary>
public readonly bool LastCall;
public bool LastCall;


/// <summary> /// <summary>
/// The current state of each beam /// The current state of each beam


+ 3
- 3
LLama/Native/LLamaGrammarElement.cs View File

@@ -52,18 +52,18 @@ namespace LLama.Native
/// </summary> /// </summary>
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
[DebuggerDisplay("{Type} {Value}")] [DebuggerDisplay("{Type} {Value}")]
public readonly struct LLamaGrammarElement
public struct LLamaGrammarElement
: IEquatable<LLamaGrammarElement> : IEquatable<LLamaGrammarElement>
{ {
/// <summary> /// <summary>
/// The type of this element /// The type of this element
/// </summary> /// </summary>
public readonly LLamaGrammarElementType Type;
public LLamaGrammarElementType Type;


/// <summary> /// <summary>
/// Unicode code point or rule ID /// Unicode code point or rule ID
/// </summary> /// </summary>
public readonly uint Value;
public uint Value;


/// <summary> /// <summary>
/// Construct a new LLamaGrammarElement /// Construct a new LLamaGrammarElement


+ 2
- 0
LLama/Native/LLamaModelQuantizeParams.cs View File

@@ -1,10 +1,12 @@
using System; using System;
using System.Runtime.InteropServices;


namespace LLama.Native namespace LLama.Native
{ {
/// <summary> /// <summary>
/// Quantizer parameters used in the native API /// Quantizer parameters used in the native API
/// </summary> /// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct LLamaModelQuantizeParams public struct LLamaModelQuantizeParams
{ {
/// <summary> /// <summary>


+ 6
- 6
LLama/Native/LLamaNativeBatch.cs View File

@@ -21,32 +21,32 @@ public unsafe struct LLamaNativeBatch
/// <summary> /// <summary>
/// Either `n_tokens` of `llama_token`, or `NULL`, depending on how this batch was created /// Either `n_tokens` of `llama_token`, or `NULL`, depending on how this batch was created
/// </summary> /// </summary>
public readonly llama_token* token;
public llama_token* token;


/// <summary> /// <summary>
/// Either `n_tokens * embd * sizeof(float)` or `NULL`, depending on how this batch was created /// Either `n_tokens * embd * sizeof(float)` or `NULL`, depending on how this batch was created
/// </summary> /// </summary>
public readonly float* embd;
public float* embd;


/// <summary> /// <summary>
/// the positions of the respective token in the sequence /// the positions of the respective token in the sequence
/// </summary> /// </summary>
public readonly LLamaPos* pos;
public LLamaPos* pos;


/// <summary> /// <summary>
/// https://github.com/ggerganov/llama.cpp/blob/master/llama.h#L139 ??? /// https://github.com/ggerganov/llama.cpp/blob/master/llama.h#L139 ???
/// </summary> /// </summary>
public readonly int* n_seq_id;
public int* n_seq_id;


/// <summary> /// <summary>
/// the sequence to which the respective token belongs /// the sequence to which the respective token belongs
/// </summary> /// </summary>
public readonly LLamaSeqId** seq_id;
public LLamaSeqId** seq_id;


/// <summary> /// <summary>
/// if zero, the logits for the respective token will not be output /// if zero, the logits for the respective token will not be output
/// </summary> /// </summary>
public readonly byte* logits;
public byte* logits;


// Note from llama.cpp: // Note from llama.cpp:
// > helpers for smooth API transition - can be deprecated in the future // > helpers for smooth API transition - can be deprecated in the future


+ 15
- 3
LLama/Native/LLamaPos.cs View File

@@ -1,14 +1,26 @@
namespace LLama.Native;
using System.Runtime.InteropServices;

namespace LLama.Native;


/// <summary> /// <summary>
/// Indicates position in a sequence /// Indicates position in a sequence
/// </summary> /// </summary>
public readonly record struct LLamaPos(int Value)
[StructLayout(LayoutKind.Sequential)]
public struct LLamaPos
{ {
/// <summary> /// <summary>
/// The raw value /// The raw value
/// </summary> /// </summary>
public readonly int Value = Value;
public int Value;

/// <summary>
/// Create a new LLamaPos
/// </summary>
/// <param name="value"></param>
public LLamaPos(int value)
{
Value = value;
}


/// <summary> /// <summary>
/// Convert a LLamaPos into an integer (extract the raw value) /// Convert a LLamaPos into an integer (extract the raw value)


+ 15
- 4
LLama/Native/LLamaSeqId.cs View File

@@ -1,15 +1,26 @@
namespace LLama.Native;
using System.Runtime.InteropServices;

namespace LLama.Native;


/// <summary> /// <summary>
/// ID for a sequence in a batch /// ID for a sequence in a batch
/// </summary> /// </summary>
/// <param name="Value"></param>
public record struct LLamaSeqId(int Value)
[StructLayout(LayoutKind.Sequential)]
public struct LLamaSeqId
{ {
/// <summary> /// <summary>
/// The raw value /// The raw value
/// </summary> /// </summary>
public int Value = Value;
public int Value;

/// <summary>
/// Create a new LLamaSeqId
/// </summary>
/// <param name="value"></param>
public LLamaSeqId(int value)
{
Value = value;
}


/// <summary> /// <summary>
/// Convert a LLamaSeqId into an integer (extract the raw value) /// Convert a LLamaSeqId into an integer (extract the raw value)


+ 17
- 7
LLama/Native/LLamaTokenData.cs View File

@@ -5,24 +5,34 @@ namespace LLama.Native;
/// <summary> /// <summary>
/// A single token along with probability of this token being selected /// A single token along with probability of this token being selected
/// </summary> /// </summary>
/// <param name="id"></param>
/// <param name="logit"></param>
/// <param name="p"></param>
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
public record struct LLamaTokenData(int id, float logit, float p)
public struct LLamaTokenData
{ {
/// <summary> /// <summary>
/// token id /// token id
/// </summary> /// </summary>
public int id = id;
public int id;


/// <summary> /// <summary>
/// log-odds of the token /// log-odds of the token
/// </summary> /// </summary>
public float logit = logit;
public float logit;


/// <summary> /// <summary>
/// probability of the token /// probability of the token
/// </summary> /// </summary>
public float p = p;
public float p;

/// <summary>
/// Create a new LLamaTokenData
/// </summary>
/// <param name="id"></param>
/// <param name="logit"></param>
/// <param name="p"></param>
public LLamaTokenData(int id, float logit, float p)
{
this.id = id;
this.logit = logit;
this.p = p;
}
} }

Loading…
Cancel
Save