Browse Source

Fixed a debugging slowdown by removing some things:

- Removed all `record struct` uses in native code
- Removed usage of `readonly` in native structs

Minor fix:
- Added sequential layout to `LLamaModelQuantizeParams`
tags/v0.7.0^2
Martin Evans 2 years ago
parent
commit
b6d242193e
8 changed files with 66 additions and 31 deletions
  1. +3
    -3
      LLama/Native/LLamaBeamView.cs
  2. +5
    -5
      LLama/Native/LLamaBeamsState.cs
  3. +3
    -3
      LLama/Native/LLamaGrammarElement.cs
  4. +2
    -0
      LLama/Native/LLamaModelQuantizeParams.cs
  5. +6
    -6
      LLama/Native/LLamaNativeBatch.cs
  6. +15
    -3
      LLama/Native/LLamaPos.cs
  7. +15
    -4
      LLama/Native/LLamaSeqId.cs
  8. +17
    -7
      LLama/Native/LLamaTokenData.cs

+ 3
- 3
LLama/Native/LLamaBeamView.cs View File

@@ -11,13 +11,13 @@ using llama_token = Int32;
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
public struct LLamaBeamView public struct LLamaBeamView
{ {
private readonly unsafe llama_token* tokens;
private readonly nint n_tokens;
private unsafe llama_token* tokens;
private nint n_tokens;


/// <summary> /// <summary>
/// Cumulative beam probability (renormalized relative to all beams) /// Cumulative beam probability (renormalized relative to all beams)
/// </summary> /// </summary>
public readonly float CumulativeProbability;
public float CumulativeProbability;


/// <summary> /// <summary>
/// Callback should set this to true when a beam is at end-of-beam. /// Callback should set this to true when a beam is at end-of-beam.


+ 5
- 5
LLama/Native/LLamaBeamsState.cs View File

@@ -9,27 +9,27 @@ namespace LLama.Native;
/// (e.g. beams[0]) as they will be removed (shifted) from all beams in all subsequent callbacks. /// (e.g. beams[0]) as they will be removed (shifted) from all beams in all subsequent callbacks.
/// </summary> /// </summary>
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
public readonly struct LLamaBeamsState
public struct LLamaBeamsState
{ {
/// <summary> /// <summary>
/// The state of each individual beam /// The state of each individual beam
/// </summary> /// </summary>
private readonly unsafe LLamaBeamView* beam_views;
private unsafe LLamaBeamView* beam_views;


/// <summary> /// <summary>
/// Number of elements in beam_views /// Number of elements in beam_views
/// </summary> /// </summary>
private readonly nint n_beams;
private nint n_beams;


/// <summary> /// <summary>
/// Current max length of prefix tokens shared by all beams. /// Current max length of prefix tokens shared by all beams.
/// </summary> /// </summary>
public readonly ulong CommonPrefixLength;
public ulong CommonPrefixLength;


/// <summary> /// <summary>
/// True iff this is the last callback invocation. /// True iff this is the last callback invocation.
/// </summary> /// </summary>
public readonly bool LastCall;
public bool LastCall;


/// <summary> /// <summary>
/// The current state of each beam /// The current state of each beam


+ 3
- 3
LLama/Native/LLamaGrammarElement.cs View File

@@ -52,18 +52,18 @@ namespace LLama.Native
/// </summary> /// </summary>
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
[DebuggerDisplay("{Type} {Value}")] [DebuggerDisplay("{Type} {Value}")]
public readonly struct LLamaGrammarElement
public struct LLamaGrammarElement
: IEquatable<LLamaGrammarElement> : IEquatable<LLamaGrammarElement>
{ {
/// <summary> /// <summary>
/// The type of this element /// The type of this element
/// </summary> /// </summary>
public readonly LLamaGrammarElementType Type;
public LLamaGrammarElementType Type;


/// <summary> /// <summary>
/// Unicode code point or rule ID /// Unicode code point or rule ID
/// </summary> /// </summary>
public readonly uint Value;
public uint Value;


/// <summary> /// <summary>
/// Construct a new LLamaGrammarElement /// Construct a new LLamaGrammarElement


+ 2
- 0
LLama/Native/LLamaModelQuantizeParams.cs View File

@@ -1,10 +1,12 @@
using System; using System;
using System.Runtime.InteropServices;


namespace LLama.Native namespace LLama.Native
{ {
/// <summary> /// <summary>
/// Quantizer parameters used in the native API /// Quantizer parameters used in the native API
/// </summary> /// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct LLamaModelQuantizeParams public struct LLamaModelQuantizeParams
{ {
/// <summary> /// <summary>


+ 6
- 6
LLama/Native/LLamaNativeBatch.cs View File

@@ -21,32 +21,32 @@ public unsafe struct LLamaNativeBatch
/// <summary> /// <summary>
/// Either `n_tokens` of `llama_token`, or `NULL`, depending on how this batch was created /// Either `n_tokens` of `llama_token`, or `NULL`, depending on how this batch was created
/// </summary> /// </summary>
public readonly llama_token* token;
public llama_token* token;


/// <summary> /// <summary>
/// Either `n_tokens * embd * sizeof(float)` or `NULL`, depending on how this batch was created /// Either `n_tokens * embd * sizeof(float)` or `NULL`, depending on how this batch was created
/// </summary> /// </summary>
public readonly float* embd;
public float* embd;


/// <summary> /// <summary>
/// the positions of the respective token in the sequence /// the positions of the respective token in the sequence
/// </summary> /// </summary>
public readonly LLamaPos* pos;
public LLamaPos* pos;


/// <summary> /// <summary>
/// https://github.com/ggerganov/llama.cpp/blob/master/llama.h#L139 ??? /// https://github.com/ggerganov/llama.cpp/blob/master/llama.h#L139 ???
/// </summary> /// </summary>
public readonly int* n_seq_id;
public int* n_seq_id;


/// <summary> /// <summary>
/// the sequence to which the respective token belongs /// the sequence to which the respective token belongs
/// </summary> /// </summary>
public readonly LLamaSeqId** seq_id;
public LLamaSeqId** seq_id;


/// <summary> /// <summary>
/// if zero, the logits for the respective token will not be output /// if zero, the logits for the respective token will not be output
/// </summary> /// </summary>
public readonly byte* logits;
public byte* logits;


// Note from llama.cpp: // Note from llama.cpp:
// > helpers for smooth API transition - can be deprecated in the future // > helpers for smooth API transition - can be deprecated in the future


+ 15
- 3
LLama/Native/LLamaPos.cs View File

@@ -1,14 +1,26 @@
namespace LLama.Native;
using System.Runtime.InteropServices;

namespace LLama.Native;


/// <summary> /// <summary>
/// Indicates position in a sequence /// Indicates position in a sequence
/// </summary> /// </summary>
public readonly record struct LLamaPos(int Value)
[StructLayout(LayoutKind.Sequential)]
public struct LLamaPos
{ {
/// <summary> /// <summary>
/// The raw value /// The raw value
/// </summary> /// </summary>
public readonly int Value = Value;
public int Value;

/// <summary>
/// Create a new LLamaPos
/// </summary>
/// <param name="value"></param>
public LLamaPos(int value)
{
Value = value;
}


/// <summary> /// <summary>
/// Convert a LLamaPos into an integer (extract the raw value) /// Convert a LLamaPos into an integer (extract the raw value)


+ 15
- 4
LLama/Native/LLamaSeqId.cs View File

@@ -1,15 +1,26 @@
namespace LLama.Native;
using System.Runtime.InteropServices;

namespace LLama.Native;


/// <summary> /// <summary>
/// ID for a sequence in a batch /// ID for a sequence in a batch
/// </summary> /// </summary>
/// <param name="Value"></param>
public record struct LLamaSeqId(int Value)
[StructLayout(LayoutKind.Sequential)]
public struct LLamaSeqId
{ {
/// <summary> /// <summary>
/// The raw value /// The raw value
/// </summary> /// </summary>
public int Value = Value;
public int Value;

/// <summary>
/// Create a new LLamaSeqId
/// </summary>
/// <param name="value"></param>
public LLamaSeqId(int value)
{
Value = value;
}


/// <summary> /// <summary>
/// Convert a LLamaSeqId into an integer (extract the raw value) /// Convert a LLamaSeqId into an integer (extract the raw value)


+ 17
- 7
LLama/Native/LLamaTokenData.cs View File

@@ -5,24 +5,34 @@ namespace LLama.Native;
/// <summary> /// <summary>
/// A single token along with probability of this token being selected /// A single token along with probability of this token being selected
/// </summary> /// </summary>
/// <param name="id"></param>
/// <param name="logit"></param>
/// <param name="p"></param>
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
public record struct LLamaTokenData(int id, float logit, float p)
public struct LLamaTokenData
{ {
/// <summary> /// <summary>
/// token id /// token id
/// </summary> /// </summary>
public int id = id;
public int id;


/// <summary> /// <summary>
/// log-odds of the token /// log-odds of the token
/// </summary> /// </summary>
public float logit = logit;
public float logit;


/// <summary> /// <summary>
/// probability of the token /// probability of the token
/// </summary> /// </summary>
public float p = p;
public float p;

/// <summary>
/// Create a new LLamaTokenData
/// </summary>
/// <param name="id"></param>
/// <param name="logit"></param>
/// <param name="p"></param>
public LLamaTokenData(int id, float logit, float p)
{
this.id = id;
this.logit = logit;
this.p = p;
}
} }

Loading…
Cancel
Save