
Added a safe `TensorSplitsCollection` to the params, which prevents the `tensor_splits` collection from being set incorrectly

tags/v0.7.0
Martin Evans, 2 years ago
commit 6a4cd506bd
4 changed files with 46 additions and 25 deletions:

  1. LLama.Web/Common/ModelOptions.cs (+1, -1)
  2. LLama/Abstractions/IModelParams.cs (+41, -1)
  3. LLama/Common/ModelParams.cs (+3, -2)
  4. LLama/Extensions/IModelParamsExtensions.cs (+1, -21)
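
To make the intent concrete, here is a minimal usage sketch. The `ModelParams(string modelPath)` constructor is assumed from the wider library, "model.gguf" is a placeholder path, and the property is assumed to be initialized (as it is in `ModelOptions` below):

// Sketch only: the constructor and initialization are assumptions, not part of this commit.
using LLama.Common;

var parameters = new ModelParams("model.gguf");

// Previously TensorSplits was a raw float[]? that could be assigned an array of
// any length; the new fixed-size collection is always llama_max_devices() long.
parameters.TensorSplits[0] = 3; // 60% of the work to GPU 0
parameters.TensorSplits[1] = 2; // 40% to GPU 1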

LLama.Web/Common/ModelOptions.cs (+1, -1)

@@ -106,7 +106,7 @@ namespace LLama.Web.Common
         /// <summary>
         /// how split tensors should be distributed across GPUs
         /// </summary>
-        public float[] TensorSplits { get; set; }
+        public TensorSplitsCollection TensorSplits { get; set; } = new();
 
         /// <summary>
         /// RoPE base frequency


LLama/Abstractions/IModelParams.cs (+41, -1)

@@ -1,6 +1,8 @@
+using System;
+using System.Buffers;
 using System.Collections.Generic;
 using System.Linq;
 using LLama.Native;
 
 namespace LLama.Abstractions
 {
@@ -37,7 +39,7 @@ namespace LLama.Abstractions
         /// <summary>
         /// how split tensors should be distributed across GPUs
         /// </summary>
-        float[]? TensorSplits { get; set; }
+        TensorSplitsCollection TensorSplits { get; set; }
 
         /// <summary>
         /// Load vocab only (no weights)
@@ -98,4 +100,42 @@ namespace LLama.Abstractions
             }
         }
     }
+
+    /// <summary>
+    /// A fixed size array to set the tensor splits across multiple GPUs
+    /// </summary>
+    public sealed class TensorSplitsCollection
+    {
+        private readonly float[] _array = new float[NativeApi.llama_max_devices()];
+
+        /// <summary>
+        /// The size of this array
+        /// </summary>
+        public int Length => _array.Length;
+
+        /// <summary>
+        /// Get or set the proportion of work to do on the given device.
+        /// </summary>
+        /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
+        /// <param name="index"></param>
+        /// <returns></returns>
+        public float this[int index]
+        {
+            get => _array[index];
+            set => _array[index] = value;
+        }
+
+        /// <summary>
+        /// Set all values to zero
+        /// </summary>
+        public void Clear()
+        {
+            Array.Clear(_array, 0, _array.Length);
+        }
+
+        internal MemoryHandle Pin()
+        {
+            return _array.AsMemory().Pin();
+        }
+    }
 }
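
The wrapper is "safe" because its backing array is allocated once, at exactly `NativeApi.llama_max_devices()` entries, so callers can no longer supply an array that is too long or too short. A small hypothetical sketch of the behaviour:

// Hypothetical usage of the collection added above.
var splits = new TensorSplitsCollection();

// Length is fixed by the native library's device limit.
Console.WriteLine($"Max devices: {splits.Length}");

// A 3:2 ratio splits the work 60%/40% across the first two GPUs.
splits[0] = 3f;
splits[1] = 2f;

// Writing past the device count throws IndexOutOfRangeException immediately,
// instead of surfacing later as a mis-sized native tensor_split array:
// splits[splits.Length] = 1f;

splits.Clear(); // reset every proportion to zero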

LLama/Common/ModelParams.cs (+3, -2)

@@ -82,9 +82,10 @@ namespace LLama.Common
         public bool EmbeddingMode { get; set; }
 
         /// <summary>
-        /// how split tensors should be distributed across GPUs
+        /// how split tensors should be distributed across GPUs.
         /// </summary>
-        public float[]? TensorSplits { get; set; }
+        /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
+        public TensorSplitsCollection TensorSplits { get; set; }
 
         /// <summary>
         /// RoPE base frequency


LLama/Extensions/IModelParamsExtensions.cs (+1, -21)

@@ -1,7 +1,6 @@
 using System.IO;
 using System;
 using System.Buffers;
-using System.Diagnostics;
 using LLama.Abstractions;
 using LLama.Native;
 
@@ -22,25 +21,6 @@ namespace LLama.Extensions
         /// <exception cref="ArgumentException"></exception>
         public static MemoryHandle ToLlamaModelParams(this IModelParams @params, out LLamaModelParams result)
         {
-            var maxDevices = NativeApi.llama_max_devices();
-            var splits = @params.TensorSplits;
-            if (splits != null)
-            {
-                Debug.Assert(@params.TensorSplits != null);
-
-                // If the splits array is too large just throw
-                if (splits.Length > maxDevices)
-                    throw new ArgumentException($"TensorSplits size must be <= NativeApi.llama_max_devices() ({maxDevices})");
-
-                // If the splits array is too small pad it up to the necessary size
-                if (splits.Length < maxDevices)
-                {
-                    splits = new float[maxDevices];
-                    for (var i = 0; i < @params.TensorSplits.Length; i++)
-                        splits[i] = @params.TensorSplits[i];
-                }
-            }
-
             result = NativeApi.llama_model_default_params();
 
             result.main_gpu = @params.MainGpu;
@@ -49,7 +29,7 @@ namespace LLama.Extensions
             result.use_mmap = @params.UseMemorymap;
             result.vocab_only = @params.VocabOnly;
 
-            var pin = splits.AsMemory().Pin();
+            var pin = @params.TensorSplits.Pin();
             unsafe
             {
                 result.tensor_split = (float*)pin.Pointer;
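
One lifetime note on the simplified method: the returned `MemoryHandle` is what keeps the splits array pinned, and `result.tensor_split` points directly into that pinned memory, so the handle must stay alive until the native side has consumed the params. A sketch of the caller's side, where `LoadModel` is a hypothetical stand-in for the native call:

// Sketch: LoadModel is assumed; only ToLlamaModelParams and Pin come from this commit.
using (var pin = modelParams.ToLlamaModelParams(out var nativeParams))
{
    // nativeParams.tensor_split points at the pinned managed array here.
    LoadModel(nativeParams);
} // disposing the handle unpins the array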

