You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

IModelParams.cs 4.9 kB

2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. using System;
  2. using System.Buffers;
  3. using System.Collections;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using LLama.Native;
  7. namespace LLama.Abstractions
  8. {
  /// <summary>
  /// The parameters for initializing a LLama model.
  /// </summary>
  public interface IModelParams
  {
      /// <summary>
      /// The GPU that is used for scratch and small tensors.
      /// </summary>
      int MainGpu { get; set; }
      /// <summary>
      /// Number of layers to run in VRAM / GPU memory (n_gpu_layers).
      /// </summary>
      int GpuLayerCount { get; set; }
      /// <summary>
      /// Use mmap for faster loads (use_mmap).
      /// </summary>
      bool UseMemorymap { get; set; }
      /// <summary>
      /// Use mlock to keep the model in memory (use_mlock).
      /// </summary>
      bool UseMemoryLock { get; set; }
      /// <summary>
      /// Model path (model).
      /// </summary>
      string ModelPath { get; set; }
      /// <summary>
      /// How split tensors should be distributed across GPUs.
      /// </summary>
      TensorSplitsCollection TensorSplits { get; set; }
      /// <summary>
      /// Load vocab only (no weights).
      /// </summary>
      bool VocabOnly { get; set; }
      /// <summary>
      /// List of LoRA adapters to apply.
      /// </summary>
      /// <remarks>This property has no setter; adapters are added to or removed from the returned collection.</remarks>
      AdapterCollection LoraAdapters { get; }
      /// <summary>
      /// Base model path for the LoRA adapter (lora_base).
      /// </summary>
      string LoraBase { get; set; }
  }
  /// <summary>
  /// A LoRA adapter to apply to a model.
  /// </summary>
  /// <remarks>
  /// Declared as a readonly record struct, so instances are immutable and two adapters
  /// compare equal when both <c>Path</c> and <c>Scale</c> are equal.
  /// </remarks>
  /// <param name="Path">Path to the LoRA file</param>
  /// <param name="Scale">Strength of this LoRA</param>
  public readonly record struct LoraAdapter(string Path, float Scale);
  /// <summary>
  /// A list of <see cref="LoraAdapter"/> objects that compares by content:
  /// two collections are equal when they hold equal adapters in the same order.
  /// </summary>
  public sealed class AdapterCollection
      : List<LoraAdapter>, IEquatable<AdapterCollection>
  {
      /// <inheritdoc />
      public bool Equals(AdapterCollection? other)
          => other is not null && this.SequenceEqual(other);

      /// <inheritdoc/>
      public override bool Equals(object? obj)
          => obj is AdapterCollection adapters && Equals(adapters);

      /// <inheritdoc/>
      public override int GetHashCode()
      {
          // Order-sensitive fold over the elements, matching the order-sensitive Equals.
          // Overflow is expected and must wrap rather than throw.
          var result = 17;
          foreach (var adapter in this)
          {
              result = unchecked((result + adapter.GetHashCode()) * 7823);
          }

          return result;
      }
  }
  /// <summary>
  /// A fixed size array to set the tensor splits across multiple GPUs
  /// </summary>
  /// <remarks>
  /// The number of entries is taken from <see cref="NativeApi.llama_max_devices"/> when the
  /// instance is created and does not change afterwards.
  /// </remarks>
  public sealed class TensorSplitsCollection
      : IEnumerable<float>
  {
      // Backing storage; a new float array starts with every entry at zero.
      internal readonly float[] Splits = new float[NativeApi.llama_max_devices()];

      /// <summary>
      /// The size of this array
      /// </summary>
      public int Length => Splits.Length;

      /// <summary>
      /// Get or set the proportion of work to do on the given device.
      /// </summary>
      /// <remarks>"[ 3, 2 ]" will assign 60% of the data to GPU 0 and 40% to GPU 1.</remarks>
      /// <param name="index">Zero-based position in the splits array; must be less than <see cref="Length"/>.</param>
      /// <returns>The value currently stored at <paramref name="index"/>.</returns>
      /// <exception cref="IndexOutOfRangeException"><paramref name="index"/> is outside the bounds of the array.</exception>
      public float this[int index]
      {
          get => Splits[index];
          set => Splits[index] = value;
      }

      /// <summary>
      /// Create a new tensor splits collection, copying the given values
      /// </summary>
      /// <remarks>
      /// Values are copied into the leading entries; if fewer than <see cref="Length"/> values
      /// are supplied the remaining entries stay at zero.
      /// </remarks>
      /// <param name="splits">The values to copy; at most <see cref="Length"/> items.</param>
      /// <exception cref="ArgumentNullException"><paramref name="splits"/> is null.</exception>
      /// <exception cref="ArgumentException"><paramref name="splits"/> has more items than this collection can hold.</exception>
      public TensorSplitsCollection(float[] splits)
      {
          // Fail with a descriptive argument exception instead of a NullReferenceException
          // from the Length access below.
          if (splits is null)
              throw new ArgumentNullException(nameof(splits));

          if (splits.Length > Splits.Length)
              throw new ArgumentException($"Must supply at most {Splits.Length} tensor splits", nameof(splits));

          splits.CopyTo(Splits.AsSpan());
      }

      /// <summary>
      /// Create a new tensor splits collection with all values initialised to the default
      /// </summary>
      public TensorSplitsCollection()
      {
      }

      /// <summary>
      /// Set all values to zero
      /// </summary>
      public void Clear()
      {
          Array.Clear(Splits, 0, Splits.Length);
      }

      /// <summary>
      /// Pin the backing array in memory.
      /// </summary>
      /// <returns>A handle to the pinned memory; dispose it to release the pin.</returns>
      internal MemoryHandle Pin()
      {
          return Splits.AsMemory().Pin();
      }

      #region IEnumerator
      /// <inheritdoc />
      public IEnumerator<float> GetEnumerator()
      {
          // The cast selects the array's generic enumerator rather than the non-generic one.
          return ((IEnumerable<float>)Splits).GetEnumerator();
      }

      /// <inheritdoc />
      IEnumerator IEnumerable.GetEnumerator()
      {
          return Splits.GetEnumerator();
      }
      #endregion
  }
  153. }