You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

LLamaContextParams.cs 4.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Runtime.InteropServices;
  4. using System.Text;
  5. namespace LLama.Native
  6. {
  /// <summary>
  /// Signature of a progress callback.
  /// </summary>
  /// <remarks>
  /// The calling convention is pinned to Cdecl: without this attribute a delegate
  /// marshalled to native code uses the platform default, which is stdcall on
  /// 32-bit Windows.
  /// NOTE(review): assumes the native library declares its callback with the C
  /// default (cdecl) convention; confirm against the native header.
  /// </remarks>
  /// <param name="progress">Progress value between 0 and 1.</param>
  /// <param name="ctx">Context pointer passed to the progress callback.</param>
  [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
  public delegate void LlamaProgressCallback(float progress, IntPtr ctx);
  /// <summary>
  /// Context parameters, laid out sequentially in memory.
  /// </summary>
  /// <remarks>
  /// Field declaration order determines the memory layout, so fields must not be
  /// reordered. NOTE(review): presumably mirrors a native llama.cpp parameter
  /// struct; confirm against the native header before changing any field.
  /// Boolean options are stored as signed bytes and exposed through properties
  /// that convert via <c>Utils.SignedByteToBool</c> / <c>Utils.BoolToSignedByte</c>.
  /// </remarks>
  [StructLayout(LayoutKind.Sequential)]
  public struct LLamaContextParams
  {
      /// <summary>
      /// Seed for the random number generator; -1 requests a random seed.
      /// </summary>
      public int seed;

      /// <summary>
      /// Text context.
      /// </summary>
      public int n_ctx;

      /// <summary>
      /// Batch size used for prompt processing.
      /// </summary>
      public int n_batch;

      /// <summary>
      /// Grouped-query attention (TEMP - will be moved to model hparams).
      /// </summary>
      public int n_gqa;

      /// <summary>
      /// Epsilon for the RMS norm (TEMP - will be moved to model hparams).
      /// </summary>
      public float rms_norm_eps;

      /// <summary>
      /// Number of layers to keep in VRAM.
      /// </summary>
      public int n_gpu_layers;

      /// <summary>
      /// The GPU used for scratch and small tensors.
      /// </summary>
      public int main_gpu;

      /// <summary>
      /// How layers are split across multiple GPUs.
      /// </summary>
      public nint tensor_split;

      /// <summary>
      /// RoPE base frequency.
      /// See https://github.com/ggerganov/llama.cpp/pull/2054
      /// </summary>
      public float rope_freq_base;

      /// <summary>
      /// RoPE frequency scaling factor.
      /// See https://github.com/ggerganov/llama.cpp/pull/2054
      /// </summary>
      public float rope_freq_scale;

      /// <summary>
      /// Callback invoked with a progress value between 0 and 1; pass NULL to disable.
      /// </summary>
      public IntPtr progress_callback;

      /// <summary>
      /// Context pointer handed to the progress callback.
      /// </summary>
      public IntPtr progress_callback_user_data;

      // Backing storage for low_vram.
      private sbyte _low_vram;

      /// <summary>
      /// When true, reduce VRAM usage at the cost of performance.
      /// </summary>
      public bool low_vram
      {
          get
          {
              return Utils.SignedByteToBool(_low_vram);
          }
          set
          {
              _low_vram = Utils.BoolToSignedByte(value);
          }
      }

      // Backing storage for mul_mat_q.
      private sbyte _mul_mat_q;

      /// <summary>
      /// When true, use the experimental mul_mat_q kernels.
      /// </summary>
      public bool mul_mat_q
      {
          get
          {
              return Utils.SignedByteToBool(_mul_mat_q);
          }
          set
          {
              _mul_mat_q = Utils.BoolToSignedByte(value);
          }
      }

      // Backing storage for f16_kv.
      private sbyte _f16_kv;

      /// <summary>
      /// Use fp16 for the KV cache.
      /// </summary>
      public bool f16_kv
      {
          get
          {
              return Utils.SignedByteToBool(_f16_kv);
          }
          set
          {
              _f16_kv = Utils.BoolToSignedByte(value);
          }
      }

      // Backing storage for logits_all.
      private sbyte _logits_all;

      /// <summary>
      /// The llama_eval() call computes all logits, not just the last one.
      /// </summary>
      public bool logits_all
      {
          get
          {
              return Utils.SignedByteToBool(_logits_all);
          }
          set
          {
              _logits_all = Utils.BoolToSignedByte(value);
          }
      }

      // Backing storage for vocab_only.
      private sbyte _vocab_only;

      /// <summary>
      /// Load only the vocabulary, no weights.
      /// </summary>
      public bool vocab_only
      {
          get
          {
              return Utils.SignedByteToBool(_vocab_only);
          }
          set
          {
              _vocab_only = Utils.BoolToSignedByte(value);
          }
      }

      // Backing storage for use_mmap.
      private sbyte _use_mmap;

      /// <summary>
      /// Use mmap if possible.
      /// </summary>
      public bool use_mmap
      {
          get
          {
              return Utils.SignedByteToBool(_use_mmap);
          }
          set
          {
              _use_mmap = Utils.BoolToSignedByte(value);
          }
      }

      // Backing storage for use_mlock.
      private sbyte _use_mlock;

      /// <summary>
      /// Force the system to keep the model in RAM.
      /// </summary>
      public bool use_mlock
      {
          get
          {
              return Utils.SignedByteToBool(_use_mlock);
          }
          set
          {
              _use_mlock = Utils.BoolToSignedByte(value);
          }
      }

      // Backing storage for embedding.
      private sbyte _embedding;

      /// <summary>
      /// Embedding mode only.
      /// </summary>
      public bool embedding
      {
          get
          {
              return Utils.SignedByteToBool(_embedding);
          }
          set
          {
              _embedding = Utils.BoolToSignedByte(value);
          }
      }
  }
  134. }