You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

LLamaContextParams.cs 4.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. using System;
  2. using System.Runtime.InteropServices;
  3. namespace LLama.Native
  4. {
/// <summary>
/// Called by llama.cpp with a progress value between 0 and 1.
/// </summary>
/// <param name="progress">Progress in the range [0, 1] (per the native API's contract).</param>
/// <param name="ctx">Opaque user-data pointer passed back from native code; see the context params' progress-callback user-data field.</param>
public delegate void LlamaProgressCallback(float progress, IntPtr ctx);
  11. /// <summary>
  12. /// A C# representation of the llama.cpp `llama_context_params` struct
  13. /// </summary>
  14. [StructLayout(LayoutKind.Sequential)]
  15. public struct LLamaContextParams
  16. {
  17. /// <summary>
  18. /// RNG seed, -1 for random
  19. /// </summary>
  20. public int seed;
  21. /// <summary>
  22. /// text context
  23. /// </summary>
  24. public int n_ctx;
  25. /// <summary>
  26. /// prompt processing batch size
  27. /// </summary>
  28. public int n_batch;
  29. /// <summary>
  30. /// grouped-query attention (TEMP - will be moved to model hparams)
  31. /// </summary>
  32. public int n_gqa;
  33. /// <summary>
  34. /// rms norm epsilon (TEMP - will be moved to model hparams)
  35. /// </summary>
  36. public float rms_norm_eps;
  37. /// <summary>
  38. /// number of layers to store in VRAM
  39. /// </summary>
  40. public int n_gpu_layers;
  41. /// <summary>
  42. /// the GPU that is used for scratch and small tensors
  43. /// </summary>
  44. public int main_gpu;
  45. /// <summary>
  46. /// how to split layers across multiple GPUs
  47. /// </summary>
  48. public nint tensor_split;
  49. /// <summary>
  50. /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
  51. /// RoPE base frequency
  52. /// </summary>
  53. public float rope_freq_base;
  54. /// <summary>
  55. /// ref: https://github.com/ggerganov/llama.cpp/pull/2054
  56. /// RoPE frequency scaling factor
  57. /// </summary>
  58. public float rope_freq_scale;
  59. /// <summary>
  60. /// called with a progress value between 0 and 1, pass NULL to disable
  61. /// </summary>
  62. public IntPtr progress_callback;
  63. /// <summary>
  64. /// context pointer passed to the progress callback
  65. /// </summary>
  66. public IntPtr progress_callback_user_data;
  67. /// <summary>
  68. /// if true, reduce VRAM usage at the cost of performance
  69. /// </summary>
  70. public bool low_vram
  71. {
  72. get => Convert.ToBoolean(_low_vram);
  73. set => _low_vram = Convert.ToSByte(value);
  74. }
  75. private sbyte _low_vram;
  76. /// <summary>
  77. /// if true, use experimental mul_mat_q kernels
  78. /// </summary>
  79. public bool mul_mat_q
  80. {
  81. get => Convert.ToBoolean(_mul_mat_q);
  82. set => _mul_mat_q = Convert.ToSByte(value);
  83. }
  84. private sbyte _mul_mat_q;
  85. /// <summary>
  86. /// use fp16 for KV cache
  87. /// </summary>
  88. public bool f16_kv
  89. {
  90. get => Convert.ToBoolean(_f16_kv);
  91. set => _f16_kv = Convert.ToSByte(value);
  92. }
  93. private sbyte _f16_kv;
  94. /// <summary>
  95. /// the llama_eval() call computes all logits, not just the last one
  96. /// </summary>
  97. public bool logits_all
  98. {
  99. get => Convert.ToBoolean(_logits_all);
  100. set => _logits_all = Convert.ToSByte(value);
  101. }
  102. private sbyte _logits_all;
  103. /// <summary>
  104. /// only load the vocabulary, no weights
  105. /// </summary>
  106. public bool vocab_only
  107. {
  108. get => Convert.ToBoolean(_vocab_only);
  109. set => _vocab_only = Convert.ToSByte(value);
  110. }
  111. private sbyte _vocab_only;
  112. /// <summary>
  113. /// use mmap if possible
  114. /// </summary>
  115. public bool use_mmap
  116. {
  117. get => Convert.ToBoolean(_use_mmap);
  118. set => _use_mmap = Convert.ToSByte(value);
  119. }
  120. private sbyte _use_mmap;
  121. /// <summary>
  122. /// force system to keep model in RAM
  123. /// </summary>
  124. public bool use_mlock
  125. {
  126. get => Convert.ToBoolean(_use_mlock);
  127. set => _use_mlock = Convert.ToSByte(value);
  128. }
  129. private sbyte _use_mlock;
  130. /// <summary>
  131. /// embedding mode only
  132. /// </summary>
  133. public bool embedding
  134. {
  135. get => Convert.ToBoolean(_embedding);
  136. set => _embedding = Convert.ToSByte(value);
  137. }
  138. private sbyte _embedding;
  139. }
  140. }