
LLamaContextParams.cs 3.6 kB

using System;
using System.Runtime.InteropServices;

namespace LLama.Native
{
    /// <summary>
    /// Called by llama.cpp with a progress value between 0 and 1
    /// </summary>
    /// <param name="progress"></param>
    /// <param name="ctx"></param>
    /// <remarks>llama_progress_callback</remarks>
    public delegate bool LlamaProgressCallback(float progress, IntPtr ctx);

    /// <summary>
    /// A C# representation of the llama.cpp `llama_context_params` struct
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct LLamaContextParams
    {
        /// <summary>
        /// RNG seed, -1 for random
        /// </summary>
        public uint seed;

        /// <summary>
        /// text context, 0 = from model
        /// </summary>
        public uint n_ctx;

        /// <summary>
        /// prompt processing batch size
        /// </summary>
        public uint n_batch;

        /// <summary>
        /// number of threads to use for generation
        /// </summary>
        public uint n_threads;

        /// <summary>
        /// number of threads to use for batch processing
        /// </summary>
        public uint n_threads_batch;

        /// <summary>
        /// RoPE scaling type, from `enum llama_rope_scaling_type`
        /// </summary>
        public RopeScalingType rope_scaling_type;

        /// <summary>
        /// RoPE base frequency, 0 = from model
        /// </summary>
        public float rope_freq_base;

        /// <summary>
        /// RoPE frequency scaling factor, 0 = from model
        /// </summary>
        public float rope_freq_scale;

        /// <summary>
        /// YaRN extrapolation mix factor, negative = from model
        /// </summary>
        public float yarn_ext_factor;

        /// <summary>
        /// YaRN magnitude scaling factor
        /// </summary>
        public float yarn_attn_factor;

        /// <summary>
        /// YaRN low correction dim
        /// </summary>
        public float yarn_beta_fast;

        /// <summary>
        /// YaRN high correction dim
        /// </summary>
        public float yarn_beta_slow;

        /// <summary>
        /// YaRN original context size
        /// </summary>
        public uint yarn_orig_ctx;

        /// <summary>
        /// ggml_backend_sched_eval_callback
        /// </summary>
        public IntPtr cb_eval;

        /// <summary>
        /// User data passed into cb_eval
        /// </summary>
        public IntPtr cb_eval_user_data;

        /// <summary>
        /// data type for K cache
        /// </summary>
        public GGMLType type_k;

        /// <summary>
        /// data type for V cache
        /// </summary>
        public GGMLType type_v;

        /// <summary>
        /// Deprecated!
        /// </summary>
        private sbyte _mul_mat_q;

        /// <summary>
        /// Deprecated!
        /// </summary>
        private sbyte _logits_all;

        /// <summary>
        /// embedding mode only
        /// </summary>
        public bool embedding
        {
            readonly get => Convert.ToBoolean(_embedding);
            set => _embedding = Convert.ToSByte(value);
        }
        private sbyte _embedding;

        /// <summary>
        /// whether to offload the KQV ops (including the KV cache) to GPU
        /// </summary>
        public bool offload_kqv
        {
            readonly get => Convert.ToBoolean(_offload_kqv);
            set => _offload_kqv = Convert.ToSByte(value);
        }
        private sbyte _offload_kqv;
    }
}
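
For context, a minimal usage sketch follows. It is not part of LLamaContextParams.cs: it fills the struct with illustrative values and defines a method matching the LlamaProgressCallback delegate above. The class and method names, the chosen field values, and the assumption that returning true from the callback lets loading continue are assumptions for illustration; in practice the struct is normally seeded from llama.cpp's llama_context_default_params() so fields such as yarn_ext_factor keep their intended defaults.

using System;
using LLama.Native;

static class ContextParamsExample
{
    // Hypothetical callback matching the LlamaProgressCallback delegate.
    // Assumption: returning true tells llama.cpp to keep loading.
    static bool OnProgress(float progress, IntPtr ctx)
    {
        Console.WriteLine($"load progress: {progress:P0}");
        return true;
    }

    static void Main()
    {
        // Illustrative values only; fields documented as "0 = from model"
        // (n_ctx, rope_freq_base, rope_freq_scale) can be left at 0.
        var p = new LLamaContextParams
        {
            seed = unchecked((uint)-1),                    // -1 => random seed
            n_ctx = 2048,                                  // context length in tokens
            n_batch = 512,                                 // prompt processing batch size
            n_threads = (uint)Environment.ProcessorCount,  // generation threads
            n_threads_batch = (uint)Environment.ProcessorCount,
            offload_kqv = true,                            // keep KQV ops / KV cache on GPU
            embedding = false,                             // full generation, not embedding-only
        };

        // The populated struct would then be handed to the native
        // context-creation call (llama_new_context_with_model) through
        // the library's P/Invoke layer.
        Console.WriteLine($"n_ctx={p.n_ctx}, n_threads={p.n_threads}");
    }
}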