
LLamaFtype.cs 4.8 kB

April 2024 Binary Update (#662), committed 1 year ago

* Updated binaries, using [this build](https://github.com/SciSharp/LLamaSharp/actions/runs/8654672719/job/23733195669) for llama.cpp commit `f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7`.
  - Added all new functions.
  - Moved some functions (e.g. `SafeLlamaModelHandle` specific functions) into `SafeLlamaModelHandle.cs`.
  - Exposed tokens on `SafeLlamaModelHandle` and `LLamaWeights` through a `Tokens` property. As new special tokens are added in the future they can be added here.
  - Changed all token properties to return nullable tokens, to handle models that lack some tokens.
  - Fixed `DefaultSamplingPipeline` to handle models with no newline token.
* Moved native methods to more specific locations.
  - Context specific functions have been moved into `SafeLLamaContextHandle.cs` and made private; they are already exposed through C# properties and methods.
  - Check that the GPU layer count is zero if GPU offload is not supported.
  - Moved methods for creating default structs (`llama_model_quantize_default_params` and `llama_context_default_params`) into the relevant structs.
* Removed the exception when `GpuLayerCount > 0` and GPU offload is not supported.
* Added low level wrapper methods for the new per-sequence state load/save in `SafeLLamaContextHandle`.
  - Added high level wrapper methods (save/load with a `State` object or a memory mapped file) in `LLamaContext`; see the sketch after this list.
  - Moved native methods for per-sequence state load/save into `SafeLLamaContextHandle`.
* Added update and defrag methods for the KV cache in `SafeLLamaContextHandle`.
* Updated the submodule to `f7001ccc5aa359fcf41bba19d1c99c3d25c9bcc7`.
* Pass the sequence ID when saving a single sequence state.
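To illustrate the state save/load wrappers referenced in the commit notes, here is a minimal sketch. It assumes `LLamaContext` exposes file based `SaveState`/`LoadState` overloads and a `GetState()` method returning a `State` object, as described above; the model path and exact signatures are assumptions, not verified against this revision.

```csharp
using LLama;
using LLama.Common;

// Minimal sketch, assuming LLamaContext exposes file based SaveState/LoadState
// plus GetState() returning a State object (per the commit notes above).
// "model.gguf" is a placeholder path.
var parameters = new ModelParams("model.gguf");
using var weights = LLamaWeights.LoadFromFile(parameters);
using var context = weights.CreateContext(parameters);

// ... evaluate some tokens so the context holds KV cache state ...

// Whole-context state, round-tripped through a (memory mapped) file.
context.SaveState("context.state");
context.LoadState("context.state");

// Whole-context state kept in memory as a State object.
var state = context.GetState();
context.LoadState(state);
```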
namespace LLama.Native
{
    /// <summary>
    /// Supported model file types
    /// </summary>
    public enum LLamaFtype
    {
        /// <summary>
        /// All f32
        /// </summary>
        /// <remarks>Benchmark@7B: 26GB</remarks>
        LLAMA_FTYPE_ALL_F32 = 0,

        /// <summary>
        /// Mostly f16
        /// </summary>
        /// <remarks>Benchmark@7B: 13GB</remarks>
        LLAMA_FTYPE_MOSTLY_F16 = 1,

        /// <summary>
        /// Mostly 8 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 6.7GB, +0.0004 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q8_0 = 7,

        /// <summary>
        /// Mostly 4 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 3.50GB, +0.2499 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_0 = 2,

        /// <summary>
        /// Mostly 4 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_1 = 3,

        /// <summary>
        /// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
        /// </summary>
        LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4,

        /// <summary>
        /// Mostly 5 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 4.30GB, +0.0796 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_0 = 8,

        /// <summary>
        /// Mostly 5 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 4.70GB, +0.0415 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_1 = 9,

        /// <summary>
        /// K-Quant 2 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 2.67GB, +0.8698 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q2_K = 10,

        /// <summary>
        /// K-Quant 3 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 2.75GB, +0.5505 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_S = 11,

        /// <summary>
        /// K-Quant 3 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.06GB, +0.2437 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_M = 12,

        /// <summary>
        /// K-Quant 3 bit (Large)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.35GB, +0.1803 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_L = 13,

        /// <summary>
        /// K-Quant 4 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.56GB, +0.1149 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_K_S = 14,

        /// <summary>
        /// K-Quant 4 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.80GB, +0.0535 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_K_M = 15,

        /// <summary>
        /// K-Quant 5 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.33GB, +0.0353 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_K_S = 16,

        /// <summary>
        /// K-Quant 5 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.45GB, +0.0142 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_K_M = 17,

        /// <summary>
        /// K-Quant 6 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 5.15GB, +0.0044 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q6_K = 18,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ2_XS = 20,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_Q2_K_S = 21,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ3_K_XS = 22,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ1_S = 24,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ4_NL = 25,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ3_S = 26,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ3_M = 27,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ2_S = 28,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ2_M = 29,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ4_XS = 30,

        /// <summary>
        /// Except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ1_M = 31,

        /// <summary>
        /// File type was not specified
        /// </summary>
        LLAMA_FTYPE_GUESSED = 1024
    }
}
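The `LLamaFtype` values above name the target format when quantizing a GGUF model. A minimal usage sketch, assuming LLamaSharp's static `LLamaQuantizer.Quantize(src, dst, ftype)` helper; the file names are placeholders:

```csharp
using System;
using LLama;
using LLama.Native;

// Minimal sketch, assuming a static LLamaQuantizer.Quantize(src, dst, ftype) helper;
// file names are placeholders.
bool ok = LLamaQuantizer.Quantize(
    "model-f16.gguf",                      // source model, e.g. LLAMA_FTYPE_MOSTLY_F16
    "model-q4_k_m.gguf",                   // destination file
    LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_M); // target file type from this enum

Console.WriteLine(ok ? "Quantization succeeded" : "Quantization failed");
```

Q4_K_M is a common choice here because, per the benchmarks in the doc comments above, it reduces a 13GB f16 model to roughly 3.80GB for a small perplexity cost (+0.0535 ppl at 7B).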