You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

LLamaFtype.cs 3.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. namespace LLama.Native
  2. {
  3. /// <summary>
  4. /// Supported model file types
  5. /// </summary>
  6. public enum LLamaFtype
  7. {
  8. /// <summary>
  9. /// All f32
  10. /// </summary>
  11. /// <remarks>Benchmark@7B: 26GB</remarks>
  12. LLAMA_FTYPE_ALL_F32 = 0,
  13. /// <summary>
  14. /// Mostly f16
  15. /// </summary>
  16. /// <remarks>Benchmark@7B: 13GB</remarks>
  17. LLAMA_FTYPE_MOSTLY_F16 = 1,
  18. /// <summary>
  19. /// Mostly 8 bit
  20. /// </summary>
  21. /// <remarks>Benchmark@7B: 6.7GB, +0.0004ppl</remarks>
  22. LLAMA_FTYPE_MOSTLY_Q8_0 = 7,
  23. /// <summary>
  24. /// Mostly 4 bit
  25. /// </summary>
  26. /// <remarks>Benchmark@7B: 3.50GB, +0.2499 ppl</remarks>
  27. LLAMA_FTYPE_MOSTLY_Q4_0 = 2,
  28. /// <summary>
  29. /// Mostly 4 bit
  30. /// </summary>
  31. /// <remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
  32. LLAMA_FTYPE_MOSTLY_Q4_1 = 3,
  33. /// <summary>
  34. /// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
  35. /// </summary>
  36. LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4,
  37. /// <summary>
  38. /// Mostly 5 bit
  39. /// </summary>
  40. /// <remarks>Benchmark@7B: 4.30GB @ 7B tokens, +0.0796 ppl</remarks>
  41. LLAMA_FTYPE_MOSTLY_Q5_0 = 8,
  42. /// <summary>
  43. /// Mostly 5 bit
  44. /// </summary>
  45. /// <remarks>Benchmark@7B: 4.70GB, +0.0415 ppl</remarks>
  46. LLAMA_FTYPE_MOSTLY_Q5_1 = 9,
  47. /// <summary>
  48. /// K-Quant 2 bit
  49. /// </summary>
  50. /// <remarks>Benchmark@7B: 2.67GB @ 7N parameters, +0.8698 ppl</remarks>
  51. LLAMA_FTYPE_MOSTLY_Q2_K = 10,
  52. /// <summary>
  53. /// K-Quant 3 bit (Small)
  54. /// </summary>
  55. /// <remarks>Benchmark@7B: 2.75GB, +0.5505 ppl</remarks>
  56. LLAMA_FTYPE_MOSTLY_Q3_K_S = 11,
  57. /// <summary>
  58. /// K-Quant 3 bit (Medium)
  59. /// </summary>
  60. /// <remarks>Benchmark@7B: 3.06GB, +0.2437 ppl</remarks>
  61. LLAMA_FTYPE_MOSTLY_Q3_K_M = 12,
  62. /// <summary>
  63. /// K-Quant 3 bit (Large)
  64. /// </summary>
  65. /// <remarks>Benchmark@7B: 3.35GB, +0.1803 ppl</remarks>
  66. LLAMA_FTYPE_MOSTLY_Q3_K_L = 13,
  67. /// <summary>
  68. /// K-Quant 4 bit (Small)
  69. /// </summary>
  70. /// <remarks>Benchmark@7B: 3.56GB, +0.1149 ppl</remarks>
  71. LLAMA_FTYPE_MOSTLY_Q4_K_S = 14,
  72. /// <summary>
  73. /// K-Quant 4 bit (Medium)
  74. /// </summary>
  75. /// <remarks>Benchmark@7B: 3.80GB, +0.0535 ppl</remarks>
  76. LLAMA_FTYPE_MOSTLY_Q4_K_M = 15,
  77. /// <summary>
  78. /// K-Quant 5 bit (Small)
  79. /// </summary>
  80. /// <remarks>Benchmark@7B: 4.33GB, +0.0353 ppl</remarks>
  81. LLAMA_FTYPE_MOSTLY_Q5_K_S = 16,
  82. /// <summary>
  83. /// K-Quant 5 bit (Medium)
  84. /// </summary>
  85. /// <remarks>Benchmark@7B: 4.45GB, +0.0142 ppl</remarks>
  86. LLAMA_FTYPE_MOSTLY_Q5_K_M = 17,
  87. /// <summary>
  88. /// K-Quant 6 bit
  89. /// </summary>
  90. /// <remarks>Benchmark@7B: 5.15GB, +0.0044 ppl</remarks>
  91. LLAMA_FTYPE_MOSTLY_Q6_K = 18,
  92. /// <summary>
  93. /// except 1d tensors
  94. /// </summary>
  95. LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19,
  96. /// <summary>
  97. /// except 1d tensors
  98. /// </summary>
  99. LLAMA_FTYPE_MOSTLY_IQ2_XS = 20,
  100. /// <summary>
  101. /// except 1d tensors
  102. /// </summary>
  103. LLAMA_FTYPE_MOSTLY_Q2_K_S = 21,
  104. /// <summary>
  105. /// except 1d tensors
  106. /// </summary>
  107. LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22,
  108. /// <summary>
  109. /// except 1d tensors
  110. /// </summary>
  111. LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23,
  112. /// <summary>
  113. /// File type was not specified
  114. /// </summary>
  115. LLAMA_FTYPE_GUESSED = 1024
  116. }
  117. }