You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

GGMLType.cs 2.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. namespace LLama.Native;
  2. /// <summary>
  3. /// Possible GGML quantisation types
  4. /// </summary>
  5. public enum GGMLType
  6. {
  7. /// <summary>
  8. /// Full 32 bit float
  9. /// </summary>
  10. GGML_TYPE_F32 = 0,
  11. /// <summary>
  12. /// 16 bit float
  13. /// </summary>
  14. GGML_TYPE_F16 = 1,
  15. /// <summary>
  16. /// 4 bit float
  17. /// </summary>
  18. GGML_TYPE_Q4_0 = 2,
  19. /// <summary>
  20. /// 4 bit float
  21. /// </summary>
  22. GGML_TYPE_Q4_1 = 3,
  23. // GGML_TYPE_Q4_2 = 4, support has been removed
  24. // GGML_TYPE_Q4_3 (5) support has been removed
  25. /// <summary>
  26. /// 5 bit float
  27. /// </summary>
  28. GGML_TYPE_Q5_0 = 6,
  29. /// <summary>
  30. /// 5 bit float
  31. /// </summary>
  32. GGML_TYPE_Q5_1 = 7,
  33. /// <summary>
  34. /// 8 bit float
  35. /// </summary>
  36. GGML_TYPE_Q8_0 = 8,
  37. /// <summary>
  38. /// 8 bit float
  39. /// </summary>
  40. GGML_TYPE_Q8_1 = 9,
  41. // k-quantizations
  42. /// <summary>
  43. /// "type-1" 2-bit quantization in super-blocks containing 16 blocks, each block having 16 weight.
  44. /// Block scales and mins are quantized with 4 bits. This ends up effectively using 2.5625 bits per weight (bpw)
  45. /// </summary>
  46. GGML_TYPE_Q2_K = 10,
  47. /// <summary>
  48. /// "type-0" 3-bit quantization in super-blocks containing 16 blocks, each block having 16 weights.
  49. /// Scales are quantized with 6 bits. This end up using 3.4375 bpw.
  50. /// </summary>
  51. GGML_TYPE_Q3_K = 11,
  52. /// <summary>
  53. /// "type-1" 4-bit quantization in super-blocks containing 8 blocks, each block having 32 weights.
  54. /// Scales and mins are quantized with 6 bits. This ends up using 4.5 bpw.
  55. /// </summary>
  56. GGML_TYPE_Q4_K = 12,
  57. /// <summary>
  58. /// "type-1" 5-bit quantization. Same super-block structure as GGML_TYPE_Q4_K resulting in 5.5 bpw
  59. /// </summary>
  60. GGML_TYPE_Q5_K = 13,
  61. /// <summary>
  62. /// "type-0" 6-bit quantization. Super-blocks with 16 blocks, each block having 16 weights.
  63. /// Scales are quantized with 8 bits. This ends up using 6.5625 bpw
  64. /// </summary>
  65. GGML_TYPE_Q6_K = 14,
  66. /// <summary>
  67. /// "type-0" 8-bit quantization. Only used for quantizing intermediate results.
  68. /// The difference to the existing Q8_0 is that the block size is 256. All 2-6 bit dot products are implemented for this quantization type.
  69. /// </summary>
  70. GGML_TYPE_Q8_K = 15,
  71. /// <summary>
  72. /// Integer, 8 bit
  73. /// </summary>
  74. GGML_TYPE_I8 = 16,
  75. /// <summary>
  76. /// Integer, 16 bit
  77. /// </summary>
  78. GGML_TYPE_I16 = 17,
  79. /// <summary>
  80. /// Integer, 32 bit
  81. /// </summary>
  82. GGML_TYPE_I32 = 18,
  83. /// <summary>
  84. /// The value of this entry is the count of the number of possible quant types.
  85. /// </summary>
  86. GGML_TYPE_COUNT,
  87. }