You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

LLamaQuantizer.cs 4.5 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. using LLama.Native;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using System.Text;
  namespace LLama
  {
      /// <summary>
      /// Static entry points for quantizing a model file through the native llama API.
      /// </summary>
      public static class LLamaQuantizer
      {
          /// <summary>
          /// Single source of truth for the textual quantization names accepted by the
          /// string-based overload. Lookups ignore case, so "Q4_0" and "q4_0" are equivalent.
          /// </summary>
          private static readonly Dictionary<string, LLamaFtype> FtypesByName =
              new Dictionary<string, LLamaFtype>(StringComparer.OrdinalIgnoreCase)
              {
                  ["q4_0"] = LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0,
                  ["q4_1"] = LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1,
                  ["q5_0"] = LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0,
                  ["q5_1"] = LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1,
                  ["q8_0"] = LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0,
              };

          /// <summary>
          /// Quantize the model.
          /// </summary>
          /// <param name="srcFileName">The model file to be quantized. Must not be null or empty.</param>
          /// <param name="dstFilename">The path to save the quantized model. Must not be null or empty.</param>
          /// <param name="ftype">The type of quantization. Only the Q4_0, Q4_1, Q5_0, Q5_1 and Q8_0 variants are accepted.</param>
          /// <param name="nthread">Thread count forwarded to the native quantizer. The default of -1 leaves the choice
          /// to the native library (described by the original authors as the physical core number).</param>
          /// <param name="allowRequantize">Forwarded to the native <c>allow_requantize</c> setting.
          /// NOTE(review): per the llama.cpp header this allows quantizing tensors that are not f32/f16 - confirm
          /// against the bundled native version.</param>
          /// <param name="quantizeOutputTensor">Forwarded to the native <c>quantize_output_tensor</c> setting.
          /// NOTE(review): per the llama.cpp header this also quantizes the output weight tensor - confirm
          /// against the bundled native version.</param>
          /// <returns><c>true</c> when the native quantize call returns 0; otherwise <c>false</c>.</returns>
          /// <exception cref="ArgumentException">
          /// <paramref name="ftype"/> is not one of the supported quantization types, or
          /// <paramref name="srcFileName"/> / <paramref name="dstFilename"/> is null or empty.
          /// </exception>
          public static unsafe bool Quantize(string srcFileName, string dstFilename, LLamaFtype ftype, int nthread = -1, bool allowRequantize = true,
              bool quantizeOutputTensor = false)
          {
              if (!ValidateFtype(ftype))
              {
                  // Interpolate the value itself: Enum.GetName returns null for undefined
                  // values, which used to leave a blank in the message.
                  throw new ArgumentException($"The type {ftype} is not a valid type " +
                      $"to perform quantization.", nameof(ftype));
              }

              // Reject missing paths here instead of handing a null/empty string to native code.
              if (string.IsNullOrEmpty(srcFileName))
              {
                  throw new ArgumentException("The source model file name must not be null or empty.", nameof(srcFileName));
              }

              if (string.IsNullOrEmpty(dstFilename))
              {
                  throw new ArgumentException("The destination file name must not be null or empty.", nameof(dstFilename));
              }

              // Start from the native defaults and override only what the caller controls.
              var quantizeParams = NativeApi.llama_model_quantize_default_params();
              quantizeParams.ftype = ftype;
              quantizeParams.nthread = nthread;
              quantizeParams.allow_requantize = allowRequantize;
              quantizeParams.quantize_output_tensor = quantizeOutputTensor;

              // The struct is a local variable, so its address can be taken directly without a fixed statement.
              return NativeApi.llama_model_quantize(srcFileName, dstFilename, &quantizeParams) == 0;
          }

          /// <summary>
          /// Quantize the model, selecting the quantization type by name.
          /// </summary>
          /// <param name="srcFileName">The model file to be quantized. Must not be null or empty.</param>
          /// <param name="dstFilename">The path to save the quantized model. Must not be null or empty.</param>
          /// <param name="ftype">The name of the quantization type: one of "q4_0", "q4_1", "q5_0", "q5_1" or "q8_0"
          /// (case-insensitive).</param>
          /// <param name="nthread">Thread count forwarded to the native quantizer. The default of -1 leaves the choice
          /// to the native library (described by the original authors as the physical core number).</param>
          /// <param name="allowRequantize">Forwarded to the native <c>allow_requantize</c> setting.</param>
          /// <param name="quantizeOutputTensor">Forwarded to the native <c>quantize_output_tensor</c> setting.</param>
          /// <returns><c>true</c> when the native quantize call returns 0; otherwise <c>false</c>.</returns>
          /// <exception cref="ArgumentException">
          /// <paramref name="ftype"/> is not a recognised quantization name, or
          /// <paramref name="srcFileName"/> / <paramref name="dstFilename"/> is null or empty.
          /// </exception>
          public static bool Quantize(string srcFileName, string dstFilename, string ftype, int nthread = -1, bool allowRequantize = true,
              bool quantizeOutputTensor = false)
          {
              return Quantize(srcFileName, dstFilename, StringToFtype(ftype), nthread, allowRequantize, quantizeOutputTensor);
          }

          /// <summary>
          /// Returns whether <paramref name="ftype"/> names a supported quantization type.
          /// A null name is reported as unsupported.
          /// </summary>
          private static bool ValidateFtype(string ftype)
          {
              return ftype is not null && FtypesByName.ContainsKey(ftype);
          }

          /// <summary>
          /// Returns whether <paramref name="ftype"/> is one of the quantization types this class accepts.
          /// </summary>
          private static bool ValidateFtype(LLamaFtype ftype)
          {
              return ftype is LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0 or LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1
                  or LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0 or LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1 or LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0;
          }

          /// <summary>
          /// Converts a supported quantization type to its short textual name (for example "q4_0").
          /// </summary>
          /// <exception cref="ArgumentException"><paramref name="ftype"/> is not a supported quantization type.</exception>
          private static string FtypeToString(LLamaFtype ftype)
          {
              return ftype switch
              {
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0 => "q4_0",
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1 => "q4_1",
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0 => "q5_0",
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1 => "q5_1",
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0 => "q8_0",
                  // Interpolate the value itself so undefined enum values still show up in the message.
                  _ => throw new ArgumentException($"The type {ftype} is not a valid type " +
                      $"to perform quantization.", nameof(ftype))
              };
          }

          /// <summary>
          /// Converts a quantization name (case-insensitive) to the matching <see cref="LLamaFtype"/>.
          /// </summary>
          /// <exception cref="ArgumentException"><paramref name="str"/> is null or not a recognised name.</exception>
          private static LLamaFtype StringToFtype(string str)
          {
              // Guard against null first: Dictionary lookups throw on a null key.
              if (str is not null && FtypesByName.TryGetValue(str, out var ftype))
              {
                  return ftype;
              }

              throw new ArgumentException($"Invalid ftype {str} to quantize.");
          }
      }
  }