You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

Quantizer.cs 3.7 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. using LLama.Native;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using System.Text;
  namespace LLama
  {
      /// <summary>
      /// Static entry points for quantizing a model file by calling the native
      /// <c>llama_model_quantize</c> function with a supported quantization type.
      /// </summary>
      public class Quantizer
      {
          /// <summary>
          /// Quantize the model.
          /// </summary>
          /// <param name="srcFileName">The model file to be quantized.</param>
          /// <param name="dstFilename">The path to save the quantized model.</param>
          /// <param name="ftype">The type of quantization.</param>
          /// <param name="nthread">Number of threads to use during the quantization. The default (-1) is forwarded
          /// unchanged to the native call, which is documented to fall back to the physical core number.</param>
          /// <returns>Whether the quantization is successful, i.e. whether the native call returned 0.</returns>
          /// <exception cref="ArgumentNullException"><paramref name="srcFileName"/> or
          /// <paramref name="dstFilename"/> is null.</exception>
          /// <exception cref="ArgumentException"><paramref name="ftype"/> is not one of the supported
          /// quantization types.</exception>
          public static bool Quantize(string srcFileName, string dstFilename, LLamaFtype ftype, int nthread = -1)
          {
              // Reject null paths here instead of handing them to native code.
              if (srcFileName is null)
              {
                  throw new ArgumentNullException(nameof(srcFileName));
              }

              if (dstFilename is null)
              {
                  throw new ArgumentNullException(nameof(dstFilename));
              }

              if (!ValidateFtype(ftype))
              {
                  throw CreateInvalidFtypeException(ftype);
              }

              // A zero return code from the native call is treated as success.
              return NativeApi.llama_model_quantize(srcFileName, dstFilename, ftype, nthread) == 0;
          }

          /// <summary>
          /// Quantize the model, selecting the quantization type by its short name.
          /// </summary>
          /// <param name="srcFileName">The model file to be quantized.</param>
          /// <param name="dstFilename">The path to save the quantized model.</param>
          /// <param name="ftype">The type of quantization. One of "q4_0", "q4_1", "q4_2", "q5_0", "q5_1"
          /// or "q8_0" (exact, case-sensitive match).</param>
          /// <param name="nthread">Number of threads to use during the quantization. The default (-1) is forwarded
          /// unchanged to the native call, which is documented to fall back to the physical core number.</param>
          /// <returns>Whether the quantization is successful, i.e. whether the native call returned 0.</returns>
          /// <exception cref="ArgumentNullException"><paramref name="srcFileName"/>,
          /// <paramref name="dstFilename"/> or <paramref name="ftype"/> is null.</exception>
          /// <exception cref="ArgumentException"><paramref name="ftype"/> is not one of the supported
          /// quantization type names.</exception>
          public static bool Quantize(string srcFileName, string dstFilename, string ftype, int nthread = -1)
          {
              if (ftype is null)
              {
                  throw new ArgumentNullException(nameof(ftype));
              }

              return Quantize(srcFileName, dstFilename, StringToFtype(ftype), nthread);
          }

          /// <summary>
          /// Checks whether a short type name is one of the supported quantization types.
          /// </summary>
          /// <param name="ftype">The short type name to check; null yields false.</param>
          /// <returns>True when the name is supported; otherwise false.</returns>
          private static bool ValidateFtype(string ftype)
          {
              // Constant patterns avoid allocating a lookup array on every call.
              return ftype is "q4_0" or "q4_1" or "q4_2" or "q5_0" or "q5_1" or "q8_0";
          }

          /// <summary>
          /// Checks whether an enum value is one of the supported quantization types.
          /// </summary>
          /// <param name="ftype">The quantization type to check.</param>
          /// <returns>True when the type is supported; otherwise false.</returns>
          private static bool ValidateFtype(LLamaFtype ftype)
          {
              return ftype is LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0
                  or LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1
                  or LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_2
                  or LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0
                  or LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1
                  or LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0;
          }

          /// <summary>
          /// Converts a supported quantization type to its short name.
          /// </summary>
          /// <param name="ftype">The quantization type to convert.</param>
          /// <returns>The short name, e.g. "q4_0".</returns>
          /// <exception cref="ArgumentException"><paramref name="ftype"/> is not a supported
          /// quantization type.</exception>
          private static string FtypeToString(LLamaFtype ftype)
          {
              return ftype switch
              {
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0 => "q4_0",
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1 => "q4_1",
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_2 => "q4_2",
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0 => "q5_0",
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1 => "q5_1",
                  LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0 => "q8_0",
                  _ => throw CreateInvalidFtypeException(ftype)
              };
          }

          /// <summary>
          /// Converts a short quantization type name to the matching enum value.
          /// </summary>
          /// <param name="str">The short name, e.g. "q4_0" (exact, case-sensitive match).</param>
          /// <returns>The matching quantization type.</returns>
          /// <exception cref="ArgumentException"><paramref name="str"/> is not a supported
          /// quantization type name.</exception>
          private static LLamaFtype StringToFtype(string str)
          {
              return str switch
              {
                  "q4_0" => LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0,
                  "q4_1" => LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1,
                  "q4_2" => LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_2,
                  "q5_0" => LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0,
                  "q5_1" => LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1,
                  "q8_0" => LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0,
                  // Without this arm an unknown name surfaces as a switch-expression runtime
                  // failure rather than the ArgumentException the public API documents.
                  _ => throw new ArgumentException($"The type {str} is not a valid type " +
                      $"to perform quantization.")
              };
          }

          /// <summary>
          /// Builds the exception thrown when an enum value is not a supported quantization type.
          /// </summary>
          /// <param name="ftype">The rejected quantization type.</param>
          /// <returns>The exception for the caller to throw.</returns>
          private static ArgumentException CreateInvalidFtypeException(LLamaFtype ftype)
          {
              // Interpolating the enum value prints its name, or its numeric value when it is not a
              // defined member; Enum.GetName would return null there and leave the message blank.
              return new ArgumentException($"The type {ftype} is not a valid type " +
                  $"to perform quantization.");
          }
      }
  }

C#/.NET上易用的LLM高性能推理框架,支持LLaMA和LLaVA系列模型。