You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

Quantizer.cs 1.8 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. using LLama.Native;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using System.Text;
  6. namespace LLama
  7. {
  /// <summary>
  /// Static entry points that validate a quantization type and forward the request
  /// to <c>NativeApi.ggml_custom_quantize</c>.
  /// </summary>
  public class Quantizer
  {
      // Type names accepted by the string overload of Quantize. Kept in a single
      // static array so the list is not re-allocated on every validation call.
      private static readonly string[] s_supportedFtypes = { "q4_0", "q4_1", "q4_2", "q5_0", "q5_1", "q8_0" };

      /// <summary>
      /// Quantizes a model file, selecting the quantization type by enum value.
      /// </summary>
      /// <param name="srcFileName">Source file name, forwarded unchanged to the native call.</param>
      /// <param name="dstFilename">Destination file name, forwarded unchanged to the native call.</param>
      /// <param name="ftype">Quantization type; must map to one of the supported type names.</param>
      /// <param name="nthread">Forwarded unchanged to the native call.</param>
      /// <param name="printInfo">Forwarded unchanged to the native call.</param>
      /// <returns>The value returned by <c>NativeApi.ggml_custom_quantize</c>.</returns>
      /// <exception cref="ArgumentException"><paramref name="ftype"/> is not a supported quantization type.</exception>
      public static bool Quantize(string srcFileName, string dstFilename, LLamaFtype ftype, int nthread = 0, bool printInfo = true)
      {
          return Quantize(srcFileName, dstFilename, FtypeToString(ftype), nthread, printInfo);
      }

      /// <summary>
      /// Quantizes a model file, selecting the quantization type by name.
      /// </summary>
      /// <param name="srcFileName">Source file name, forwarded unchanged to the native call.</param>
      /// <param name="dstFilename">Destination file name, forwarded unchanged to the native call.</param>
      /// <param name="ftype">One of "q4_0", "q4_1", "q4_2", "q5_0", "q5_1" or "q8_0" (exact match).</param>
      /// <param name="nthread">Forwarded unchanged to the native call.</param>
      /// <param name="printInfo">Forwarded unchanged to the native call.</param>
      /// <returns>The value returned by <c>NativeApi.ggml_custom_quantize</c>.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="ftype"/> is null.</exception>
      /// <exception cref="ArgumentException"><paramref name="ftype"/> is not a supported quantization type.</exception>
      public static bool Quantize(string srcFileName, string dstFilename, string ftype, int nthread = 0, bool printInfo = true)
      {
          // A null name previously surfaced as ArgumentNullException (raised indirectly
          // while the error message was being built); keep that exception type explicit.
          if (ftype is null)
          {
              throw new ArgumentNullException(nameof(ftype));
          }

          if (!ValidateFtype(ftype))
          {
              // ftype is already a string here. Passing it to Enum.GetName would throw an
              // unrelated ArgumentException about the value's type and hide this message.
              throw new ArgumentException($"The type {ftype} is not a valid type " +
                  $"to perform quantization.", nameof(ftype));
          }

          return NativeApi.ggml_custom_quantize(srcFileName, dstFilename, ftype, nthread, printInfo);
      }

      /// <summary>
      /// Returns true when <paramref name="ftype"/> exactly matches a supported type name.
      /// </summary>
      private static bool ValidateFtype(string ftype)
      {
          return s_supportedFtypes.Contains(ftype);
      }

      /// <summary>
      /// Maps a <c>LLamaFtype</c> value to the type name expected by the string overload.
      /// </summary>
      /// <exception cref="ArgumentException"><paramref name="ftype"/> has no supported mapping.</exception>
      private static string FtypeToString(LLamaFtype ftype)
      {
          return ftype switch
          {
              LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0 => "q4_0",
              LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1 => "q4_1",
              LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_2 => "q4_2",
              LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_0 => "q5_0",
              LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_1 => "q5_1",
              LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0 => "q8_0",
              // Interpolating the enum value prints its name when defined and its numeric
              // value otherwise, so the message is never left with an empty type name.
              _ => throw new ArgumentException($"The type {ftype} is not a valid type " +
                  $"to perform quantization.", nameof(ftype))
          };
      }
  }
  42. }

C#/.NET上易用的LLM高性能推理框架,支持LLaMA和LLaVA系列模型。