You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

LLamaModelQuantizeParams.cs 1.1 kB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. using System;
  namespace LLama.Native
  {
      /// <summary>
      /// Parameters handed to the native API to control quantization.
      /// </summary>
      /// <remarks>
      /// The two boolean options are kept in signed-byte backing fields and surfaced
      /// through <see cref="bool"/> properties.
      /// NOTE(review): the field order and one-byte flags presumably mirror the native
      /// struct layout - confirm against the native header before reordering anything.
      /// </remarks>
      public struct LLamaModelQuantizeParams
      {
          /// <summary>
          /// Thread count used while quantizing; a value &lt;= 0 means
          /// std::thread::hardware_concurrency() is used instead.
          /// </summary>
          public int nthread;

          /// <summary>
          /// The llama_ftype to quantize to.
          /// </summary>
          public LLamaFtype ftype;

          /// <summary>
          /// Whether tensors that are not f32/f16 may be quantized.
          /// </summary>
          public bool allow_requantize
          {
              // Any non-zero byte reads back as true.
              get => _allow_requantize != 0;
              // true is stored as 1, false as 0.
              set => _allow_requantize = value ? (sbyte)1 : (sbyte)0;
          }

          // Byte-sized storage behind allow_requantize.
          private sbyte _allow_requantize;

          /// <summary>
          /// Whether output.weight is quantized.
          /// </summary>
          public bool quantize_output_tensor
          {
              // Any non-zero byte reads back as true.
              get => _quantize_output_tensor != 0;
              // true is stored as 1, false as 0.
              set => _quantize_output_tensor = value ? (sbyte)1 : (sbyte)0;
          }

          // Byte-sized storage behind quantize_output_tensor.
          private sbyte _quantize_output_tensor;
      }
  }