You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

LLamaModelQuantizeParams.cs 1.1 kB

1234567891011121314151617181920212223242526272829303132333435363738
  namespace LLama.Native
  {
      /// <summary>
      /// Parameters for the quantizer, as used by the native API
      /// </summary>
      /// <remarks>
      /// The boolean options are exposed as properties; each one is stored in a
      /// private <see cref="sbyte"/> field and converted through
      /// Utils.SignedByteToBool / Utils.BoolToSignedByte.
      /// </remarks>
      public struct LLamaModelQuantizeParams
      {
          // NOTE(review): this struct presumably mirrors a native struct, so the
          // declaration order of the fields below is kept as-is — confirm before reordering.

          /// <summary>
          /// Thread count for quantizing; a value &lt;=0 means std::thread::hardware_concurrency() is used
          /// </summary>
          public int nthread;

          /// <summary>
          /// The llama_ftype to quantize to
          /// </summary>
          public LLamaFtype ftype;

          /// <summary>
          /// Whether tensors that are not f32/f16 may be quantized
          /// </summary>
          public bool allow_requantize
          {
              get
              {
                  return Utils.SignedByteToBool(_allow_requantize);
              }
              set
              {
                  _allow_requantize = Utils.BoolToSignedByte(value);
              }
          }

          // Signed-byte storage behind allow_requantize.
          private sbyte _allow_requantize;

          /// <summary>
          /// Whether output.weight is quantized
          /// </summary>
          public bool quantize_output_tensor
          {
              get
              {
                  return Utils.SignedByteToBool(_quantize_output_tensor);
              }
              set
              {
                  _quantize_output_tensor = Utils.BoolToSignedByte(value);
              }
          }

          // Signed-byte storage behind quantize_output_tensor.
          private sbyte _quantize_output_tensor;
      }
  }