You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

LLamaModelQuantizeParams.cs 848 B

1234567891011121314151617181920212223242526272829303132
  1. using System.Runtime.InteropServices;
  namespace LLama.Native
  {
      /// <summary>
      /// Parameter block handed to the native API to control quantization.
      /// </summary>
      /// <remarks>
      /// The sequential layout is spelled out explicitly (it is already the C# default
      /// for structs), so fields are marshalled in declaration order. The native side
      /// presumably expects exactly this order — confirm before reordering fields.
      /// </remarks>
      [StructLayout(LayoutKind.Sequential)]
      public struct LLamaModelQuantizeParams
      {
          /// <summary>
          /// Thread count used while quantizing; a value &lt;=0 falls back to
          /// std::thread::hardware_concurrency().
          /// </summary>
          public int nthread;

          /// <summary>
          /// The llama_ftype to quantize to.
          /// </summary>
          public LLamaFtype ftype;

          /// <summary>
          /// Permit quantizing tensors that are not f32/f16.
          /// </summary>
          // Marshalled as a single byte (UnmanagedType.I1).
          [MarshalAs(UnmanagedType.I1)]
          public bool allow_requantize;

          /// <summary>
          /// Whether output.weight is quantized.
          /// </summary>
          // Marshalled as a single byte (UnmanagedType.I1).
          [MarshalAs(UnmanagedType.I1)]
          public bool quantize_output_tensor;
      }
  }