namespace LLama.Native
{
    /// <summary>
    /// Quantizer parameters used in the native API
    /// </summary>
    public struct LLamaModelQuantizeParams
    {
        /// <summary>
        /// number of threads to use for quantizing, if &lt;=0 will use std::thread::hardware_concurrency()
        /// </summary>
        public int nthread;

        /// <summary>
        /// quantize to this llama_ftype
        /// </summary>
        public LLamaFtype ftype;

        /// <summary>
        /// allow quantizing non-f32/f16 tensors
        /// </summary>
        public bool allow_requantize
        {
            get => Utils.SignedByteToBool(_allow_requantize);
            set => _allow_requantize = Utils.BoolToSignedByte(value);
        }
        private sbyte _allow_requantize;

        /// <summary>
        /// quantize output.weight
        /// </summary>
        public bool quantize_output_tensor
        {
            get => Utils.SignedByteToBool(_quantize_output_tensor);
            set => _quantize_output_tensor = Utils.BoolToSignedByte(value);
        }
        private sbyte _quantize_output_tensor;
    }
}
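
// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original file): shows how these
// parameters might be populated before handing them to the native quantizer.
// The LLamaFtype member name and the NativeApi entry point mentioned below are
// assumptions based on llama.cpp's C API and may differ in this codebase.
// ---------------------------------------------------------------------------
namespace LLama.Native
{
    internal static class QuantizeParamsExample
    {
        public static LLamaModelQuantizeParams CreateDefault()
        {
            return new LLamaModelQuantizeParams
            {
                // <=0 lets the native side fall back to std::thread::hardware_concurrency()
                nthread = 0,
                // target format; enum member name assumed from llama.cpp's llama_ftype
                ftype = LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0,
                // only quantize tensors that are already f32/f16
                allow_requantize = false,
                // quantize output.weight as well
                quantize_output_tensor = true,
            };
        }
    }
}

// A call into the native layer would then look roughly like (signature assumed,
// mirroring llama.cpp's llama_model_quantize):
//
//   var qparams = QuantizeParamsExample.CreateDefault();
//   NativeApi.llama_model_quantize("model-f16.bin", "model-q4_0.bin", ref qparams);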