Namespace: LLama.Native
Quantizer parameters used in the native API
public struct LLamaModelQuantizeParams
Inheritance Object → ValueType → LLamaModelQuantizeParams
Remarks:
llama_model_quantize_params
number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
public int nthread;
quantize to this llama_ftype
public LLamaFtype ftype;
pointer to importance matrix data
public IntPtr imatrix;
allow quantizing non-f32/f16 tensors
public bool allow_requantize { get; set; }
quantize output.weight
public bool quantize_output_tensor { get; set; }
only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
public bool only_copy { get; set; }
disable k-quant mixtures and quantize all tensors to the same type
public bool pure { get; set; }