using System.Runtime.InteropServices;

namespace LLama.Native
{
    public struct LLamaModelQuantizeParams
    {
        /// <summary>
        /// number of threads to use for quantizing, if &lt;=0 will use std::thread::hardware_concurrency()
        /// </summary>
        public int nthread;

        /// <summary>
        /// quantize to this llama_ftype
        /// </summary>
        public LLamaFtype ftype;

        /// <summary>
        /// allow quantizing non-f32/f16 tensors
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool allow_requantize;

        /// <summary>
        /// quantize output.weight
        /// </summary>
        [MarshalAs(UnmanagedType.I1)]
        public bool quantize_output_tensor;
    }
}
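
// Usage sketch (illustrative, not part of the original file): shows how these
// parameters might be filled in and handed to llama.cpp's quantize entry point.
// llama.cpp exposes llama_model_quantize(fname_inp, fname_out, params); the C#
// binding used below (NativeApi.llama_model_quantize) and the LLamaFtype member
// LLAMA_FTYPE_MOSTLY_Q4_0 are assumptions for this sketch, not confirmed API.
namespace LLama.Examples
{
    using LLama.Native;

    internal static class QuantizeSketch
    {
        /// <summary>
        /// Quantize a model file to 4-bit weights using default thread selection.
        /// </summary>
        public static void QuantizeToQ4(string inputPath, string outputPath)
        {
            var p = new LLamaModelQuantizeParams
            {
                nthread = 0,                                 // <=0: llama.cpp picks hardware_concurrency()
                ftype = LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0,  // assumed enum member mirroring llama.cpp's llama_ftype
                allow_requantize = false,                    // reject tensors that are not f32/f16
                quantize_output_tensor = true                // also quantize output.weight
            };

            // Assumed P/Invoke binding mirroring the native C signature:
            // int llama_model_quantize(const char* fname_inp, const char* fname_out, const llama_model_quantize_params* params);
            int status = NativeApi.llama_model_quantize(inputPath, outputPath, ref p);
            if (status != 0)
                throw new System.InvalidOperationException($"llama_model_quantize failed with status {status}.");
        }
    }
}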