using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

namespace LLama.Native
{
    internal partial class NativeApi
    {
        /// <summary>
        /// Quantize the model in <paramref name="fname_inp"/> and write the result to <paramref name="fname_out"/>.
        /// </summary>
        /// <param name="fname_inp">path of the input model file to quantize</param>
        /// <param name="fname_out">path to write the quantized model to</param>
        /// <param name="ftype">the target quantization format</param>
        /// <param name="nthread">how many threads to use. If &lt;= 0, will use std::thread::hardware_concurrency(), else the number given</param>
        /// <remarks>not great API - very likely to change</remarks>
        /// <returns>Returns 0 on success</returns>
        [DllImport(libraryName)]
        public static extern int llama_model_quantize(string fname_inp, string fname_out, LLamaFtype ftype, int nthread);
    }
}
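
namespace LLama.Native
{
    // A minimal usage sketch (not part of the original file) showing how the
    // P/Invoke above might be called. The model paths are placeholders, and
    // LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0 is assumed to be a member of this
    // project's LLamaFtype enum, mirroring llama.cpp's LLAMA_FTYPE_MOSTLY_Q4_0.
    internal static class QuantizeExample
    {
        public static void Run()
        {
            // Quantize an f16 GGML model down to 4-bit (Q4_0).
            int result = NativeApi.llama_model_quantize(
                "models/7B/ggml-model-f16.bin",   // fname_inp: source model (placeholder path)
                "models/7B/ggml-model-q4_0.bin",  // fname_out: quantized output (placeholder path)
                LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0,
                nthread: 0);                      // <= 0 => std::thread::hardware_concurrency()

            // The native function reports success as 0; anything else is a failure.
            if (result != 0)
                throw new InvalidOperationException($"llama_model_quantize failed with code {result}");
        }
    }
}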