using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
namespace LLama.Native
{
/// <summary>
/// Quantization parameters passed to the native llama.cpp quantization API.
/// Mirrors the native <c>llama_model_quantize_params</c> struct: field order,
/// types, and marshaling must match the C layout exactly — do not reorder.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct LLamaModelQuantizeParams
{
    /// <summary>
    /// Number of threads to use for quantizing; if &lt;= 0 the native side
    /// uses <c>std::thread::hardware_concurrency()</c>.
    /// </summary>
    public int nthread;

    /// <summary>
    /// Quantize to this <see cref="LLamaFtype"/> (target file type).
    /// </summary>
    public LLamaFtype ftype;

    /// <summary>
    /// Allow quantizing non-f32/f16 tensors (re-quantization).
    /// </summary>
    /// <remarks>Marshaled as a 1-byte value to match the C <c>bool</c>.</remarks>
    [MarshalAs(UnmanagedType.I1)]
    public bool allow_requantize;

    /// <summary>
    /// Quantize the <c>output.weight</c> tensor.
    /// </summary>
    /// <remarks>Marshaled as a 1-byte value to match the C <c>bool</c>.</remarks>
    [MarshalAs(UnmanagedType.I1)]
    public bool quantize_output_tensor;
}
}