using System;
using System.Runtime.InteropServices;
namespace LLama.Native
{
    /// <summary>
    /// Quantizer parameters used in the native API
    /// </summary>
    /// <remarks>llama_model_quantize_params</remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct LLamaModelQuantizeParams
    {
        /// <summary>
        /// Number of threads to use for quantizing; if &lt;= 0, std::thread::hardware_concurrency() will be used
        /// </summary>
        public int nthread;

        /// <summary>
        /// Quantize to this llama_ftype
        /// </summary>
        public LLamaFtype ftype;
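
        // The flags below are single-byte booleans in the native llama_model_quantize_params
        // struct. A C# bool field in a sequential struct marshals as the 4-byte Win32 BOOL by
        // default, so each flag is backed by an sbyte to keep the managed layout byte-compatible
        // with the native one (and the struct blittable); the bool properties just convert.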

        /// <summary>
        /// Allow quantizing non-f32/f16 tensors
        /// </summary>
        public bool allow_requantize
        {
            get => Convert.ToBoolean(_allow_requantize);
            set => _allow_requantize = Convert.ToSByte(value);
        }
        private sbyte _allow_requantize;

        /// <summary>
        /// Quantize output.weight
        /// </summary>
        public bool quantize_output_tensor
        {
            get => Convert.ToBoolean(_quantize_output_tensor);
            set => _quantize_output_tensor = Convert.ToSByte(value);
        }
        private sbyte _quantize_output_tensor;

        /// <summary>
        /// Only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
        /// </summary>
        public bool only_copy
        {
            get => Convert.ToBoolean(_only_copy);
            set => _only_copy = Convert.ToSByte(value);
        }
        private sbyte _only_copy;

        /// <summary>
        /// Disable k-quant mixtures and quantize all tensors to the same type
        /// </summary>
        public bool pure
        {
            get => Convert.ToBoolean(_pure);
            set => _pure = Convert.ToSByte(value);
        }
        private sbyte _pure;

        /// <summary>
        /// Pointer to importance matrix data
        /// </summary>
        public IntPtr imatrix;
    }
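
    // Illustrative sketch (an assumption, not part of the original file): how these parameters
    // would typically be populated and handed to the native quantize entry point. The
    // NativeApi.llama_model_quantize binding and the LLamaFtype member used below are assumed
    // to mirror llama.cpp's llama_model_quantize() and LLAMA_FTYPE_MOSTLY_Q4_0.
    internal static class LLamaModelQuantizeParamsExample
    {
        public static void QuantizeToQ4_0(string inputPath, string outputPath)
        {
            var p = new LLamaModelQuantizeParams
            {
                nthread = 0,                                // <= 0: native code picks hardware_concurrency()
                ftype = LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0, // assumed enum member name
                quantize_output_tensor = true,
                allow_requantize = false,
                imatrix = IntPtr.Zero,                      // no importance matrix supplied
            };

            // Assumed binding: llama.cpp's llama_model_quantize returns 0 on success.
            var result = NativeApi.llama_model_quantize(inputPath, outputPath, ref p);
            if (result != 0)
                throw new InvalidOperationException($"Quantization failed with code {result}.");
        }
    }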
}