using System;
using System.Runtime.InteropServices;
namespace LLama.Native
{
    /// <summary>
    /// Quantizer parameters used in the native API
    /// </summary>
    /// <remarks>llama_model_quantize_params</remarks>
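    /// <example>
    /// A minimal usage sketch: obtain the native defaults, then adjust fields before quantizing
    /// (the quantize entry point itself is bound elsewhere in the native API).
    /// <code>
    /// var p = LLamaModelQuantizeParams.Default();
    /// p.nthread = Environment.ProcessorCount;
    /// p.quantize_output_tensor = true;
    /// // p.ftype selects the target quantization format (a LLamaFtype value)
    /// </code>
    /// </example>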
    [StructLayout(LayoutKind.Sequential)]
    public struct LLamaModelQuantizeParams
    {
        /// <summary>
        /// number of threads to use for quantizing, if &lt;=0 will use std::thread::hardware_concurrency()
        /// </summary>
        public int nthread;

        /// <summary>
        /// quantize to this llama_ftype
        /// </summary>
        public LLamaFtype ftype;

        /// <summary>
        /// output tensor type
        /// </summary>
        public GGMLType output_tensor_type;

        /// <summary>
        /// token embeddings tensor type
        /// </summary>
        public GGMLType token_embedding_type;

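        // The flags below are exposed as bool properties but stored in one-byte sbyte
        // fields, keeping the struct blittable and byte-compatible with the native
        // layout (a C# bool field is not blittable and would marshal as a 4-byte BOOL).
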
        /// <summary>
        /// allow quantizing non-f32/f16 tensors
        /// </summary>
        public bool allow_requantize
        {
            get => Convert.ToBoolean(_allow_requantize);
            set => _allow_requantize = Convert.ToSByte(value);
        }
        private sbyte _allow_requantize;

        /// <summary>
        /// quantize output.weight
        /// </summary>
        public bool quantize_output_tensor
        {
            get => Convert.ToBoolean(_quantize_output_tensor);
            set => _quantize_output_tensor = Convert.ToSByte(value);
        }
        private sbyte _quantize_output_tensor;

        /// <summary>
        /// only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
        /// </summary>
        public bool only_copy
        {
            get => Convert.ToBoolean(_only_copy);
            set => _only_copy = Convert.ToSByte(value);
        }
        private sbyte _only_copy;

        /// <summary>
        /// quantize all tensors to the default type
        /// </summary>
        public bool pure
        {
            get => Convert.ToBoolean(_pure);
            set => _pure = Convert.ToSByte(value);
        }
        private sbyte _pure;

        /// <summary>
        /// pointer to importance matrix data
        /// </summary>
        public IntPtr imatrix;

        /// <summary>
        /// pointer to vector containing overrides
        /// </summary>
        public IntPtr kv_overrides;

        /// <summary>
        /// Create a LLamaModelQuantizeParams with default values
        /// </summary>
        /// <returns></returns>
        public static LLamaModelQuantizeParams Default()
        {
            return llama_model_quantize_default_params();
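
            // Local extern function (C# 9): P/Invokes into the native library, which
            // returns llama.cpp's default quantize parameters by value.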
            [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
            static extern LLamaModelQuantizeParams llama_model_quantize_default_params();
        }
    }
}