namespace LLama.Native
{
    /// <summary>
    /// The model file types (weight storage / quantization formats) that are supported.
    /// </summary>
    /// <remarks>
    /// Members are listed in ascending numeric order. Every value is assigned explicitly
    /// and the sequence has gaps (5 and 6 are unused), so declaration order carries no meaning.
    /// </remarks>
    // The underlying type is spelled out; int is the C# default, so values are unchanged.
    public enum LLamaFtype : int
    {
        /// <summary>
        /// Every tensor stored as f32
        /// </summary>
        /// <remarks>Benchmark@7B: 26GB</remarks>
        LLAMA_FTYPE_ALL_F32 = 0,

        /// <summary>
        /// Mostly f16
        /// </summary>
        /// <remarks>Benchmark@7B: 13GB</remarks>
        LLAMA_FTYPE_MOSTLY_F16 = 1,

        /// <summary>
        /// Mostly 4 bit (Q4_0)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.50GB, +0.2499 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_0 = 2,

        /// <summary>
        /// Mostly 4 bit (Q4_1)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_1 = 3,

        /// <summary>
        /// Mostly 4 bit (Q4_1), with tok_embeddings.weight and output.weight stored as f16
        /// </summary>
        LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4,

        /// <summary>
        /// Mostly 8 bit (Q8_0)
        /// </summary>
        /// <remarks>Benchmark@7B: 6.7GB, +0.0004 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q8_0 = 7,

        /// <summary>
        /// Mostly 5 bit (Q5_0)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.30GB, +0.0796 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_0 = 8,

        /// <summary>
        /// Mostly 5 bit (Q5_1)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.70GB, +0.0415 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_1 = 9,

        /// <summary>
        /// K-Quant, 2 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 2.67GB, +0.8698 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q2_K = 10,

        /// <summary>
        /// K-Quant, 3 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 2.75GB, +0.5505 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_S = 11,

        /// <summary>
        /// K-Quant, 3 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.06GB, +0.2437 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_M = 12,

        /// <summary>
        /// K-Quant, 3 bit (Large)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.35GB, +0.1803 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_L = 13,

        /// <summary>
        /// K-Quant, 4 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.56GB, +0.1149 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_K_S = 14,

        /// <summary>
        /// K-Quant, 4 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.80GB, +0.0535 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_K_M = 15,

        /// <summary>
        /// K-Quant, 5 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.33GB, +0.0353 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_K_S = 16,

        /// <summary>
        /// K-Quant, 5 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.45GB, +0.0142 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_K_M = 17,

        /// <summary>
        /// K-Quant, 6 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 5.15GB, +0.0044 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q6_K = 18,

        /// <summary>
        /// Mostly IQ2_XXS, except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19,

        /// <summary>
        /// Mostly IQ2_XS, except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ2_XS = 20,

        /// <summary>
        /// Mostly Q2_K_S, except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_Q2_K_S = 21,

        /// <summary>
        /// Mostly Q3_K_XS, except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22,

        /// <summary>
        /// Mostly IQ3_XXS, except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23,

        /// <summary>
        /// The file type was not specified
        /// </summary>
        LLAMA_FTYPE_GUESSED = 1024,
    }
}