namespace LLama.Native
{
    /// <summary>
    /// Enumerates the supported model file types.
    /// </summary>
    /// <remarks>
    /// Every member carries an explicit numeric value; the values are not contiguous
    /// (5 and 6 are not assigned here). Where a "Benchmark@7B" note is present it
    /// gives the file size and, where listed, the perplexity increase ("ppl").
    /// NOTE(review): the values presumably mirror the native library's file type
    /// enum - confirm against the native header before changing any of them.
    /// </remarks>
    public enum LLamaFtype
    {
        /// <summary>
        /// All f32
        /// </summary>
        /// <remarks>Benchmark@7B: 26GB</remarks>
        LLAMA_FTYPE_ALL_F32 = 0,

        /// <summary>
        /// Mostly f16
        /// </summary>
        /// <remarks>Benchmark@7B: 13GB</remarks>
        LLAMA_FTYPE_MOSTLY_F16 = 1,

        /// <summary>
        /// Mostly 8 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 6.7GB, +0.0004ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q8_0 = 7,

        /// <summary>
        /// Mostly 4 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 3.50GB, +0.2499 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_0 = 2,

        /// <summary>
        /// Mostly 4 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 3.90GB, +0.1846 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_1 = 3,

        /// <summary>
        /// Mostly 4 bit, tok_embeddings.weight and output.weight are f16
        /// </summary>
        LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4,

        /// <summary>
        /// Mostly 5 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 4.30GB, +0.0796 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_0 = 8,

        /// <summary>
        /// Mostly 5 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 4.70GB, +0.0415 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_1 = 9,

        /// <summary>
        /// K-Quant 2 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 2.67GB, +0.8698 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q2_K = 10,

        /// <summary>
        /// K-Quant 3 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 2.75GB, +0.5505 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_S = 11,

        /// <summary>
        /// K-Quant 3 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.06GB, +0.2437 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_M = 12,

        /// <summary>
        /// K-Quant 3 bit (Large)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.35GB, +0.1803 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q3_K_L = 13,

        /// <summary>
        /// K-Quant 4 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.56GB, +0.1149 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_K_S = 14,

        /// <summary>
        /// K-Quant 4 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 3.80GB, +0.0535 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q4_K_M = 15,

        /// <summary>
        /// K-Quant 5 bit (Small)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.33GB, +0.0353 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_K_S = 16,

        /// <summary>
        /// K-Quant 5 bit (Medium)
        /// </summary>
        /// <remarks>Benchmark@7B: 4.45GB, +0.0142 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q5_K_M = 17,

        /// <summary>
        /// K-Quant 6 bit
        /// </summary>
        /// <remarks>Benchmark@7B: 5.15GB, +0.0044 ppl</remarks>
        LLAMA_FTYPE_MOSTLY_Q6_K = 18,

        /// <summary>
        /// Mostly IQ2_XXS, except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19,

        /// <summary>
        /// Mostly IQ2_XS, except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ2_XS = 20,

        /// <summary>
        /// Mostly Q2_K_S, except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_Q2_K_S = 21,

        /// <summary>
        /// Mostly Q3_K_XS, except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22,

        /// <summary>
        /// Mostly IQ3_XXS, except 1d tensors
        /// </summary>
        LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23,

        /// <summary>
        /// File type was not specified
        /// </summary>
        LLAMA_FTYPE_GUESSED = 1024
    }
}