|
|
|
@@ -1,29 +1,109 @@ |
|
|
|
using System; |
|
|
|
using System.Collections.Generic; |
|
|
|
using System.Text; |
|
|
|
|
|
|
|
namespace LLama.Native |
|
|
|
namespace LLama.Native |
|
|
|
{ |
|
|
|
/// <summary> |
|
|
|
/// Supported model file types |
|
|
|
/// </summary> |
|
|
|
public enum LLamaFtype |
|
|
|
{ |
|
|
|
/// <summary> |
|
|
|
/// All f32 |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 26GB @ 7B parameters</remarks> |
|
|
|
LLAMA_FTYPE_ALL_F32 = 0, |
|
|
|
LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16 |
|
|
|
// LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed |
|
|
|
// LLAMA_FTYPE_MOSTLY_Q4_3 (6) support has been removed |
|
|
|
LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q2_K = 10,// except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q3_K_S = 11,// except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q3_K_M = 12,// except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q3_K_L = 13,// except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q4_K_S = 14,// except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q4_K_M = 15,// except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q5_K_S = 16,// except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q5_K_M = 17,// except 1d tensors |
|
|
|
LLAMA_FTYPE_MOSTLY_Q6_K = 18,// except 1d tensors |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// Mostly f16 |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 13GB @ 7B parameters</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_F16 = 1, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// Mostly 8 bit |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 6.7GB @ 7B parameters, +0.0004ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q8_0 = 7, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// Mostly 4 bit |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 3.50GB @ 7B parameters, +0.2499 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q4_0 = 2, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// Mostly 4 bit |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 3.90GB @ 7B parameters, +0.1846 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q4_1 = 3, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// Mostly 4 bit, tok_embeddings.weight and output.weight are f16 |
|
|
|
/// </summary> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// Mostly 5 bit |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 4.30GB @ 7B tokens, +0.0796 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q5_0 = 8, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// Mostly 5 bit |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 4.70GB @ 7B parameters, +0.0415 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q5_1 = 9, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// K-Quant 2 bit |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 2.67GB @ 7N parameters, +0.8698 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q2_K = 10, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// K-Quant 3 bit (Small) |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 2.75GB @ 7B parameters, +0.5505 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q3_K_S = 11, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// K-Quant 3 bit (Medium) |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 3.06GB @ 7B parameters, +0.2437 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q3_K_M = 12, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// K-Quant 3 bit (Large) |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 3.35GB @ 7B parameters, +0.1803 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q3_K_L = 13, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// K-Quant 4 bit (Small) |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 3.56GB @ 7B parameters, +0.1149 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q4_K_S = 14, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// K-Quant 4 bit (Medium) |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 3.80GB @ 7B parameters, +0.0535 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q4_K_M = 15, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// K-Quant 5 bit (Small) |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 4.33GB @ 7B parameters, +0.0353 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q5_K_S = 16, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// K-Quant 5 bit (Medium) |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 4.45GB @ 7B parameters, +0.0142 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q5_K_M = 17, |
|
|
|
|
|
|
|
/// <summary> |
|
|
|
/// K-Quant 6 bit |
|
|
|
/// </summary> |
|
|
|
/// <remarks>Benchmark: 5.15GB @ 7B parameters, +0.0044 ppl</remarks> |
|
|
|
LLAMA_FTYPE_MOSTLY_Q6_K = 18, |
|
|
|
} |
|
|
|
} |