diff --git a/.github/download_models.py b/.github/download_models.py
new file mode 100644
index 00000000..8bebf983
--- /dev/null
+++ b/.github/download_models.py
@@ -0,0 +1,24 @@
+from huggingface_hub import hf_hub_download
+import argparse
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model-list', type=str, required=True)
+    parser.add_argument('--model-dir', type=str, required=True)
+    parser.add_argument('--endpoint', type=str, default='https://huggingface.co')
+    args = parser.parse_args()
+
+    # Download every model in the list file (one "repo_id,filename" pair per line).
+    with open(args.model_list, 'r') as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            model_id, filename = line.split(',')
+            hf_hub_download(
+                repo_id=model_id,
+                filename=filename,
+                local_dir=args.model_dir,
+                local_dir_use_symlinks=False,
+                endpoint=args.endpoint
+            )
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index a1e3a7f9..5a82ee34 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -20,6 +20,7 @@ jobs:
include:
- build: cuda11
image: nvidia/cuda:11.7.1-devel-ubuntu22.04
+ model_dir: /llamasharp_ci/models_benchmark
# - build: cuda12
# image: nvidia/cuda:12.1.1-runtime-ubuntu22.04
@@ -51,11 +52,14 @@ jobs:
         dotnet-version: |
           8.0.x
-    - name: Cache Packages
-      uses: actions/cache@v4
+    - name: Setup python
+      uses: actions/setup-python@v5
       with:
-        key: "benchmark_models"
-        path: LLama.Benchmark/Models
+        python-version: '3.10'
+    - name: Prepare models
+      run: |
+        pip install huggingface_hub
+        python3 .github/download_models.py --model-dir ${{ matrix.model_dir }} --model-list LLama.Benchmark/Assets/models.txt --endpoint https://hf-mirror.com
- name: Clear package cache
run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
@@ -66,7 +69,7 @@ jobs:
dotnet build LLama/LLamaSharp.csproj -c release --no-restore
dotnet build LLama.Benchmark/LLama.Benchmark.csproj -c release --no-restore
- name: Run benchmark test
- run: dotnet run LLama.Benchmark/LLama.Benchmark.csproj -c release
+        run: dotnet run --project LLama.Benchmark/LLama.Benchmark.csproj -c release -- ${{ matrix.model_dir }}
- name: Upload artifacts
if: always()
uses: actions/upload-artifact@v3
diff --git a/LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg b/LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg
new file mode 100644
index 00000000..078fde7c
Binary files /dev/null and b/LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg differ
diff --git a/LLama.Benchmark/Assets/models.txt b/LLama.Benchmark/Assets/models.txt
new file mode 100644
index 00000000..3a9d6b51
--- /dev/null
+++ b/LLama.Benchmark/Assets/models.txt
@@ -0,0 +1 @@
+TheBloke/Llama-2-7b-Chat-GGUF,llama-2-7b-chat.Q3_K_S.gguf
\ No newline at end of file
diff --git a/LLama.Benchmark/Constants.cs b/LLama.Benchmark/Constants.cs
index c39ca9aa..76dbb62e 100644
--- a/LLama.Benchmark/Constants.cs
+++ b/LLama.Benchmark/Constants.cs
@@ -9,13 +9,15 @@ namespace LLama.Benchmark
{
internal static class Constants
{
- public static readonly string Generative7BModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
- public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";
+ public static string ModelDir { get; set; } = "";
- public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
- public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
- public static readonly string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";
+ public static string Generative7BModelPath => Path.Combine(ModelDir, "llama-2-7b-chat.Q3_K_S.gguf");
+ public static string EmbeddingModelPath => Path.Combine(ModelDir, "all-MiniLM-L12-v2.Q8_0.gguf");
- public static readonly string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
+ public static string LLavaModelPath => Path.Combine(ModelDir, "llava-v1.6-mistral-7b.Q3_K_XS.gguf");
+ public static string LLavaMmpPath => Path.Combine(ModelDir, "mmproj-model-f16.gguf");
+ public static string LLavaImage => "Assets/extreme-ironing-taxi-610x427.jpg";
+
+        public static readonly string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
}
}
diff --git a/LLama.Benchmark/LLama.Benchmark.csproj b/LLama.Benchmark/LLama.Benchmark.csproj
index 6e420533..52512c69 100644
--- a/LLama.Benchmark/LLama.Benchmark.csproj
+++ b/LLama.Benchmark/LLama.Benchmark.csproj
@@ -13,24 +13,15 @@
-
-
-
-
-
-
-
-
-
+
PreserveNewest
-
-
+
PreserveNewest
diff --git a/LLama.Benchmark/Program.cs b/LLama.Benchmark/Program.cs
index 3a472f96..7120a1b8 100644
--- a/LLama.Benchmark/Program.cs
+++ b/LLama.Benchmark/Program.cs
@@ -1,4 +1,5 @@
using BenchmarkDotNet.Running;
+using System.Diagnostics;
namespace LLama.Benchmark
{
@@ -6,6 +7,13 @@ namespace LLama.Benchmark
{
public static void Main(string[] args)
{
+ if (args.Length == 1)
+ {
+ var modelDir = args[0];
+ Constants.ModelDir = modelDir;
+ Console.WriteLine($"#################### model dir: {modelDir}");
+ }
+
var summary = BenchmarkRunner.Run(typeof(Program).Assembly);
Console.WriteLine(summary);
}