diff --git a/.github/download_models.py b/.github/download_models.py
new file mode 100644
index 00000000..8bebf983
--- /dev/null
+++ b/.github/download_models.py
@@ -0,0 +1,20 @@
+from huggingface_hub import hf_hub_download
+import argparse
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model-list', type=str, required=True)
+    parser.add_argument('--model-dir', type=str, required=True)
+    parser.add_argument('--endpoint', type=str, default='https://huggingface.co')
+    args = parser.parse_args()
+
+    with open(args.model_list, 'r') as f:
+        model_id, filename = f.readline().strip().split(',')
+
+    hf_hub_download(
+        repo_id=model_id,
+        filename=filename,
+        local_dir=args.model_dir,
+        local_dir_use_symlinks=False,
+        endpoint=args.endpoint
+    )
\ No newline at end of file
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index a1e3a7f9..5a82ee34 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -20,6 +20,7 @@ jobs:
         include:
           - build: cuda11
             image: nvidia/cuda:11.7.1-devel-ubuntu22.04
+            model_dir: /llamasharp_ci/models_benchmark
           # - build: cuda12
           #   image: nvidia/cuda:12.1.1-runtime-ubuntu22.04
 
@@ -51,11 +52,15 @@
           dotnet-version: |
             8.0.x
 
-      - name: Cache Packages
-        uses: actions/cache@v4
+      - name: Set up python
+        uses: actions/setup-python@v5
         with:
-          key: "benchmark_models"
-          path: LLama.Benchmark/Models
+          python-version: '3.10'
+
+      - name: Prepare models
+        run: |
+          pip install huggingface_hub
+          python3 .github/download_models.py --model-dir ${{ matrix.model_dir }} --model-list LLama.Benchmark/Assets/models.txt --endpoint https://hf-mirror.com
 
       - name: Clear package cache
         run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
@@ -66,7 +71,7 @@
           dotnet build LLama/LLamaSharp.csproj -c release --no-restore
           dotnet build LLama.Benchmark/LLama.Benchmark.csproj -c release --no-restore
       - name: Run benchmark test
-        run: dotnet run LLama.Benchmark/LLama.Benchmark.csproj -c release
+        run: dotnet run --project LLama.Benchmark/LLama.Benchmark.csproj -c release ${{ matrix.model_dir }}
       - name: Upload artifacts
         if: always()
         uses: actions/upload-artifact@v3
diff --git a/LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg b/LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg
new file mode 100644
index 00000000..078fde7c
Binary files /dev/null and b/LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg differ
diff --git a/LLama.Benchmark/Assets/models.txt b/LLama.Benchmark/Assets/models.txt
new file mode 100644
index 00000000..3a9d6b51
--- /dev/null
+++ b/LLama.Benchmark/Assets/models.txt
@@ -0,0 +1 @@
+TheBloke/Llama-2-7b-Chat-GGUF,llama-2-7b-chat.Q3_K_S.gguf
\ No newline at end of file
diff --git a/LLama.Benchmark/Constants.cs b/LLama.Benchmark/Constants.cs
index c39ca9aa..76dbb62e 100644
--- a/LLama.Benchmark/Constants.cs
+++ b/LLama.Benchmark/Constants.cs
@@ -9,13 +9,15 @@ namespace LLama.Benchmark
 {
     internal static class Constants
     {
-        public static readonly string Generative7BModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
-        public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";
+        public static string ModelDir { get; set; } = "";
 
-        public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
-        public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
-        public static readonly string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";
+        public static string Generative7BModelPath => Path.Combine(ModelDir, "llama-2-7b-chat.Q3_K_S.gguf");
+        public static string EmbeddingModelPath => Path.Combine(ModelDir, "all-MiniLM-L12-v2.Q8_0.gguf");
 
-        public static readonly string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
+        public static string LLavaModelPath => Path.Combine(ModelDir, "llava-v1.6-mistral-7b.Q3_K_XS.gguf");
+        public static string LLavaMmpPath => Path.Combine(ModelDir, "mmproj-model-f16.gguf");
+        public static string LLavaImage => "Assets/extreme-ironing-taxi-610x427.jpg";
+
+        public static readonly string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
     }
 }
diff --git a/LLama.Benchmark/LLama.Benchmark.csproj b/LLama.Benchmark/LLama.Benchmark.csproj
index 6e420533..52512c69 100644
--- a/LLama.Benchmark/LLama.Benchmark.csproj
+++ b/LLama.Benchmark/LLama.Benchmark.csproj
@@ -13,24 +13,15 @@
- - - - - - - - - + PreserveNewest - - + PreserveNewest
diff --git a/LLama.Benchmark/Program.cs b/LLama.Benchmark/Program.cs
index 3a472f96..7120a1b8 100644
--- a/LLama.Benchmark/Program.cs
+++ b/LLama.Benchmark/Program.cs
@@ -1,4 +1,5 @@
 using BenchmarkDotNet.Running;
+using System.Diagnostics;
 
 namespace LLama.Benchmark
 {
@@ -6,6 +7,13 @@ namespace LLama.Benchmark
     {
         public static void Main(string[] args)
         {
+            if (args.Length == 1)
+            {
+                var modelDir = args[0];
+                Constants.ModelDir = modelDir;
+                Console.WriteLine($"#################### model dir: {modelDir}");
+            }
+
             var summary = BenchmarkRunner.Run(typeof(Program).Assembly);
             Console.WriteLine(summary);
         }