Browse Source

Change the way models are cached.

pull/720/head
Rinne 1 year ago
parent
commit
2a1e15040b
No known key found for this signature in database. GPG Key ID: E86D01E1809BD23E
7 changed files with 47 additions and 22 deletions
  1. +20
    -0
      .github/download_models.py
  2. +8
    -5
      .github/workflows/benchmark.yml
  3. BIN
      LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg
  4. +1
    -0
      LLama.Benchmark/Assets/models.txt
  5. +8
    -6
      LLama.Benchmark/Constants.cs
  6. +2
    -11
      LLama.Benchmark/LLama.Benchmark.csproj
  7. +8
    -0
      LLama.Benchmark/Program.cs

+ 20
- 0
.github/download_models.py View File

@@ -0,0 +1,20 @@
from huggingface_hub import hf_hub_download
import argparse

if __name__ == '__main__':
    # Download the benchmark models named in a CSV-style list file
    # (one "repo_id,filename" pair per line) into --model-dir.
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-list', type=str, required=True)
    parser.add_argument('--model-dir', type=str, required=True)
    parser.add_argument('--endpoint', type=str, default='https://huggingface.co')
    args = parser.parse_args()

    with open(args.model_list, 'r') as f:
        # FIX: the original read only the first line of the "list" file;
        # iterate over every non-blank line so multiple models work.
        for line in f:
            line = line.strip()
            if not line:
                continue
            # FIX: strip() each field — split(',') on a raw readline() left a
            # trailing newline on the filename, breaking the hub request path.
            repo_id, filename = (part.strip() for part in line.split(',', 1))
            # FIX: hf_hub_download takes `repo_id`, not `model_id`; the
            # original keyword raises TypeError at call time.
            hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                local_dir=args.model_dir,
                local_dir_use_symlinks=False,
                endpoint=args.endpoint
            )

+ 8
- 5
.github/workflows/benchmark.yml View File

@@ -20,6 +20,7 @@ jobs:
include:
- build: cuda11
image: nvidia/cuda:11.7.1-devel-ubuntu22.04
model_dir: /llamasharp_ci/models_benchmark
# - build: cuda12
# image: nvidia/cuda:12.1.1-runtime-ubuntu22.04

@@ -51,11 +52,13 @@ jobs:
dotnet-version: |
8.0.x

- name: Cache Packages
uses: actions/cache@v4
- name: Prepare models
uses: actions/setup-python@v5
with:
key: "benchmark_models"
path: LLama.Benchmark/Models
python-version: '3.10'
run: |
pip install huggingface_hub
python3 LLama.Benchmark/prepare_models.py --model-dir ${{ matrix.model_dir }} --model-list LLama.Benchmark/Assets/models.txt --endpoint https://hf-mirror.com

- name: Clear package cache
run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
@@ -66,7 +69,7 @@ jobs:
dotnet build LLama/LLamaSharp.csproj -c release --no-restore
dotnet build LLama.Benchmark/LLama.Benchmark.csproj -c release --no-restore
- name: Run benchmark test
run: dotnet run LLama.Benchmark/LLama.Benchmark.csproj -c release
run: dotnet run --project LLama.Benchmark/LLama.Benchmark.csproj -c release ${{ matrix.model_dir }}
- name: Upload artifacts
if: always()
uses: actions/upload-artifact@v3


BIN
LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg View File

Before After
Width: 610  |  Height: 427  |  Size: 93 kB

+ 1
- 0
LLama.Benchmark/Assets/models.txt View File

@@ -0,0 +1 @@
TheBloke/Llama-2-7b-Chat-GGUF,llama-2-7b-chat.Q3_K_S.gguf

+ 8
- 6
LLama.Benchmark/Constants.cs View File

@@ -9,13 +9,15 @@ namespace LLama.Benchmark
{
internal static class Constants
{
public static readonly string Generative7BModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";
public static string ModelDir { get; set; } = "";

public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
public static readonly string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";
public static string Generative7BModelPath => Path.Combine(ModelDir, "llama-2-7b-chat.Q3_K_S.gguf");
public static string EmbeddingModelPath => Path.Combine(ModelDir, "all-MiniLM-L12-v2.Q8_0.gguf");

public static readonly string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
public static string LLavaModelPath => Path.Combine(ModelDir, "llava-v1.6-mistral-7b.Q3_K_XS.gguf");
public static string LLavaMmpPath => Path.Combine(ModelDir, "mmproj-model-f16.gguf");
public static string LLavaImage => "Assets/extreme-ironing-taxi-610x427.jpg";

public static string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
}
}

+ 2
- 11
LLama.Benchmark/LLama.Benchmark.csproj View File

@@ -13,24 +13,15 @@
<PackageReference Include="BenchmarkDotNet" Version="0.13.12" />
</ItemGroup>

<Target Name="DownloadContentFiles" BeforeTargets="Build">
<DownloadFile SourceUrl="https://hf-mirror.com/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true"></DownloadFile>
</Target>

<ItemGroup>
<Folder Include="Models\" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\LLama\LLamaSharp.csproj" />
</ItemGroup>

<ItemGroup>
<None Update="Models\llama-2-7b-chat.Q3_K_S.gguf">
<None Update="Assets\TextCompletionPrompts.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>

<None Update="Assets\TextCompletionPrompts.txt">
<None Update="Models\extreme-ironing-taxi-610x427.jpg">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>


+ 8
- 0
LLama.Benchmark/Program.cs View File

@@ -1,4 +1,5 @@
using BenchmarkDotNet.Running;
using System.Diagnostics;

namespace LLama.Benchmark
{
@@ -6,6 +7,13 @@ namespace LLama.Benchmark
{
public static void Main(string[] args)
{
if (args.Length == 1)
{
var modelDir = args[0];
Constants.ModelDir = modelDir;
Console.WriteLine($"#################### model dir: {modelDir}");
}

var summary = BenchmarkRunner.Run(typeof(Program).Assembly);
Console.WriteLine(summary);
}


Loading…
Cancel
Save