Browse Source

Change the way models are cached.

pull/720/head
Rinne 1 year ago
parent
commit
2a1e15040b
No known key found for this signature in database. GPG Key ID: E86D01E1809BD23E
7 changed files with 47 additions and 22 deletions
  1. +20
    -0
      .github/download_models.py
  2. +8
    -5
      .github/workflows/benchmark.yml
  3. BIN
      LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg
  4. +1
    -0
      LLama.Benchmark/Assets/models.txt
  5. +8
    -6
      LLama.Benchmark/Constants.cs
  6. +2
    -11
      LLama.Benchmark/LLama.Benchmark.csproj
  7. +8
    -0
      LLama.Benchmark/Program.cs

+ 20
- 0
.github/download_models.py View File

@@ -0,0 +1,20 @@
from huggingface_hub import hf_hub_download
import argparse

if __name__ == '__main__':
    # Download the benchmark models named in a CSV-style list file
    # (one "repo_id,filename" pair per line) into --model-dir.
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-list', type=str, required=True)
    parser.add_argument('--model-dir', type=str, required=True)
    parser.add_argument('--endpoint', type=str, default='https://huggingface.co')
    args = parser.parse_args()

    with open(args.model_list, 'r') as f:
        # FIX: the original read only the first line of the "list" file;
        # iterate over every non-blank line so multiple models work.
        for line in f:
            line = line.strip()
            if not line:
                continue
            # FIX: strip() each field — split(',') on a raw readline() left a
            # trailing newline on the filename, breaking the hub request path.
            repo_id, filename = (part.strip() for part in line.split(',', 1))
            # FIX: hf_hub_download takes `repo_id`, not `model_id`; the
            # original keyword raises TypeError at call time.
            hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                local_dir=args.model_dir,
                local_dir_use_symlinks=False,
                endpoint=args.endpoint
            )

+ 8
- 5
.github/workflows/benchmark.yml View File

@@ -20,6 +20,7 @@ jobs:
include:
- build: cuda11
image: nvidia/cuda:11.7.1-devel-ubuntu22.04
model_dir: /llamasharp_ci/models_benchmark
# - build: cuda12
# image: nvidia/cuda:12.1.1-runtime-ubuntu22.04

@@ -51,11 +52,13 @@ jobs:
dotnet-version: |
8.0.x

- name: Cache Packages
uses: actions/cache@v4
- name: Prepare models
uses: actions/setup-python@v5
with:
key: "benchmark_models"
path: LLama.Benchmark/Models
python-version: '3.10'
run: |
pip install huggingface_hub
python3 LLama.Benchmark/prepare_models.py --model-dir ${{ matrix.model_dir }} --model-list LLama.Benchmark/Assets/models.txt --endpoint https://hf-mirror.com

- name: Clear package cache
run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
@@ -66,7 +69,7 @@ jobs:
dotnet build LLama/LLamaSharp.csproj -c release --no-restore
dotnet build LLama.Benchmark/LLama.Benchmark.csproj -c release --no-restore
- name: Run benchmark test
run: dotnet run LLama.Benchmark/LLama.Benchmark.csproj -c release
run: dotnet run --project LLama.Benchmark/LLama.Benchmark.csproj -c release ${{ matrix.model_dir }}
- name: Upload artifacts
if: always()
uses: actions/upload-artifact@v3


BIN
LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg View File

Before After
Width: 610  |  Height: 427  |  Size: 93 kB

+ 1
- 0
LLama.Benchmark/Assets/models.txt View File

@@ -0,0 +1 @@
TheBloke/Llama-2-7b-Chat-GGUF,llama-2-7b-chat.Q3_K_S.gguf

+ 8
- 6
LLama.Benchmark/Constants.cs View File

@@ -9,13 +9,15 @@ namespace LLama.Benchmark
{
internal static class Constants
{
public static readonly string Generative7BModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";
public static string ModelDir { get; set; } = "";

public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
public static readonly string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";
public static string Generative7BModelPath => Path.Combine(ModelDir, "llama-2-7b-chat.Q3_K_S.gguf");
public static string EmbeddingModelPath => Path.Combine(ModelDir, "all-MiniLM-L12-v2.Q8_0.gguf");

public static readonly string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
public static string LLavaModelPath => Path.Combine(ModelDir, "llava-v1.6-mistral-7b.Q3_K_XS.gguf");
public static string LLavaMmpPath => Path.Combine(ModelDir, "mmproj-model-f16.gguf");
public static string LLavaImage => "Assets/extreme-ironing-taxi-610x427.jpg";

public static string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
}
}

+ 2
- 11
LLama.Benchmark/LLama.Benchmark.csproj View File

@@ -13,24 +13,15 @@
<PackageReference Include="BenchmarkDotNet" Version="0.13.12" />
</ItemGroup>

<Target Name="DownloadContentFiles" BeforeTargets="Build">
<DownloadFile SourceUrl="https://hf-mirror.com/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true"></DownloadFile>
</Target>

<ItemGroup>
<Folder Include="Models\" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\LLama\LLamaSharp.csproj" />
</ItemGroup>

<ItemGroup>
<None Update="Models\llama-2-7b-chat.Q3_K_S.gguf">
<None Update="Assets\TextCompletionPrompts.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>

<None Update="Assets\TextCompletionPrompts.txt">
<None Update="Models\extreme-ironing-taxi-610x427.jpg">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>


+ 8
- 0
LLama.Benchmark/Program.cs View File

@@ -1,4 +1,5 @@
using BenchmarkDotNet.Running;
using System.Diagnostics;

namespace LLama.Benchmark
{
@@ -6,6 +7,13 @@ namespace LLama.Benchmark
{
public static void Main(string[] args)
{
if (args.Length == 1)
{
var modelDir = args[0];
Constants.ModelDir = modelDir;
Console.WriteLine($"#################### model dir: {modelDir}");
}

var summary = BenchmarkRunner.Run(typeof(Program).Assembly);
Console.WriteLine(summary);
}


Loading…
Cancel
Save