diff --git a/.github/download_models.py b/.github/download_models.py
new file mode 100644
index 00000000..8bebf983
--- /dev/null
+++ b/.github/download_models.py
@@ -0,0 +1,24 @@
+from huggingface_hub import hf_hub_download
+import argparse
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model-list', type=str, required=True)
+    parser.add_argument('--model-dir', type=str, required=True)
+    parser.add_argument('--endpoint', type=str, default='https://huggingface.co')
+    args = parser.parse_args()
+
+    # Download every model in the list file (one "repo_id,filename" pair per line).
+    with open(args.model_list, 'r') as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            model_id, filename = line.split(',')
+            hf_hub_download(
+                repo_id=model_id,
+                filename=filename,
+                local_dir=args.model_dir,
+                local_dir_use_symlinks=False,
+                endpoint=args.endpoint
+            )
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index a1e3a7f9..5a82ee34 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -20,6 +20,7 @@ jobs:
include:
- build: cuda11
image: nvidia/cuda:11.7.1-devel-ubuntu22.04
+ model_dir: /llamasharp_ci/models_benchmark
# - build: cuda12
# image: nvidia/cuda:12.1.1-runtime-ubuntu22.04
@@ -51,11 +52,14 @@ jobs:
         dotnet-version: |
           8.0.x
-    - name: Cache Packages
-      uses: actions/cache@v4
+    - name: Setup python
+      uses: actions/setup-python@v5
       with:
-        key: "benchmark_models"
-        path: LLama.Benchmark/Models
+        python-version: '3.10'
+    - name: Prepare models
+      run: |
+        pip install huggingface_hub
+        python3 .github/download_models.py --model-dir ${{ matrix.model_dir }} --model-list LLama.Benchmark/Assets/models.txt --endpoint https://hf-mirror.com
- name: Clear package cache
run: dotnet clean LLamaSharp.sln && dotnet nuget locals all --clear
@@ -66,7 +69,7 @@ jobs:
dotnet build LLama/LLamaSharp.csproj -c release --no-restore
dotnet build LLama.Benchmark/LLama.Benchmark.csproj -c release --no-restore
- name: Run benchmark test
- run: dotnet run LLama.Benchmark/LLama.Benchmark.csproj -c release
+        run: dotnet run --project LLama.Benchmark/LLama.Benchmark.csproj -c release -- ${{ matrix.model_dir }}
- name: Upload artifacts
if: always()
uses: actions/upload-artifact@v3
diff --git a/LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg b/LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg
new file mode 100644
index 00000000..078fde7c
Binary files /dev/null and b/LLama.Benchmark/Assets/extreme-ironing-taxi-610x427.jpg differ
diff --git a/LLama.Benchmark/Assets/models.txt b/LLama.Benchmark/Assets/models.txt
new file mode 100644
index 00000000..3a9d6b51
--- /dev/null
+++ b/LLama.Benchmark/Assets/models.txt
@@ -0,0 +1 @@
+TheBloke/Llama-2-7b-Chat-GGUF,llama-2-7b-chat.Q3_K_S.gguf
\ No newline at end of file
diff --git a/LLama.Benchmark/Constants.cs b/LLama.Benchmark/Constants.cs
index c39ca9aa..76dbb62e 100644
--- a/LLama.Benchmark/Constants.cs
+++ b/LLama.Benchmark/Constants.cs
@@ -9,13 +9,15 @@ namespace LLama.Benchmark
{
internal static class Constants
{
- public static readonly string Generative7BModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
- public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";
+ public static string ModelDir { get; set; } = "";
- public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
- public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
- public static readonly string LLavaImage = "Models/extreme-ironing-taxi-610x427.jpg";
+ public static string Generative7BModelPath => Path.Combine(ModelDir, "llama-2-7b-chat.Q3_K_S.gguf");
+ public static string EmbeddingModelPath => Path.Combine(ModelDir, "all-MiniLM-L12-v2.Q8_0.gguf");
- public static readonly string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
+ public static string LLavaModelPath => Path.Combine(ModelDir, "llava-v1.6-mistral-7b.Q3_K_XS.gguf");
+ public static string LLavaMmpPath => Path.Combine(ModelDir, "mmproj-model-f16.gguf");
+ public static string LLavaImage => "Assets/extreme-ironing-taxi-610x427.jpg";
+
+        public static readonly string TextCompletionPromptsFilePath = "Assets/TextCompletionPrompts.txt";
}
}
diff --git a/LLama.Benchmark/LLama.Benchmark.csproj b/LLama.Benchmark/LLama.Benchmark.csproj
index 6e420533..52512c69 100644
--- a/LLama.Benchmark/LLama.Benchmark.csproj
+++ b/LLama.Benchmark/LLama.Benchmark.csproj
@@ -13,24 +13,15 @@
-
-
-
-
-
-
-
-
-
+
PreserveNewest
-
-
+
PreserveNewest
diff --git a/LLama.Benchmark/Program.cs b/LLama.Benchmark/Program.cs
index 3a472f96..7120a1b8 100644
--- a/LLama.Benchmark/Program.cs
+++ b/LLama.Benchmark/Program.cs
@@ -1,4 +1,5 @@
using BenchmarkDotNet.Running;
+using System.Diagnostics;
namespace LLama.Benchmark
{
@@ -6,6 +7,13 @@ namespace LLama.Benchmark
{
public static void Main(string[] args)
{
+ if (args.Length == 1)
+ {
+ var modelDir = args[0];
+ Constants.ModelDir = modelDir;
+ Console.WriteLine($"#################### model dir: {modelDir}");
+ }
+
var summary = BenchmarkRunner.Run(typeof(Program).Assembly);
Console.WriteLine(summary);
}