diff --git a/LLama/LLamaModel.cs b/LLama/LLamaModel.cs
index 36d0db15..cced0987 100644
--- a/LLama/LLamaModel.cs
+++ b/LLama/LLamaModel.cs
@@ -53,7 +53,8 @@ namespace LLama
public SafeLLamaContextHandle NativeHandle => _ctx;
///
- /// Please refer `LLamaParams` to find the meanings of each arg.
+ /// Please refer to `LLamaParams` for the meaning of each argument. Be sure to set `n_gpu_layers`;
+ /// otherwise 20 layers will be loaded to the GPU by default.
///
/// The model file path.
/// The model name.
@@ -159,7 +160,8 @@ namespace LLama
}
///
- ///
+ /// Please refer to `LLamaParams` for the meaning of each argument. Be sure to set `n_gpu_layers`;
+ /// otherwise 20 layers will be loaded to the GPU by default.
///
/// The LLamaModel params
/// Model name
diff --git a/LLama/LLamaParams.cs b/LLama/LLamaParams.cs
index 4380bcfd..e6ee8cc2 100644
--- a/LLama/LLamaParams.cs
+++ b/LLama/LLamaParams.cs
@@ -12,7 +12,7 @@ namespace LLama
public int n_ctx = 512; // context size
public int n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
public int n_keep = 0; // number of tokens to keep from initial prompt
- public int n_gpu_layers = 0; // number of layers to store in VRAM
+ public int n_gpu_layers = -1; // number of layers to store in VRAM (-1 maps to the default of 20)
// sampling parameters
public Dictionary<llama_token, float> logit_bias; // logit bias for specific tokens
@@ -80,7 +80,7 @@ namespace LLama
this.n_ctx = n_ctx;
this.n_batch = n_batch;
this.n_keep = n_keep;
- this.n_gpu_layers = n_gpu_layers == -1 ? int.MaxValue : n_gpu_layers;
+ this.n_gpu_layers = n_gpu_layers == -1 ? 20 : n_gpu_layers; // -1 (unset) now falls back to 20 layers instead of int.MaxValue
if (logit_bias == null)
{
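
For context, here is a minimal sketch of the default-resolution behavior this diff introduces. The helper name `ResolveGpuLayers` and the demo class are hypothetical, written only to illustrate the mapping the constructor now applies; they are not part of the library's API.

using System;

class GpuLayerDefaultDemo
{
    // Mirrors the constructor logic in the diff: -1 is the "unset" sentinel,
    // which previously mapped to int.MaxValue (offload all layers) and now
    // maps to 20 layers.
    static int ResolveGpuLayers(int nGpuLayers)
        => nGpuLayers == -1 ? 20 : nGpuLayers;

    static void Main()
    {
        Console.WriteLine(ResolveGpuLayers(-1)); // 20: new default when left unset
        Console.WriteLine(ResolveGpuLayers(0));  // 0:  explicit CPU-only
        Console.WriteLine(ResolveGpuLayers(40)); // 40: explicit values pass through unchanged
    }
}

In practice this means callers who relied on -1 meaning "offload everything" must now pass the layer count explicitly, which is why the doc comments above warn about the 20-layer default.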