diff --git a/LLama/LLamaModel.cs b/LLama/LLamaModel.cs
index 36d0db15..cced0987 100644
--- a/LLama/LLamaModel.cs
+++ b/LLama/LLamaModel.cs
@@ -53,7 +53,8 @@ namespace LLama
         public SafeLLamaContextHandle NativeHandle => _ctx;
 
         /// <summary>
-        /// Please refer `LLamaParams` to find the meanings of each arg.
+        /// Please refer to `LLamaParams` for the meaning of each argument. Be sure to set `n_gpu_layers`; otherwise
+        /// 20 layers will be loaded to the GPU by default.
         /// </summary>
         /// <param name="model_path">The model file path.</param>
         /// <param name="model_name">The model name.</param>
@@ -159,7 +160,8 @@ namespace LLama
         }
 
         /// <summary>
-        ///
+        /// Please refer to `LLamaParams` for the meaning of each argument. Be sure to set `n_gpu_layers`; otherwise
+        /// 20 layers will be loaded to the GPU by default.
         /// </summary>
         /// <param name="params">The LLamaModel params</param>
        /// <param name="name">Model name</param>
diff --git a/LLama/LLamaParams.cs b/LLama/LLamaParams.cs
index 4380bcfd..e6ee8cc2 100644
--- a/LLama/LLamaParams.cs
+++ b/LLama/LLamaParams.cs
@@ -12,7 +12,7 @@ namespace LLama
         public int n_ctx = 512; // context size
         public int n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
         public int n_keep = 0; // number of tokens to keep from initial prompt
-        public int n_gpu_layers = 0; // number of layers to store in VRAM
+        public int n_gpu_layers = -1; // number of layers to store in VRAM
 
         // sampling parameters
         public Dictionary<llama_token, float> logit_bias; // logit bias for specific tokens
@@ -80,7 +80,7 @@ namespace LLama
             this.n_ctx = n_ctx;
             this.n_batch = n_batch;
             this.n_keep = n_keep;
-            this.n_gpu_layers = n_gpu_layers == -1 ? int.MaxValue : n_gpu_layers;
+            this.n_gpu_layers = n_gpu_layers == -1 ? 20 : n_gpu_layers;
 
             if (logit_bias == null)
             {
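
For clarity, here is a minimal usage sketch of the new default. It assumes the `LLamaParams` constructor's optional named arguments from this revision (a `model` parameter for the model path, and an `n_gpu_layers` parameter that defaults to the same -1 sentinel as the field); the file path is a placeholder.

```cs
using LLama;

// Sketch only: the `model` parameter name and the optional named arguments
// are assumptions based on this revision of LLamaParams.

// n_gpu_layers omitted: the sentinel -1 reaches the constructor, which now
// rewrites it to 20, so 20 layers are offloaded to the GPU by default.
var defaults = new LLamaParams(model: "path/to/ggml-model.bin");

// n_gpu_layers set explicitly to 0: no layers are offloaded to the GPU.
var cpuOnly = new LLamaParams(model: "path/to/ggml-model.bin", n_gpu_layers: 0);
```

Note the behavior change: before this patch the sentinel -1 was rewritten to `int.MaxValue` (offload every layer), whereas it now maps to 20, which is exactly what the updated doc comments warn callers about.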