@@ -53,7 +53,8 @@ namespace LLama
         public SafeLLamaContextHandle NativeHandle => _ctx;

         /// <summary>
-        /// Please refer `LLamaParams` to find the meanings of each arg.
+        /// Please refer to `LLamaParams` for the meaning of each argument. Be sure to set `n_gpu_layers`;
+        /// otherwise, 20 layers will be loaded to the GPU by default.
         /// </summary>
         /// <param name="model_path">The model file path.</param>
         /// <param name="model_name">The model name.</param>
@@ -159,7 +160,8 @@ namespace LLama
         }

         /// <summary>
-        ///
+        /// Please refer to `LLamaParams` for the meaning of each argument. Be sure to set `n_gpu_layers`;
+        /// otherwise, 20 layers will be loaded to the GPU by default.
         /// </summary>
         /// <param name="params">The LLamaModel params</param>
         /// <param name="name">Model name</param>
@@ -12,7 +12,7 @@ namespace LLama
         public int n_ctx = 512; // context size
         public int n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
         public int n_keep = 0; // number of tokens to keep from initial prompt
-        public int n_gpu_layers = 0; // number of layers to store in VRAM
+        public int n_gpu_layers = -1; // number of layers to store in VRAM

         // sampling parameters
         public Dictionary<llama_token, float> logit_bias; // logit bias for specific tokens
@@ -80,7 +80,7 @@ namespace LLama
             this.n_ctx = n_ctx;
             this.n_batch = n_batch;
             this.n_keep = n_keep;
-            this.n_gpu_layers = n_gpu_layers == -1 ? int.MaxValue : n_gpu_layers;
+            this.n_gpu_layers = n_gpu_layers == -1 ? 20 : n_gpu_layers;

             if (logit_bias == null)
             {
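Note the behavioral shift in the constructor above: the -1 sentinel used to mean "offload
everything" (int.MaxValue) and now resolves to 20 layers, which is why the XML docs warn callers
to set `n_gpu_layers` themselves. A standalone sketch of the new mapping (the helper name is
illustrative, not part of the library):

    // Mirrors the constructor's sentinel handling after this change.
    static int ResolveGpuLayers(int requested)
    {
        return requested == -1 ? 20 : requested;
    }

    // ResolveGpuLayers(-1) == 20  (previously int.MaxValue, i.e. all layers)
    // ResolveGpuLayers(0)  == 0   (CPU only)
    // ResolveGpuLayers(35) == 35  (explicit counts pass through unchanged)

Callers that previously relied on -1 to offload every layer will now silently fall back to
partial offloading, so an explicit layer count is the safer choice after this change.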