@@ -0,0 +1,55 @@
namespace LLama.Web.Async
{
    /// <summary>
    /// An async-compatible lock that is acquired and released via a using statement
    /// </summary>
    public sealed class AsyncLock
    {
        private readonly SemaphoreSlim _semaphore;
        private readonly Task<IDisposable> _releaser;

        /// <summary>
        /// Initializes a new instance of the <see cref="AsyncLock"/> class.
        /// </summary>
        public AsyncLock()
        {
            _semaphore = new SemaphoreSlim(1, 1);
            _releaser = Task.FromResult((IDisposable)new Releaser(this));
        }
        /// <summary>
        /// Acquires the lock asynchronously; dispose the returned <see cref="IDisposable"/> to release it.
        /// </summary>
        /// <returns>A disposable that releases the lock when disposed</returns>
        public Task<IDisposable> LockAsync()
        {
            var wait = _semaphore.WaitAsync();

            // Fast path: the semaphore was free, so return the cached releaser task
            if (wait.IsCompleted)
                return _releaser;

            // Slow path: hand back the releaser once the semaphore wait completes
            return wait.ContinueWith((_, state) => (IDisposable)state, _releaser.Result, CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default);
        }
        /// <summary>
        /// IDisposable wrapper class to release the lock on dispose
        /// </summary>
        /// <seealso cref="IDisposable" />
        private sealed class Releaser : IDisposable
        {
            private readonly AsyncLock _lockToRelease;

            internal Releaser(AsyncLock lockToRelease)
            {
                _lockToRelease = lockToRelease;
            }

            public void Dispose()
            {
                _lockToRelease._semaphore.Release();
            }
        }
    }
}
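For reference, a minimal usage sketch (the `_lock` field and `DoWorkAsync` method are illustrative, not part of this change): the lock is held for the duration of the using block and released when the returned IDisposable is disposed, even if the guarded code throws.

    private readonly AsyncLock _lock = new AsyncLock();

    public async Task DoWorkAsync()
    {
        // Only one caller at a time executes the guarded section;
        // Dispose() at the end of the using block releases the semaphore.
        using (await _lock.LockAsync())
        {
            await Task.Delay(100); // stand-in for the protected work
        }
    }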
@@ -2,6 +2,7 @@
{
    public class LLamaOptions
    {
        public ModelLoadType ModelLoadType { get; set; }
        public List<ModelOptions> Models { get; set; }
        public List<PromptOptions> Prompts { get; set; } = new List<PromptOptions>();
        public List<ParameterOptions> Parameters { get; set; } = new List<ParameterOptions>();
@@ -0,0 +1,30 @@
namespace LLama.Web.Common
{
    /// <summary>
    /// The type of model load caching to use
    /// </summary>
    public enum ModelLoadType
    {
        /// <summary>
        /// Only one model will be loaded into memory at a time; any other models will be unloaded before the new one is loaded
        /// </summary>
        Single = 0,

        /// <summary>
        /// Multiple models will be loaded into memory; ensure you use the ModelConfigs to split the hardware resources
        /// </summary>
        Multiple = 1,

        /// <summary>
        /// The first model in the appsettings.json list will be preloaded into memory at app startup
        /// </summary>
        PreloadSingle = 2,

        /// <summary>
        /// All models in the appsettings.json list will be preloaded into memory at app startup; ensure you use the ModelConfigs to split the hardware resources
        /// </summary>
        PreloadMultiple = 3,
    }
}
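For context, a minimal sketch of how these options might be bound at startup (the "LLama" section name and the use of the configuration binder are assumptions; this diff does not show the wiring):

    using LLama.Web.Common;
    using Microsoft.Extensions.Configuration;

    // Assumed appsettings.json shape: { "LLama": { "ModelLoadType": "PreloadSingle", "Models": [ ... ] } }
    var configuration = new ConfigurationBuilder()
        .AddJsonFile("appsettings.json")
        .Build();

    var options = configuration.GetSection("LLama").Get<LLamaOptions>();
    // options.ModelLoadType controls ModelService.LoadModels():
    // Single/Multiple load lazily; PreloadSingle/PreloadMultiple load at startup.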
@@ -3,105 +3,123 @@ using LLama.Abstractions;
namespace LLama.Web.Common
{
    public class ModelOptions : IModelParams
    {
        /// <summary>
        /// Model friendly name
        /// </summary>
        public string Name { get; set; }

        /// <summary>
        /// Max context instances allowed per model
        /// </summary>
        public int MaxInstances { get; set; }
        /// <summary>
        /// Model context size (n_ctx)
        /// </summary>
        public int ContextSize { get; set; } = 512;

        /// <summary>
        /// The GPU that is used for scratch and small tensors
        /// </summary>
        public int MainGpu { get; set; } = 0;

        /// <summary>
        /// If true, reduce VRAM usage at the cost of performance
        /// </summary>
        public bool LowVram { get; set; } = false;

        /// <summary>
        /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
        /// </summary>
        public int GpuLayerCount { get; set; } = 20;

        /// <summary>
        /// Seed for the random number generator (seed)
        /// </summary>
        public int Seed { get; set; } = 1686349486;

        /// <summary>
        /// Use f16 instead of f32 for memory kv (memory_f16)
        /// </summary>
        public bool UseFp16Memory { get; set; } = true;

        /// <summary>
        /// Use mmap for faster loads (use_mmap)
        /// </summary>
        public bool UseMemorymap { get; set; } = true;

        /// <summary>
        /// Use mlock to keep model in memory (use_mlock)
        /// </summary>
        public bool UseMemoryLock { get; set; } = false;

        /// <summary>
        /// Compute perplexity over the prompt (perplexity)
        /// </summary>
        public bool Perplexity { get; set; } = false;

        /// <summary>
        /// Model path (model)
        /// </summary>
        public string ModelPath { get; set; }

        /// <summary>
        /// Model alias
        /// </summary>
        public string ModelAlias { get; set; } = "unknown";
        /// <summary>
        /// Lora adapter path (lora_adapter)
        /// </summary>
        public string LoraAdapter { get; set; } = string.Empty;

        /// <summary>
        /// Base model path for the lora adapter (lora_base)
        /// </summary>
        public string LoraBase { get; set; } = string.Empty;

        /// <summary>
        /// Number of threads (-1 = autodetect) (n_threads)
        /// </summary>
        public int Threads { get; set; } = Math.Max(Environment.ProcessorCount / 2, 1);

        /// <summary>
        /// Batch size for prompt processing (must be >= 32 to use BLAS) (n_batch)
        /// </summary>
        public int BatchSize { get; set; } = 512;

        /// <summary>
        /// Whether to convert eos to newline during the inference.
        /// </summary>
        public bool ConvertEosToNewLine { get; set; } = false;

        /// <summary>
        /// Whether to use embedding mode. (embedding) Note that if this is set to true,
        /// the LLamaModel won't produce text responses anymore.
        /// </summary>
        public bool EmbeddingMode { get; set; } = false;

        /// <summary>
        /// How split tensors should be distributed across GPUs
        /// </summary>
        public float[] TensorSplits { get; set; }

        /// <summary>
        /// RoPE base frequency
        /// </summary>
        public float RopeFrequencyBase { get; set; } = 10000.0f;

        /// <summary>
        /// RoPE frequency scaling factor
        /// </summary>
        public float RopeFrequencyScale { get; set; } = 1.0f;

        /// <summary>
        /// Use experimental mul_mat_q kernels
        /// </summary>
        public bool MulMatQ { get; set; }
        /// <summary>
        /// The encoding to use for models
@@ -0,0 +1,106 @@
using LLama.Abstractions;
using LLama.Web.Common;
using System.Collections.Concurrent;

namespace LLama.Web
{
    /// <summary>
    /// Wrapper class for LLamaSharp LLamaWeights
    /// </summary>
    /// <seealso cref="System.IDisposable" />
    public class LLamaModel : IDisposable
    {
        private readonly ModelOptions _config;
        private readonly LLamaWeights _weights;
        private readonly ConcurrentDictionary<string, LLamaContext> _contexts;

        /// <summary>
        /// Initializes a new instance of the <see cref="LLamaModel"/> class.
        /// </summary>
        /// <param name="modelParams">The model parameters.</param>
        public LLamaModel(ModelOptions modelParams)
        {
            _config = modelParams;
            _weights = LLamaWeights.LoadFromFile(modelParams);
            _contexts = new ConcurrentDictionary<string, LLamaContext>();
        }
        /// <summary>
        /// Gets the model configuration.
        /// </summary>
        public IModelParams ModelParams => _config;

        /// <summary>
        /// Gets the LLamaWeights
        /// </summary>
        public LLamaWeights LLamaWeights => _weights;

        /// <summary>
        /// Gets the context count.
        /// </summary>
        public int ContextCount => _contexts.Count;
        /// <summary>
        /// Creates a new context session on this model
        /// </summary>
        /// <param name="contextName">The unique context identifier</param>
        /// <returns>LLamaContext for this LLamaModel</returns>
        /// <exception cref="Exception">Thrown if the context already exists or the instance limit is reached</exception>
        public Task<LLamaContext> CreateContext(string contextName)
        {
            if (_contexts.TryGetValue(contextName, out var context))
                throw new Exception($"Context with id {contextName} already exists.");

            if (_config.MaxInstances > -1 && ContextCount >= _config.MaxInstances)
                throw new Exception("Maximum model instances reached");

            context = _weights.CreateContext(_config);
            if (_contexts.TryAdd(contextName, context))
                return Task.FromResult(context);

            return Task.FromResult<LLamaContext>(null);
        }
        /// <summary>
        /// Gets a context belonging to this model
        /// </summary>
        /// <param name="contextName">The unique context identifier</param>
        /// <returns>The LLamaContext with the specified contextName, or null if not found</returns>
        public Task<LLamaContext> GetContext(string contextName)
        {
            if (_contexts.TryGetValue(contextName, out var context))
                return Task.FromResult(context);

            return Task.FromResult<LLamaContext>(null);
        }
        /// <summary>
        /// Remove a context from this model
        /// </summary>
        /// <param name="contextName">The unique context identifier</param>
        /// <returns>true if removed, otherwise false</returns>
        public Task<bool> RemoveContext(string contextName)
        {
            if (!_contexts.TryRemove(contextName, out var context))
                return Task.FromResult(false);

            context?.Dispose();
            return Task.FromResult(true);
        }

        /// <summary>
        /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources.
        /// </summary>
        public void Dispose()
        {
            foreach (var context in _contexts.Values)
            {
                context?.Dispose();
            }
            _weights.Dispose();
        }
    }
}
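A short sketch of the intended lifecycle (the option values are placeholders, not from this diff): one LLamaModel owns the weights, and each named context is created, fetched and removed through it.

    var model = new LLamaModel(new ModelOptions
    {
        Name = "example",                // placeholder values
        ModelPath = "/models/model.bin",
        MaxInstances = 2
    });

    var context = await model.CreateContext("session-1"); // create a named context
    var fetched = await model.GetContext("session-1");    // later, fetch it by name
    await model.RemoveContext("session-1");               // dispose and remove it
    model.Dispose();                                      // frees remaining contexts and the weights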
@@ -0,0 +1,76 @@
using LLama.Web.Common;

namespace LLama.Web.Services
{
    /// <summary>
    /// Service for managing language models
    /// </summary>
    public interface IModelService
    {
        /// <summary>
        /// Gets the model with the specified name.
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        Task<LLamaModel> GetModel(string modelName);

        /// <summary>
        /// Loads a model from a ModelConfig object.
        /// </summary>
        /// <param name="modelOptions">The model configuration.</param>
        Task<LLamaModel> LoadModel(ModelOptions modelOptions);

        /// <summary>
        /// Loads all models found in appsettings.json
        /// </summary>
        Task LoadModels();

        /// <summary>
        /// Unloads the model with the specified name.
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        Task UnloadModel(string modelName);

        /// <summary>
        /// Unloads all models.
        /// </summary>
        Task UnloadModels();

        /// <summary>
        /// Gets a context with the specified identifier
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        /// <param name="contextName">The context identifier.</param>
        Task<LLamaContext> GetContext(string modelName, string contextName);

        /// <summary>
        /// Removes the context.
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        /// <param name="contextName">The context identifier.</param>
        Task<bool> RemoveContext(string modelName, string contextName);

        /// <summary>
        /// Creates a context.
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        /// <param name="contextName">The context identifier.</param>
        Task<LLamaContext> CreateContext(string modelName, string contextName);

        /// <summary>
        /// Gets or creates the model and context.
        /// This will load the model from disk if not already loaded, and create the context if it does not exist.
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        /// <param name="contextName">The context identifier.</param>
        /// <returns>Both the loaded Model and Context</returns>
        Task<(LLamaModel, LLamaContext)> GetOrCreateModelAndContext(string modelName, string contextName);
    }
}
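A plausible wiring sketch for this service, assuming the standard ASP.NET Core container (the "LLama" section name and the registration style are assumptions; this diff does not show them): registering as a singleton keeps loaded models shared across requests.

    // Assumed Program.cs wiring, not part of this diff
    builder.Services.AddSingleton(builder.Configuration.GetSection("LLama").Get<LLamaOptions>());
    builder.Services.AddSingleton<IModelService, ModelService>();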
@@ -0,0 +1,202 @@
using LLama.Web.Async;
using LLama.Web.Common;
using System.Collections.Concurrent;

namespace LLama.Web.Services
{
    /// <summary>
    /// Service for handling models, weights & contexts
    /// </summary>
    public class ModelService : IModelService
    {
        private readonly AsyncLock _modelLock;
        private readonly AsyncLock _contextLock;
        private readonly LLamaOptions _configuration;
        private readonly ConcurrentDictionary<string, LLamaModel> _modelInstances;

        /// <summary>
        /// Initializes a new instance of the <see cref="ModelService"/> class.
        /// </summary>
        /// <param name="configuration">The LLama options.</param>
        public ModelService(LLamaOptions configuration)
        {
            _modelLock = new AsyncLock();
            _contextLock = new AsyncLock();
            _configuration = configuration;
            _modelInstances = new ConcurrentDictionary<string, LLamaModel>();
        }
        /// <summary>
        /// Loads a model with the provided configuration.
        /// </summary>
        /// <param name="modelOptions">The model configuration.</param>
        /// <returns>The loaded model</returns>
        public async Task<LLamaModel> LoadModel(ModelOptions modelOptions)
        {
            if (_modelInstances.TryGetValue(modelOptions.Name, out var existingModel))
                return existingModel;

            using (await _modelLock.LockAsync())
            {
                // Double-check inside the lock so concurrent callers don't load the same model twice
                if (_modelInstances.TryGetValue(modelOptions.Name, out var model))
                    return model;

                // If in single mode unload any other models
                if (_configuration.ModelLoadType == ModelLoadType.Single
                 || _configuration.ModelLoadType == ModelLoadType.PreloadSingle)
                    await UnloadModels();

                model = new LLamaModel(modelOptions);
                _modelInstances.TryAdd(modelOptions.Name, model);
                return model;
            }
        }
        /// <summary>
        /// Preloads models at app startup, as determined by the configured ModelLoadType.
        /// </summary>
        public async Task LoadModels()
        {
            // Single and Multiple modes load lazily, so there is nothing to preload
            if (_configuration.ModelLoadType == ModelLoadType.Single
             || _configuration.ModelLoadType == ModelLoadType.Multiple)
                return;

            foreach (var modelConfig in _configuration.Models)
            {
                await LoadModel(modelConfig);

                // Only preload the first model if in PreloadSingle mode
                if (_configuration.ModelLoadType == ModelLoadType.PreloadSingle)
                    break;
            }
        }
        /// <summary>
        /// Unloads the model.
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        /// <returns></returns>
        public Task UnloadModel(string modelName)
        {
            if (_modelInstances.TryRemove(modelName, out var model))
            {
                model?.Dispose();
                return Task.FromResult(true);
            }
            return Task.FromResult(false);
        }
        /// <summary>
        /// Unloads all models.
        /// </summary>
        public async Task UnloadModels()
        {
            foreach (var modelName in _modelInstances.Keys)
            {
                await UnloadModel(modelName);
            }
        }

        /// <summary>
        /// Gets a model by name.
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        /// <returns></returns>
        public Task<LLamaModel> GetModel(string modelName)
        {
            _modelInstances.TryGetValue(modelName, out var model);
            return Task.FromResult(model);
        }
        /// <summary>
        /// Gets a context from the specified model.
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        /// <param name="contextName">The contextName.</param>
        /// <returns></returns>
        /// <exception cref="System.Exception">Model not found</exception>
        public async Task<LLamaContext> GetContext(string modelName, string contextName)
        {
            if (!_modelInstances.TryGetValue(modelName, out var model))
                throw new Exception("Model not found");

            return await model.GetContext(contextName);
        }

        /// <summary>
        /// Creates a context on the specified model.
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        /// <param name="contextName">The contextName.</param>
        /// <returns></returns>
        /// <exception cref="System.Exception">Model not found</exception>
        public async Task<LLamaContext> CreateContext(string modelName, string contextName)
        {
            if (!_modelInstances.TryGetValue(modelName, out var model))
                throw new Exception("Model not found");

            using (await _contextLock.LockAsync())
            {
                return await model.CreateContext(contextName);
            }
        }
        /// <summary>
        /// Removes a context from the specified model.
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        /// <param name="contextName">The contextName.</param>
        /// <returns></returns>
        /// <exception cref="System.Exception">Model not found</exception>
        public async Task<bool> RemoveContext(string modelName, string contextName)
        {
            if (!_modelInstances.TryGetValue(modelName, out var model))
                throw new Exception("Model not found");

            using (await _contextLock.LockAsync())
            {
                return await model.RemoveContext(contextName);
            }
        }
        /// <summary>
        /// Gets or loads the model, then gets or creates the context.
        /// This will load a model from disk if not already loaded, and also create the context
        /// </summary>
        /// <param name="modelName">Name of the model.</param>
        /// <param name="contextName">The contextName.</param>
        /// <returns></returns>
        /// <exception cref="System.Exception">Model option '{modelName}' not found</exception>
        public async Task<(LLamaModel, LLamaContext)> GetOrCreateModelAndContext(string modelName, string contextName)
        {
            if (_modelInstances.TryGetValue(modelName, out var model))
                return (model, await model.GetContext(contextName) ?? await model.CreateContext(contextName));

            // Get model configuration
            var modelConfig = _configuration.Models.FirstOrDefault(x => x.Name == modelName);
            if (modelConfig is null)
                throw new Exception($"Model option '{modelName}' not found");

            // Load Model
            model = await LoadModel(modelConfig);

            // Get or Create Context
            return (model, await model.GetContext(contextName) ?? await model.CreateContext(contextName));
        }
    }
}
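Finally, a consumer-side sketch (the `modelService` and `connectionId` names are illustrative): GetOrCreateModelAndContext is the one-call path a request handler would typically use, with RemoveContext cleaning up when the session ends.

    // e.g. inside a request handler with an injected IModelService
    var (model, context) = await modelService.GetOrCreateModelAndContext("example", connectionId);
    // ... run inference against 'context' ...
    await modelService.RemoveContext("example", connectionId); // when the session ends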