From 0f03e8f1a32106b59ac647f3b317fecd6ba43e64 Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Tue, 3 Oct 2023 21:08:31 +0100 Subject: [PATCH] Added workaround to LLama.Web and LLama.WebAPI --- LLama.Web/Services/ConnectionSessionService.cs | 12 ++++++------ LLama.WebAPI/Services/StatefulChatService.cs | 11 ++++++++--- LLama.WebAPI/Services/StatelessChatService.cs | 10 ++++++++-- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/LLama.Web/Services/ConnectionSessionService.cs b/LLama.Web/Services/ConnectionSessionService.cs index 7dfcde39..b5867d9b 100644 --- a/LLama.Web/Services/ConnectionSessionService.cs +++ b/LLama.Web/Services/ConnectionSessionService.cs @@ -3,7 +3,6 @@ using LLama.Web.Common; using LLama.Web.Models; using Microsoft.Extensions.Options; using System.Collections.Concurrent; -using System.Drawing; namespace LLama.Web.Services { @@ -50,15 +49,16 @@ namespace LLama.Web.Services if (modelOption.MaxInstances > -1 && currentInstances >= modelOption.MaxInstances) return Task.FromResult(ServiceResult.FromError("Maximum model instances reached")); - // Create model - var llamaModel = new LLamaContext(modelOption); + // Load weights + // todo: it would be better to have a central service which loads weights and shares them between all contexts that need them! + using var weights = LLamaWeights.LoadFromFile(modelOption); // Create executor ILLamaExecutor executor = executorType switch { - LLamaExecutorType.Interactive => new InteractiveExecutor(llamaModel), - LLamaExecutorType.Instruct => new InstructExecutor(llamaModel), - LLamaExecutorType.Stateless => new StatelessExecutor(llamaModel), + LLamaExecutorType.Interactive => new InteractiveExecutor(new LLamaContext(weights, modelOption)), //todo: properly dispose of LLamaContext + LLamaExecutorType.Instruct => new InstructExecutor(new LLamaContext(weights, modelOption)), //todo: properly dispose of LLamaContext + LLamaExecutorType.Stateless => new StatelessExecutor(weights, modelOption), _ => default }; diff --git a/LLama.WebAPI/Services/StatefulChatService.cs b/LLama.WebAPI/Services/StatefulChatService.cs index ab542694..f1eb3538 100644 --- a/LLama.WebAPI/Services/StatefulChatService.cs +++ b/LLama.WebAPI/Services/StatefulChatService.cs @@ -16,10 +16,15 @@ public class StatefulChatService : IDisposable public StatefulChatService(IConfiguration configuration) { - _context = new LLamaContext(new Common.ModelParams(configuration["ModelPath"]) + var @params = new Common.ModelParams(configuration["ModelPath"]) { - ContextSize = 512 - }); + ContextSize = 512, + }; + + // todo: share weights from a central service + using var weights = LLamaWeights.LoadFromFile(@params); + + _context = new LLamaContext(weights, @params); _session = new ChatSession(new InteractiveExecutor(_context)); } diff --git a/LLama.WebAPI/Services/StatelessChatService.cs b/LLama.WebAPI/Services/StatelessChatService.cs index b924f4d8..71da775f 100644 --- a/LLama.WebAPI/Services/StatelessChatService.cs +++ b/LLama.WebAPI/Services/StatelessChatService.cs @@ -12,10 +12,16 @@ namespace LLama.WebAPI.Services public StatelessChatService(IConfiguration configuration) { - _context = new LLamaContext(new ModelParams(configuration["ModelPath"]) + var @params = new Common.ModelParams(configuration["ModelPath"]) { ContextSize = 512, - }); + }; + + // todo: share weights from a central service + using var weights = LLamaWeights.LoadFromFile(@params); + + _context = new LLamaContext(weights, @params); + // TODO: replace with a stateless executor _session = new ChatSession(new InteractiveExecutor(_context)) .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Assistant:" }, redundancyLength: 8))