From 0f03e8f1a32106b59ac647f3b317fecd6ba43e64 Mon Sep 17 00:00:00 2001
From: Martin Evans <martindevans@gmail.com>
Date: Tue, 3 Oct 2023 21:08:31 +0100
Subject: [PATCH] Added workaround to LLama.Web and LLama.WebAPI

---
 LLama.Web/Services/ConnectionSessionService.cs | 12 ++++++------
 LLama.WebAPI/Services/StatefulChatService.cs   | 11 ++++++++---
 LLama.WebAPI/Services/StatelessChatService.cs  | 10 ++++++++--
 3 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/LLama.Web/Services/ConnectionSessionService.cs b/LLama.Web/Services/ConnectionSessionService.cs
index 7dfcde39..b5867d9b 100644
--- a/LLama.Web/Services/ConnectionSessionService.cs
+++ b/LLama.Web/Services/ConnectionSessionService.cs
@@ -3,7 +3,6 @@ using LLama.Web.Common;
 using LLama.Web.Models;
 using Microsoft.Extensions.Options;
 using System.Collections.Concurrent;
-using System.Drawing;
 
 namespace LLama.Web.Services
 {
@@ -50,15 +49,16 @@ namespace LLama.Web.Services
             if (modelOption.MaxInstances > -1 && currentInstances >= modelOption.MaxInstances)
                 return Task.FromResult(ServiceResult.FromError<ModelSession>("Maximum model instances reached"));
 
-            // Create model
-            var llamaModel = new LLamaContext(modelOption);
+            // Load weights
+            // todo: weights are loaded from file on every call here; a central service should load them once and share them between all contexts that need them.
+            using var weights = LLamaWeights.LoadFromFile(modelOption);
 
             // Create executor
             ILLamaExecutor executor = executorType switch
             {
-                LLamaExecutorType.Interactive => new InteractiveExecutor(llamaModel),
-                LLamaExecutorType.Instruct => new InstructExecutor(llamaModel),
-                LLamaExecutorType.Stateless => new StatelessExecutor(llamaModel),
+                LLamaExecutorType.Interactive => new InteractiveExecutor(new LLamaContext(weights, modelOption)), //todo: properly dispose of LLamaContext
+                LLamaExecutorType.Instruct => new InstructExecutor(new LLamaContext(weights, modelOption)), //todo: properly dispose of LLamaContext
+                LLamaExecutorType.Stateless => new StatelessExecutor(weights, modelOption),
                 _ => default
             };
 
diff --git a/LLama.WebAPI/Services/StatefulChatService.cs b/LLama.WebAPI/Services/StatefulChatService.cs
index ab542694..f1eb3538 100644
--- a/LLama.WebAPI/Services/StatefulChatService.cs
+++ b/LLama.WebAPI/Services/StatefulChatService.cs
@@ -16,10 +16,15 @@ public class StatefulChatService : IDisposable
 
     public StatefulChatService(IConfiguration configuration)
     {
-        _context = new LLamaContext(new Common.ModelParams(configuration["ModelPath"])
+        var @params = new Common.ModelParams(configuration["ModelPath"])
         {
-            ContextSize = 512
-        });
+            ContextSize = 512,
+        };
+
+        // todo: share weights from a central service instead of loading them from file in every constructor call
+        using var weights = LLamaWeights.LoadFromFile(@params); // NOTE(review): disposed when this constructor returns while _context stays in use - confirm the context keeps the model alive
+
+        _context = new LLamaContext(weights, @params);
         _session = new ChatSession(new InteractiveExecutor(_context));
     }
 
diff --git a/LLama.WebAPI/Services/StatelessChatService.cs b/LLama.WebAPI/Services/StatelessChatService.cs
index b924f4d8..71da775f 100644
--- a/LLama.WebAPI/Services/StatelessChatService.cs
+++ b/LLama.WebAPI/Services/StatelessChatService.cs
@@ -12,10 +12,16 @@ namespace LLama.WebAPI.Services
 
         public StatelessChatService(IConfiguration configuration)
         {
-            _context = new LLamaContext(new ModelParams(configuration["ModelPath"])
+            var @params = new Common.ModelParams(configuration["ModelPath"])
             {
                 ContextSize = 512,
-            });
+            };
+
+            // todo: share weights from a central service
+            using var weights = LLamaWeights.LoadFromFile(@params);
+
+            _context = new LLamaContext(weights, @params);
+
             // TODO: replace with a stateless executor
             _session = new ChatSession(new InteractiveExecutor(_context))
                         .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Assistant:" }, redundancyLength: 8))