Added workaround to LLama.Web and LLama.WebAPI

2 years ago · 0f03e8f1a3
--- a/LLama.Web/Services/ConnectionSessionService.cs
+++ b/LLama.Web/Services/ConnectionSessionService.cs
@@ -3,7 +3,6 @@ using LLama.Web.Common;
 using LLama.Web.Models;
 using Microsoft.Extensions.Options;
 using System.Collections.Concurrent;
 using System.Drawing;

 namespace LLama.Web.Services
 {
@@ -50,15 +49,16 @@ namespace LLama.Web.Services
            if (modelOption.MaxInstances > -1 && currentInstances >= modelOption.MaxInstances)
                return Task.FromResult(ServiceResult.FromError<ModelSession>("Maximum model instances reached"));

            // Create model
            var llamaModel = new LLamaContext(modelOption);
            // Load weights
            // todo: it would be better to have a central service which loads weights and shares them between all contexts that need them!
            using var weights = LLamaWeights.LoadFromFile(modelOption);

            // Create executor
            ILLamaExecutor executor = executorType switch
            {
                LLamaExecutorType.Interactive => new InteractiveExecutor(llamaModel),
                LLamaExecutorType.Instruct => new InstructExecutor(llamaModel),
                LLamaExecutorType.Stateless => new StatelessExecutor(llamaModel),
                LLamaExecutorType.Interactive => new InteractiveExecutor(new LLamaContext(weights, modelOption)), //todo: properly dispose of LLamaContext
                LLamaExecutorType.Instruct => new InstructExecutor(new LLamaContext(weights, modelOption)), //todo: properly dispose of LLamaContext
                LLamaExecutorType.Stateless => new StatelessExecutor(weights, modelOption),
                _ => default
            };

--- a/LLama.WebAPI/Services/StatefulChatService.cs
+++ b/LLama.WebAPI/Services/StatefulChatService.cs
@@ -16,10 +16,15 @@ public class StatefulChatService : IDisposable

    public StatefulChatService(IConfiguration configuration)
    {
        _context = new LLamaContext(new Common.ModelParams(configuration["ModelPath"])
        var @params = new Common.ModelParams(configuration["ModelPath"])
        {
            ContextSize = 512
        });
            ContextSize = 512,
        };

        // todo: share weights from a central service
        using var weights = LLamaWeights.LoadFromFile(@params);

        _context = new LLamaContext(weights, @params);
        _session = new ChatSession(new InteractiveExecutor(_context));
    }

--- a/LLama.WebAPI/Services/StatelessChatService.cs
+++ b/LLama.WebAPI/Services/StatelessChatService.cs
@@ -12,10 +12,16 @@ namespace LLama.WebAPI.Services

        public StatelessChatService(IConfiguration configuration)
        {
            _context = new LLamaContext(new ModelParams(configuration["ModelPath"])
            var @params = new Common.ModelParams(configuration["ModelPath"])
            {
                ContextSize = 512,
            });
            };

            // todo: share weights from a central service
            using var weights = LLamaWeights.LoadFromFile(@params);

            _context = new LLamaContext(weights, @params);

            // TODO: replace with a stateless executor
            _session = new ChatSession(new InteractiveExecutor(_context))
                        .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Assistant:" }, redundancyLength: 8))