| @@ -0,0 +1,107 @@ | |||||
| using System.Collections.Concurrent; | |||||
| namespace LLama.Web.Async | |||||
| { | |||||
| /// <summary> | |||||
| /// Creates a async/thread-safe guard helper | |||||
| /// </summary> | |||||
| /// <seealso cref="AsyncGuard<byte>" /> | |||||
| public class AsyncGuard : AsyncGuard<byte> | |||||
| { | |||||
| private readonly byte _key; | |||||
| private readonly ConcurrentDictionary<byte, bool> _lockData; | |||||
| /// <summary> | |||||
| /// Initializes a new instance of the <see cref="AsyncGuard"/> class. | |||||
| /// </summary> | |||||
| public AsyncGuard() | |||||
| { | |||||
| _key = 0; | |||||
| _lockData = new ConcurrentDictionary<byte, bool>(); | |||||
| } | |||||
| /// <summary> | |||||
| /// Guards this instance. | |||||
| /// </summary> | |||||
| /// <returns>true if able to enter an guard, false if already guarded</returns> | |||||
| public bool Guard() | |||||
| { | |||||
| return _lockData.TryAdd(_key, true); | |||||
| } | |||||
| /// <summary> | |||||
| /// Releases the guard. | |||||
| /// </summary> | |||||
| /// <returns></returns> | |||||
| public bool Release() | |||||
| { | |||||
| return _lockData.TryRemove(_key, out _); | |||||
| } | |||||
| /// <summary> | |||||
| /// Determines whether this instance is guarded. | |||||
| /// </summary> | |||||
| /// <returns> | |||||
| /// <c>true</c> if this instance is guarded; otherwise, <c>false</c>. | |||||
| /// </returns> | |||||
| public bool IsGuarded() | |||||
| { | |||||
| return _lockData.ContainsKey(_key); | |||||
| } | |||||
| } | |||||
| public class AsyncGuard<T> | |||||
| { | |||||
| private readonly ConcurrentDictionary<T, bool> _lockData; | |||||
| /// <summary> | |||||
| /// Initializes a new instance of the <see cref="AsyncGuard{T}"/> class. | |||||
| /// </summary> | |||||
| public AsyncGuard() | |||||
| { | |||||
| _lockData = new ConcurrentDictionary<T, bool>(); | |||||
| } | |||||
| /// <summary> | |||||
| /// Guards the specified value. | |||||
| /// </summary> | |||||
| /// <param name="value">The value.</param> | |||||
| /// <returns>true if able to enter a guard for this value, false if this value is already guarded</returns> | |||||
| public bool Guard(T value) | |||||
| { | |||||
| return _lockData.TryAdd(value, true); | |||||
| } | |||||
| /// <summary> | |||||
| /// Releases the guard on the specified value. | |||||
| /// </summary> | |||||
| /// <param name="value">The value.</param> | |||||
| /// <returns></returns> | |||||
| public bool Release(T value) | |||||
| { | |||||
| return _lockData.TryRemove(value, out _); | |||||
| } | |||||
| /// <summary> | |||||
| /// Determines whether the specified value is guarded. | |||||
| /// </summary> | |||||
| /// <param name="value">The value.</param> | |||||
| /// <returns> | |||||
| /// <c>true</c> if the specified value is guarded; otherwise, <c>false</c>. | |||||
| /// </returns> | |||||
| public bool IsGuarded(T value) | |||||
| { | |||||
| return _lockData.ContainsKey(value); | |||||
| } | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,101 @@ | |||||
| using LLama.Common; | |||||
| using LLama.Abstractions; | |||||
| using LLama.Native; | |||||
| namespace LLama.Web.Common | |||||
| { | |||||
| public class InferenceOptions : IInferenceParams | |||||
| { | |||||
| /// <summary> | |||||
| /// number of tokens to keep from initial prompt | |||||
| /// </summary> | |||||
| public int TokensKeep { get; set; } = 0; | |||||
| /// <summary> | |||||
| /// how many new tokens to predict (n_predict), set to -1 to inifinitely generate response | |||||
| /// until it complete. | |||||
| /// </summary> | |||||
| public int MaxTokens { get; set; } = -1; | |||||
| /// <summary> | |||||
| /// logit bias for specific tokens | |||||
| /// </summary> | |||||
| public Dictionary<int, float>? LogitBias { get; set; } = null; | |||||
| /// <summary> | |||||
| /// Sequences where the model will stop generating further tokens. | |||||
| /// </summary> | |||||
| public IEnumerable<string> AntiPrompts { get; set; } = Array.Empty<string>(); | |||||
| /// <summary> | |||||
| /// path to file for saving/loading model eval state | |||||
| /// </summary> | |||||
| public string PathSession { get; set; } = string.Empty; | |||||
| /// <summary> | |||||
| /// string to suffix user inputs with | |||||
| /// </summary> | |||||
| public string InputSuffix { get; set; } = string.Empty; | |||||
| /// <summary> | |||||
| /// string to prefix user inputs with | |||||
| /// </summary> | |||||
| public string InputPrefix { get; set; } = string.Empty; | |||||
| /// <summary> | |||||
| /// 0 or lower to use vocab size | |||||
| /// </summary> | |||||
| public int TopK { get; set; } = 40; | |||||
| /// <summary> | |||||
| /// 1.0 = disabled | |||||
| /// </summary> | |||||
| public float TopP { get; set; } = 0.95f; | |||||
| /// <summary> | |||||
| /// 1.0 = disabled | |||||
| /// </summary> | |||||
| public float TfsZ { get; set; } = 1.0f; | |||||
| /// <summary> | |||||
| /// 1.0 = disabled | |||||
| /// </summary> | |||||
| public float TypicalP { get; set; } = 1.0f; | |||||
| /// <summary> | |||||
| /// 1.0 = disabled | |||||
| /// </summary> | |||||
| public float Temperature { get; set; } = 0.8f; | |||||
| /// <summary> | |||||
| /// 1.0 = disabled | |||||
| /// </summary> | |||||
| public float RepeatPenalty { get; set; } = 1.1f; | |||||
| /// <summary> | |||||
| /// last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n) | |||||
| /// </summary> | |||||
| public int RepeatLastTokensCount { get; set; } = 64; | |||||
| /// <summary> | |||||
| /// frequency penalty coefficient | |||||
| /// 0.0 = disabled | |||||
| /// </summary> | |||||
| public float FrequencyPenalty { get; set; } = .0f; | |||||
| /// <summary> | |||||
| /// presence penalty coefficient | |||||
| /// 0.0 = disabled | |||||
| /// </summary> | |||||
| public float PresencePenalty { get; set; } = .0f; | |||||
| /// <summary> | |||||
| /// Mirostat uses tokens instead of words. | |||||
| /// algorithm described in the paper https://arxiv.org/abs/2007.14966. | |||||
| /// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 | |||||
| /// </summary> | |||||
| public MirostatType Mirostat { get; set; } = MirostatType.Disable; | |||||
| /// <summary> | |||||
| /// target entropy | |||||
| /// </summary> | |||||
| public float MirostatTau { get; set; } = 5.0f; | |||||
| /// <summary> | |||||
| /// learning rate | |||||
| /// </summary> | |||||
| public float MirostatEta { get; set; } = 0.1f; | |||||
| /// <summary> | |||||
| /// consider newlines as a repeatable token (penalize_nl) | |||||
| /// </summary> | |||||
| public bool PenalizeNL { get; set; } = true; | |||||
| /// <summary> | |||||
| /// A grammar to constrain possible tokens | |||||
| /// </summary> | |||||
| public SafeLLamaGrammarHandle Grammar { get; set; } = null; | |||||
| } | |||||
| } | |||||
| @@ -4,18 +4,9 @@ | |||||
| { | { | ||||
| public ModelLoadType ModelLoadType { get; set; } | public ModelLoadType ModelLoadType { get; set; } | ||||
| public List<ModelOptions> Models { get; set; } | public List<ModelOptions> Models { get; set; } | ||||
| public List<PromptOptions> Prompts { get; set; } = new List<PromptOptions>(); | |||||
| public List<ParameterOptions> Parameters { get; set; } = new List<ParameterOptions>(); | |||||
| public void Initialize() | public void Initialize() | ||||
| { | { | ||||
| foreach (var prompt in Prompts) | |||||
| { | |||||
| if (File.Exists(prompt.Path)) | |||||
| { | |||||
| prompt.Prompt = File.ReadAllText(prompt.Path).Trim(); | |||||
| } | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -1,105 +0,0 @@ | |||||
| using LLama.Common; | |||||
| using LLama.Abstractions; | |||||
| using LLama.Native; | |||||
| namespace LLama.Web.Common | |||||
| { | |||||
    /// <summary>
    /// Named, legacy inference parameter set; implements <see cref="IInferenceParams"/>.
    /// NOTE(review): duplicates the property set of the newer InferenceOptions,
    /// with an added <see cref="Name"/> used for lookup by name.
    /// </summary>
    public class ParameterOptions : IInferenceParams
    {
        // Display/lookup name for this parameter set.
        public string Name { get; set; }

        /// <summary>
        /// number of tokens to keep from initial prompt
        /// </summary>
        public int TokensKeep { get; set; } = 0;

        /// <summary>
        /// how many new tokens to predict (n_predict), set to -1 to generate
        /// indefinitely until the response completes.
        /// </summary>
        public int MaxTokens { get; set; } = -1;

        /// <summary>
        /// logit bias for specific tokens
        /// </summary>
        public Dictionary<int, float>? LogitBias { get; set; } = null;

        /// <summary>
        /// Sequences where the model will stop generating further tokens.
        /// </summary>
        public IEnumerable<string> AntiPrompts { get; set; } = Array.Empty<string>();

        /// <summary>
        /// path to file for saving/loading model eval state
        /// </summary>
        public string PathSession { get; set; } = string.Empty;

        /// <summary>
        /// string to suffix user inputs with
        /// </summary>
        public string InputSuffix { get; set; } = string.Empty;

        /// <summary>
        /// string to prefix user inputs with
        /// </summary>
        public string InputPrefix { get; set; } = string.Empty;

        /// <summary>
        /// 0 or lower to use vocab size
        /// </summary>
        public int TopK { get; set; } = 40;

        /// <summary>
        /// 1.0 = disabled
        /// </summary>
        public float TopP { get; set; } = 0.95f;

        /// <summary>
        /// 1.0 = disabled
        /// </summary>
        public float TfsZ { get; set; } = 1.0f;

        /// <summary>
        /// 1.0 = disabled
        /// </summary>
        public float TypicalP { get; set; } = 1.0f;

        /// <summary>
        /// 1.0 = disabled
        /// </summary>
        public float Temperature { get; set; } = 0.8f;

        /// <summary>
        /// 1.0 = disabled
        /// </summary>
        public float RepeatPenalty { get; set; } = 1.1f;

        /// <summary>
        /// last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n)
        /// </summary>
        public int RepeatLastTokensCount { get; set; } = 64;

        /// <summary>
        /// frequency penalty coefficient
        /// 0.0 = disabled
        /// </summary>
        public float FrequencyPenalty { get; set; } = .0f;

        /// <summary>
        /// presence penalty coefficient
        /// 0.0 = disabled
        /// </summary>
        public float PresencePenalty { get; set; } = .0f;

        /// <summary>
        /// Mirostat uses tokens instead of words.
        /// algorithm described in the paper https://arxiv.org/abs/2007.14966.
        /// 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
        /// </summary>
        public MirostatType Mirostat { get; set; } = MirostatType.Disable;

        /// <summary>
        /// target entropy
        /// </summary>
        public float MirostatTau { get; set; } = 5.0f;

        /// <summary>
        /// learning rate
        /// </summary>
        public float MirostatEta { get; set; } = 0.1f;

        /// <summary>
        /// consider newlines as a repeatable token (penalize_nl)
        /// </summary>
        public bool PenalizeNL { get; set; } = true;

        /// <summary>
        /// A grammar to constrain possible tokens
        /// </summary>
        public SafeLLamaGrammarHandle Grammar { get; set; } = null;
    }
| } | |||||
| @@ -1,11 +0,0 @@ | |||||
| namespace LLama.Web.Common | |||||
| { | |||||
    /// <summary>
    /// Options describing a named prompt template.
    /// </summary>
    public class PromptOptions
    {
        /// <summary>Display/lookup name of the prompt.</summary>
        public string Name { get; set; }

        /// <summary>File path the prompt text is loaded from (read at startup when the file exists).</summary>
        public string Path { get; set; }

        /// <summary>The prompt text itself (populated from <see cref="Path"/> when the file exists).</summary>
        public string Prompt { get; set; }

        /// <summary>Anti-prompt strings merged into the inference anti-prompts.</summary>
        public List<string> AntiPrompt { get; set; }

        /// <summary>Keywords filtered out of the generated output stream.</summary>
        public List<string> OutputFilter { get; set; }
    }
| } | |||||
| @@ -0,0 +1,14 @@ | |||||
| namespace LLama.Web.Common | |||||
| { | |||||
    /// <summary>
    /// Configuration for creating a model session.
    /// </summary>
    public class SessionOptions
    {
        /// <summary>Name of the model to use for the session.</summary>
        public string Model { get; set; }

        /// <summary>Initial prompt run when the session is created.</summary>
        public string Prompt { get; set; }

        /// <summary>Comma-separated anti-prompt values; combined with <see cref="AntiPrompts"/> (duplicates removed).</summary>
        public string AntiPrompt { get; set; }

        /// <summary>Anti-prompt list; combined with the CSV <see cref="AntiPrompt"/> (duplicates removed).</summary>
        public List<string> AntiPrompts { get; set; }

        /// <summary>Comma-separated output filter keywords; combined with <see cref="OutputFilters"/> (duplicates removed).</summary>
        public string OutputFilter { get; set; }

        /// <summary>Output filter list; combined with the CSV <see cref="OutputFilter"/> (duplicates removed).</summary>
        public List<string> OutputFilters { get; set; }

        /// <summary>Executor type used for inference (Interactive, Instruct or Stateless).</summary>
        public LLamaExecutorType ExecutorType { get; set; }
    }
| } | |||||
| @@ -0,0 +1,54 @@ | |||||
| using LLama.Web.Common; | |||||
| namespace LLama.Web | |||||
| { | |||||
| public static class Extensioms | |||||
| { | |||||
| /// <summary> | |||||
| /// Combines the AntiPrompts list and AntiPrompt csv | |||||
| /// </summary> | |||||
| /// <param name="sessionConfig">The session configuration.</param> | |||||
| /// <returns>Combined AntiPrompts with duplicates removed</returns> | |||||
| public static List<string> GetAntiPrompts(this Common.SessionOptions sessionConfig) | |||||
| { | |||||
| return CombineCSV(sessionConfig.AntiPrompts, sessionConfig.AntiPrompt); | |||||
| } | |||||
| /// <summary> | |||||
| /// Combines the OutputFilters list and OutputFilter csv | |||||
| /// </summary> | |||||
| /// <param name="sessionConfig">The session configuration.</param> | |||||
| /// <returns>Combined OutputFilters with duplicates removed</returns> | |||||
| public static List<string> GetOutputFilters(this Common.SessionOptions sessionConfig) | |||||
| { | |||||
| return CombineCSV(sessionConfig.OutputFilters, sessionConfig.OutputFilter); | |||||
| } | |||||
| /// <summary> | |||||
| /// Combines a string list and a csv and removes duplicates | |||||
| /// </summary> | |||||
| /// <param name="list">The list.</param> | |||||
| /// <param name="csv">The CSV.</param> | |||||
| /// <returns>Combined list with duplicates removed</returns> | |||||
| private static List<string> CombineCSV(List<string> list, string csv) | |||||
| { | |||||
| var results = list?.Count == 0 | |||||
| ? CommaSeperatedToList(csv) | |||||
| : CommaSeperatedToList(csv).Concat(list); | |||||
| return results | |||||
| .Distinct() | |||||
| .ToList(); | |||||
| } | |||||
| private static List<string> CommaSeperatedToList(string value) | |||||
| { | |||||
| if (string.IsNullOrEmpty(value)) | |||||
| return new List<string>(); | |||||
| return value.Split(",", StringSplitOptions.RemoveEmptyEntries) | |||||
| .Select(x => x.Trim()) | |||||
| .ToList(); | |||||
| } | |||||
| } | |||||
| } | |||||
| @@ -6,7 +6,6 @@ namespace LLama.Web.Hubs | |||||
| public interface ISessionClient | public interface ISessionClient | ||||
| { | { | ||||
| Task OnStatus(string connectionId, SessionConnectionStatus status); | Task OnStatus(string connectionId, SessionConnectionStatus status); | ||||
| Task OnResponse(ResponseFragment fragment); | |||||
| Task OnError(string error); | Task OnError(string error); | ||||
| } | } | ||||
| } | } | ||||
| @@ -2,16 +2,15 @@ | |||||
| using LLama.Web.Models; | using LLama.Web.Models; | ||||
| using LLama.Web.Services; | using LLama.Web.Services; | ||||
| using Microsoft.AspNetCore.SignalR; | using Microsoft.AspNetCore.SignalR; | ||||
| using System.Diagnostics; | |||||
| namespace LLama.Web.Hubs | namespace LLama.Web.Hubs | ||||
| { | { | ||||
| public class SessionConnectionHub : Hub<ISessionClient> | public class SessionConnectionHub : Hub<ISessionClient> | ||||
| { | { | ||||
| private readonly ILogger<SessionConnectionHub> _logger; | private readonly ILogger<SessionConnectionHub> _logger; | ||||
| private readonly ConnectionSessionService _modelSessionService; | |||||
| private readonly IModelSessionService _modelSessionService; | |||||
| public SessionConnectionHub(ILogger<SessionConnectionHub> logger, ConnectionSessionService modelSessionService) | |||||
| public SessionConnectionHub(ILogger<SessionConnectionHub> logger, IModelSessionService modelSessionService) | |||||
| { | { | ||||
| _logger = logger; | _logger = logger; | ||||
| _modelSessionService = modelSessionService; | _modelSessionService = modelSessionService; | ||||
| @@ -27,29 +26,27 @@ namespace LLama.Web.Hubs | |||||
| } | } | ||||
| public override async Task OnDisconnectedAsync(Exception? exception) | |||||
| public override async Task OnDisconnectedAsync(Exception exception) | |||||
| { | { | ||||
| _logger.Log(LogLevel.Information, "[OnDisconnectedAsync], Id: {0}", Context.ConnectionId); | _logger.Log(LogLevel.Information, "[OnDisconnectedAsync], Id: {0}", Context.ConnectionId); | ||||
| // Remove connections session on dissconnect | // Remove connections session on dissconnect | ||||
| await _modelSessionService.RemoveAsync(Context.ConnectionId); | |||||
| await _modelSessionService.CloseAsync(Context.ConnectionId); | |||||
| await base.OnDisconnectedAsync(exception); | await base.OnDisconnectedAsync(exception); | ||||
| } | } | ||||
| [HubMethodName("LoadModel")] | [HubMethodName("LoadModel")] | ||||
| public async Task OnLoadModel(LLamaExecutorType executorType, string modelName, string promptName, string parameterName) | |||||
| public async Task OnLoadModel(Common.SessionOptions sessionConfig, InferenceOptions inferenceConfig) | |||||
| { | { | ||||
| _logger.Log(LogLevel.Information, "[OnLoadModel] - Load new model, Connection: {0}, Model: {1}, Prompt: {2}, Parameter: {3}", Context.ConnectionId, modelName, promptName, parameterName); | |||||
| // Remove existing connections session | |||||
| await _modelSessionService.RemoveAsync(Context.ConnectionId); | |||||
| _logger.Log(LogLevel.Information, "[OnLoadModel] - Load new model, Connection: {0}", Context.ConnectionId); | |||||
| await _modelSessionService.CloseAsync(Context.ConnectionId); | |||||
| // Create model session | // Create model session | ||||
| var modelSessionResult = await _modelSessionService.CreateAsync(executorType, Context.ConnectionId, modelName, promptName, parameterName); | |||||
| if (modelSessionResult.HasError) | |||||
| var modelSession = await _modelSessionService.CreateAsync(Context.ConnectionId, sessionConfig, inferenceConfig); | |||||
| if (modelSession is null) | |||||
| { | { | ||||
| await Clients.Caller.OnError(modelSessionResult.Error); | |||||
| await Clients.Caller.OnError("Failed to create model session"); | |||||
| return; | return; | ||||
| } | } | ||||
| @@ -59,40 +56,12 @@ namespace LLama.Web.Hubs | |||||
| [HubMethodName("SendPrompt")] | [HubMethodName("SendPrompt")] | ||||
| public async Task OnSendPrompt(string prompt) | |||||
| public IAsyncEnumerable<TokenModel> OnSendPrompt(string prompt, InferenceOptions inferConfig, CancellationToken cancellationToken) | |||||
| { | { | ||||
| _logger.Log(LogLevel.Information, "[OnSendPrompt] - New prompt received, Connection: {0}", Context.ConnectionId); | _logger.Log(LogLevel.Information, "[OnSendPrompt] - New prompt received, Connection: {0}", Context.ConnectionId); | ||||
| // Get connections session | |||||
| var modelSession = await _modelSessionService.GetAsync(Context.ConnectionId); | |||||
| if (modelSession is null) | |||||
| { | |||||
| await Clients.Caller.OnError("No model has been loaded"); | |||||
| return; | |||||
| } | |||||
| // Create unique response id | |||||
| var responseId = Guid.NewGuid().ToString(); | |||||
| // Send begin of response | |||||
| await Clients.Caller.OnResponse(new ResponseFragment(responseId, isFirst: true)); | |||||
| // Send content of response | |||||
| var stopwatch = Stopwatch.GetTimestamp(); | |||||
| await foreach (var fragment in modelSession.InferAsync(prompt, CancellationTokenSource.CreateLinkedTokenSource(Context.ConnectionAborted))) | |||||
| { | |||||
| await Clients.Caller.OnResponse(new ResponseFragment(responseId, fragment)); | |||||
| } | |||||
| // Send end of response | |||||
| var elapsedTime = Stopwatch.GetElapsedTime(stopwatch); | |||||
| var signature = modelSession.IsInferCanceled() | |||||
| ? $"Inference cancelled after {elapsedTime.TotalSeconds:F0} seconds" | |||||
| : $"Inference completed in {elapsedTime.TotalSeconds:F0} seconds"; | |||||
| await Clients.Caller.OnResponse(new ResponseFragment(responseId, signature, isLast: true)); | |||||
| _logger.Log(LogLevel.Information, "[OnSendPrompt] - Inference complete, Connection: {0}, Elapsed: {1}, Canceled: {2}", Context.ConnectionId, elapsedTime, modelSession.IsInferCanceled()); | |||||
| var linkedCancelationToken = CancellationTokenSource.CreateLinkedTokenSource(Context.ConnectionAborted, cancellationToken); | |||||
| return _modelSessionService.InferAsync(Context.ConnectionId, prompt, inferConfig, linkedCancelationToken.Token); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -14,4 +14,8 @@ | |||||
| <Folder Include="wwwroot\image\" /> | <Folder Include="wwwroot\image\" /> | ||||
| </ItemGroup> | </ItemGroup> | ||||
| <ItemGroup> | |||||
| <PackageReference Include="System.Linq.Async" Version="6.0.1" /> | |||||
| </ItemGroup> | |||||
| </Project> | </Project> | ||||
| @@ -2,12 +2,12 @@ | |||||
| using LLama.Web.Common; | using LLama.Web.Common; | ||||
| using System.Collections.Concurrent; | using System.Collections.Concurrent; | ||||
| namespace LLama.Web | |||||
| namespace LLama.Web.Models | |||||
| { | { | ||||
| /// <summary> | /// <summary> | ||||
| /// Wrapper class for LLamaSharp LLamaWeights | /// Wrapper class for LLamaSharp LLamaWeights | ||||
| /// </summary> | /// </summary> | ||||
| /// <seealso cref="System.IDisposable" /> | |||||
| /// <seealso cref="IDisposable" /> | |||||
| public class LLamaModel : IDisposable | public class LLamaModel : IDisposable | ||||
| { | { | ||||
| private readonly ModelOptions _config; | private readonly ModelOptions _config; | ||||
| @@ -3,46 +3,97 @@ using LLama.Web.Common; | |||||
| namespace LLama.Web.Models | namespace LLama.Web.Models | ||||
| { | { | ||||
| public class ModelSession : IDisposable | |||||
| public class ModelSession | |||||
| { | { | ||||
| private bool _isFirstInteraction = true; | |||||
| private ModelOptions _modelOptions; | |||||
| private PromptOptions _promptOptions; | |||||
| private ParameterOptions _inferenceOptions; | |||||
| private ITextStreamTransform _outputTransform; | |||||
| private ILLamaExecutor _executor; | |||||
| private readonly string _sessionId; | |||||
| private readonly LLamaModel _model; | |||||
| private readonly LLamaContext _context; | |||||
| private readonly ILLamaExecutor _executor; | |||||
| private readonly Common.SessionOptions _sessionParams; | |||||
| private readonly ITextStreamTransform _outputTransform; | |||||
| private readonly InferenceOptions _defaultInferenceConfig; | |||||
| private CancellationTokenSource _cancellationTokenSource; | private CancellationTokenSource _cancellationTokenSource; | ||||
| public ModelSession(ILLamaExecutor executor, ModelOptions modelOptions, PromptOptions promptOptions, ParameterOptions parameterOptions) | |||||
| public ModelSession(LLamaModel model, LLamaContext context, string sessionId, Common.SessionOptions sessionOptions, InferenceOptions inferenceOptions = null) | |||||
| { | { | ||||
| _executor = executor; | |||||
| _modelOptions = modelOptions; | |||||
| _promptOptions = promptOptions; | |||||
| _inferenceOptions = parameterOptions; | |||||
| _inferenceOptions.AntiPrompts = _promptOptions.AntiPrompt?.Concat(_inferenceOptions.AntiPrompts ?? Enumerable.Empty<string>()).Distinct() ?? _inferenceOptions.AntiPrompts; | |||||
| if (_promptOptions.OutputFilter?.Count > 0) | |||||
| _outputTransform = new LLamaTransforms.KeywordTextOutputStreamTransform(_promptOptions.OutputFilter, redundancyLength: 5); | |||||
| _model = model; | |||||
| _context = context; | |||||
| _sessionId = sessionId; | |||||
| _sessionParams = sessionOptions; | |||||
| _defaultInferenceConfig = inferenceOptions ?? new InferenceOptions(); | |||||
| _outputTransform = CreateOutputFilter(_sessionParams); | |||||
| _executor = CreateExecutor(_model, _context, _sessionParams); | |||||
| } | } | ||||
| public string ModelName | |||||
| /// <summary> | |||||
| /// Gets the session identifier. | |||||
| /// </summary> | |||||
| public string SessionId => _sessionId; | |||||
| /// <summary> | |||||
| /// Gets the name of the model. | |||||
| /// </summary> | |||||
| public string ModelName => _sessionParams.Model; | |||||
| /// <summary> | |||||
| /// Gets the context. | |||||
| /// </summary> | |||||
| public LLamaContext Context => _context; | |||||
| /// <summary> | |||||
| /// Gets the session configuration. | |||||
| /// </summary> | |||||
| public Common.SessionOptions SessionConfig => _sessionParams; | |||||
| /// <summary> | |||||
| /// Gets the inference parameters. | |||||
| /// </summary> | |||||
| public InferenceOptions InferenceParams => _defaultInferenceConfig; | |||||
| /// <summary> | |||||
| /// Initializes the prompt. | |||||
| /// </summary> | |||||
| /// <param name="inferenceConfig">The inference configuration.</param> | |||||
| /// <param name="cancellationToken">The cancellation token.</param> | |||||
| internal async Task InitializePrompt(InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default) | |||||
| { | { | ||||
| get { return _modelOptions.Name; } | |||||
| if (_sessionParams.ExecutorType == LLamaExecutorType.Stateless) | |||||
| return; | |||||
| if (string.IsNullOrEmpty(_sessionParams.Prompt)) | |||||
| return; | |||||
| // Run Initial prompt | |||||
| var inferenceParams = ConfigureInferenceParams(inferenceConfig); | |||||
| _cancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); | |||||
| await foreach (var _ in _executor.InferAsync(_sessionParams.Prompt, inferenceParams, _cancellationTokenSource.Token)) | |||||
| { | |||||
| // We dont really need the response of the initial prompt, so exit on first token | |||||
| break; | |||||
| }; | |||||
| } | } | ||||
| public IAsyncEnumerable<string> InferAsync(string message, CancellationTokenSource cancellationTokenSource) | |||||
| /// <summary> | |||||
| /// Runs inference on the model context | |||||
| /// </summary> | |||||
| /// <param name="message">The message.</param> | |||||
| /// <param name="inferenceConfig">The inference configuration.</param> | |||||
| /// <param name="cancellationToken">The cancellation token.</param> | |||||
| /// <returns></returns> | |||||
| internal IAsyncEnumerable<string> InferAsync(string message, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default) | |||||
| { | { | ||||
| _cancellationTokenSource = cancellationTokenSource; | |||||
| if (_isFirstInteraction) | |||||
| { | |||||
| _isFirstInteraction = false; | |||||
| message = _promptOptions.Prompt + message; | |||||
| } | |||||
| var inferenceParams = ConfigureInferenceParams(inferenceConfig); | |||||
| _cancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); | |||||
| var inferenceStream = _executor.InferAsync(message, inferenceParams, _cancellationTokenSource.Token); | |||||
| if (_outputTransform is not null) | if (_outputTransform is not null) | ||||
| return _outputTransform.TransformAsync(_executor.InferAsync(message, _inferenceOptions, _cancellationTokenSource.Token)); | |||||
| return _outputTransform.TransformAsync(inferenceStream); | |||||
| return _executor.InferAsync(message, _inferenceOptions, _cancellationTokenSource.Token); | |||||
| return inferenceStream; | |||||
| } | } | ||||
| @@ -56,13 +107,36 @@ namespace LLama.Web.Models | |||||
| return _cancellationTokenSource.IsCancellationRequested; | return _cancellationTokenSource.IsCancellationRequested; | ||||
| } | } | ||||
| public void Dispose() | |||||
| /// <summary> | |||||
| /// Configures the inference parameters. | |||||
| /// </summary> | |||||
| /// <param name="inferenceConfig">The inference configuration.</param> | |||||
| private IInferenceParams ConfigureInferenceParams(InferenceOptions inferenceConfig) | |||||
| { | |||||
| var inferenceParams = inferenceConfig ?? _defaultInferenceConfig; | |||||
| inferenceParams.AntiPrompts = _sessionParams.GetAntiPrompts(); | |||||
| return inferenceParams; | |||||
| } | |||||
| private ITextStreamTransform CreateOutputFilter(Common.SessionOptions sessionConfig) | |||||
| { | { | ||||
| _inferenceOptions = null; | |||||
| _outputTransform = null; | |||||
| var outputFilters = sessionConfig.GetOutputFilters(); | |||||
| if (outputFilters.Count > 0) | |||||
| return new LLamaTransforms.KeywordTextOutputStreamTransform(outputFilters); | |||||
| _executor?.Context.Dispose(); | |||||
| _executor = null; | |||||
| return null; | |||||
| } | |||||
| private ILLamaExecutor CreateExecutor(LLamaModel model, LLamaContext context, Common.SessionOptions sessionConfig) | |||||
| { | |||||
| return sessionConfig.ExecutorType switch | |||||
| { | |||||
| LLamaExecutorType.Interactive => new InteractiveExecutor(_context), | |||||
| LLamaExecutorType.Instruct => new InstructExecutor(_context), | |||||
| LLamaExecutorType.Stateless => new StatelessExecutor(_model.LLamaWeights, _model.ModelParams), | |||||
| _ => default | |||||
| }; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -1,18 +0,0 @@ | |||||
| namespace LLama.Web.Models | |||||
| { | |||||
| public class ResponseFragment | |||||
| { | |||||
| public ResponseFragment(string id, string content = null, bool isFirst = false, bool isLast = false) | |||||
| { | |||||
| Id = id; | |||||
| IsLast = isLast; | |||||
| IsFirst = isFirst; | |||||
| Content = content; | |||||
| } | |||||
| public string Id { get; set; } | |||||
| public string Content { get; set; } | |||||
| public bool IsLast { get; set; } | |||||
| public bool IsFirst { get; set; } | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,24 @@ | |||||
| namespace LLama.Web.Models | |||||
| { | |||||
| public class TokenModel | |||||
| { | |||||
| public TokenModel(string id, string content = null, TokenType tokenType = TokenType.Content) | |||||
| { | |||||
| Id = id; | |||||
| Content = content; | |||||
| TokenType = tokenType; | |||||
| } | |||||
| public string Id { get; set; } | |||||
| public string Content { get; set; } | |||||
| public TokenType TokenType { get; set; } | |||||
| } | |||||
| public enum TokenType | |||||
| { | |||||
| Begin = 0, | |||||
| Content = 2, | |||||
| End = 4, | |||||
| Cancel = 10 | |||||
| } | |||||
| } | |||||
| @@ -1,96 +0,0 @@ | |||||
| @page | |||||
| @model InstructModel | |||||
| @{ | |||||
| } | |||||
| @Html.AntiForgeryToken() | |||||
| <div class="d-flex flex-row h-100 pt-1 pb-1"> | |||||
| <div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto"> | |||||
| <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center"> | |||||
| <h4>Instruct</h4> | |||||
| <div> | |||||
| <span>Hub: <b id="socket">Disconnected</b></span> | |||||
| </div> | |||||
| </div> | |||||
| <div class="m-1"> | |||||
| <small>Model</small> | |||||
| <select id="Model" class="form-control form-select input-control" required="required" autocomplete="off"> | |||||
| <option value="" disabled selected hidden>Please Select</option> | |||||
| @foreach (var modelOption in Model.Options.Models) | |||||
| { | |||||
| <option value="@modelOption.Name">@modelOption.Name</option> | |||||
| } | |||||
| </select> | |||||
| </div> | |||||
| <div class="m-1"> | |||||
| <small>Parameters</small> | |||||
| <select id="Parameter" class="form-control form-select input-control" required="required" autocomplete="off"> | |||||
| <option value="" disabled selected hidden>Please Select</option> | |||||
| @foreach (var parameterOption in Model.Options.Parameters) | |||||
| { | |||||
| <option value="@parameterOption.Name">@parameterOption.Name</option> | |||||
| } | |||||
| </select> | |||||
| </div> | |||||
| <div class="m-1"> | |||||
| <small>Prompt</small> | |||||
| <select id="Prompt" class="form-control form-select input-control" required="required" autocomplete="off"> | |||||
| <option value="" disabled selected hidden>Please Select</option> | |||||
| @foreach (var promptOption in Model.Options.Prompts) | |||||
| { | |||||
| <option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option> | |||||
| } | |||||
| </select> | |||||
| <textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea> | |||||
| </div> | |||||
| <div class="d-flex flex-grow-1"></div> | |||||
| <div id="session-details" class="m-1"></div> | |||||
| <div class="m-1"> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column h-100 w-75"> | |||||
| <div class="section-head"> | |||||
| </div> | |||||
| <div id="scroll-container" class="section-content border"> | |||||
| <div id="output-container" class="d-flex flex-column gap-1 p-1"> | |||||
| </div> | |||||
| </div> | |||||
| <div class="section-foot"> | |||||
| <div class="input-group mt-2"> | |||||
| <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea> | |||||
| <div class="d-flex flex-column"> | |||||
| <div class="d-flex flex-fill"> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button> | |||||
| </div> | |||||
| <div class="d-flex"> | |||||
| <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off"> | |||||
| <i class="bi-x-circle"></i> | |||||
| </button> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off"> | |||||
| <i class="bi-trash3"></i> | |||||
| </button> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| @{ await Html.RenderPartialAsync("_ChatTemplates"); } | |||||
| @section Scripts { | |||||
| <script src="~/js/sessionconnectionchat.js"></script> | |||||
| <script> | |||||
| createConnectionSessionChat(Enums.LLamaExecutorType.Instruct); | |||||
| </script> | |||||
| } | |||||
| @@ -1,34 +0,0 @@ | |||||
| using LLama.Web.Common; | |||||
| using LLama.Web.Models; | |||||
| using LLama.Web.Services; | |||||
| using Microsoft.AspNetCore.Mvc; | |||||
| using Microsoft.AspNetCore.Mvc.RazorPages; | |||||
| using Microsoft.Extensions.Options; | |||||
| namespace LLama.Web.Pages | |||||
| { | |||||
| public class InstructModel : PageModel | |||||
| { | |||||
| private readonly ILogger<InstructModel> _logger; | |||||
| private readonly ConnectionSessionService _modelSessionService; | |||||
| public InstructModel(ILogger<InstructModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService) | |||||
| { | |||||
| _logger = logger; | |||||
| Options = options.Value; | |||||
| _modelSessionService = modelSessionService; | |||||
| } | |||||
| public LLamaOptions Options { get; set; } | |||||
| public void OnGet() | |||||
| { | |||||
| } | |||||
| public async Task<IActionResult> OnPostCancel(CancelModel model) | |||||
| { | |||||
| await _modelSessionService.CancelAsync(model.ConnectionId); | |||||
| return new JsonResult(default); | |||||
| } | |||||
| } | |||||
| } | |||||
| @@ -1,4 +0,0 @@ | |||||
| .section-content { | |||||
| flex: 1; | |||||
| overflow-y: scroll; | |||||
| } | |||||
| @@ -1,96 +0,0 @@ | |||||
| @page | |||||
| @model InteractiveModel | |||||
| @{ | |||||
| } | |||||
| @Html.AntiForgeryToken() | |||||
| <div class="d-flex flex-row h-100 pt-1 pb-1"> | |||||
| <div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto"> | |||||
| <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center"> | |||||
| <h4>Interactive</h4> | |||||
| <div> | |||||
| <span>Hub: <b id="socket">Disconnected</b></span> | |||||
| </div> | |||||
| </div> | |||||
| <div class="m-1"> | |||||
| <small>Model</small> | |||||
| <select id="Model" class="form-control form-select input-control" required="required" autocomplete="off"> | |||||
| <option value="" disabled selected hidden>Please Select</option> | |||||
| @foreach (var modelOption in Model.Options.Models) | |||||
| { | |||||
| <option value="@modelOption.Name">@modelOption.Name</option> | |||||
| } | |||||
| </select> | |||||
| </div> | |||||
| <div class="m-1"> | |||||
| <small>Parameters</small> | |||||
| <select id="Parameter" class="form-control form-select input-control" required="required" autocomplete="off"> | |||||
| <option value="" disabled selected hidden>Please Select</option> | |||||
| @foreach (var parameterOption in Model.Options.Parameters) | |||||
| { | |||||
| <option value="@parameterOption.Name">@parameterOption.Name</option> | |||||
| } | |||||
| </select> | |||||
| </div> | |||||
| <div class="m-1"> | |||||
| <small>Prompt</small> | |||||
| <select id="Prompt" class="form-control form-select input-control" required="required" autocomplete="off"> | |||||
| <option value="" disabled selected hidden>Please Select</option> | |||||
| @foreach (var promptOption in Model.Options.Prompts) | |||||
| { | |||||
| <option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option> | |||||
| } | |||||
| </select> | |||||
| <textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea> | |||||
| </div> | |||||
| <div class="d-flex flex-grow-1"></div> | |||||
| <div id="session-details" class="m-1"></div> | |||||
| <div class="m-1"> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column h-100 w-75"> | |||||
| <div class="section-head"> | |||||
| </div> | |||||
| <div id="scroll-container" class="section-content border"> | |||||
| <div id="output-container" class="d-flex flex-column gap-1 p-1"> | |||||
| </div> | |||||
| </div> | |||||
| <div class="section-foot"> | |||||
| <div class="input-group mt-2"> | |||||
| <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea> | |||||
| <div class="d-flex flex-column"> | |||||
| <div class="d-flex flex-fill"> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button> | |||||
| </div> | |||||
| <div class="d-flex"> | |||||
| <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off"> | |||||
| <i class="bi-x-circle"></i> | |||||
| </button> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off"> | |||||
| <i class="bi-trash3"></i> | |||||
| </button> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| @{ await Html.RenderPartialAsync("_ChatTemplates");} | |||||
| @section Scripts { | |||||
| <script src="~/js/sessionconnectionchat.js"></script> | |||||
| <script> | |||||
| createConnectionSessionChat(Enums.LLamaExecutorType.Interactive); | |||||
| </script> | |||||
| } | |||||
| @@ -1,34 +0,0 @@ | |||||
| using LLama.Web.Common; | |||||
| using LLama.Web.Models; | |||||
| using LLama.Web.Services; | |||||
| using Microsoft.AspNetCore.Mvc; | |||||
| using Microsoft.AspNetCore.Mvc.RazorPages; | |||||
| using Microsoft.Extensions.Options; | |||||
| namespace LLama.Web.Pages | |||||
| { | |||||
| public class InteractiveModel : PageModel | |||||
| { | |||||
| private readonly ILogger<InteractiveModel> _logger; | |||||
| private readonly ConnectionSessionService _modelSessionService; | |||||
| public InteractiveModel(ILogger<InteractiveModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService) | |||||
| { | |||||
| _logger = logger; | |||||
| Options = options.Value; | |||||
| _modelSessionService = modelSessionService; | |||||
| } | |||||
| public LLamaOptions Options { get; set; } | |||||
| public void OnGet() | |||||
| { | |||||
| } | |||||
| public async Task<IActionResult> OnPostCancel(CancelModel model) | |||||
| { | |||||
| await _modelSessionService.CancelAsync(model.ConnectionId); | |||||
| return new JsonResult(default); | |||||
| } | |||||
| } | |||||
| } | |||||
| @@ -1,4 +0,0 @@ | |||||
| .section-content { | |||||
| flex: 1; | |||||
| overflow-y: scroll; | |||||
| } | |||||
| @@ -1,97 +0,0 @@ | |||||
| @page | |||||
| @model StatelessModel | |||||
| @{ | |||||
| } | |||||
| @Html.AntiForgeryToken() | |||||
| <div class="d-flex flex-row h-100 pt-1 pb-1"> | |||||
| <div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto"> | |||||
| <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center"> | |||||
| <h4>Stateless</h4> | |||||
| <div> | |||||
| <span>Hub: <b id="socket">Disconnected</b></span> | |||||
| </div> | |||||
| </div> | |||||
| <div class="m-1"> | |||||
| <small>Model</small> | |||||
| <select id="Model" class="form-control form-select input-control" required="required" autocomplete="off"> | |||||
| <option value="" disabled selected hidden>Please Select</option> | |||||
| @foreach (var modelOption in Model.Options.Models) | |||||
| { | |||||
| <option value="@modelOption.Name">@modelOption.Name</option> | |||||
| } | |||||
| </select> | |||||
| </div> | |||||
| <div class="m-1"> | |||||
| <small>Parameters</small> | |||||
| <select id="Parameter" class="form-control form-select input-control" required="required" autocomplete="off"> | |||||
| <option value="" disabled selected hidden>Please Select</option> | |||||
| @foreach (var parameterOption in Model.Options.Parameters) | |||||
| { | |||||
| <option value="@parameterOption.Name">@parameterOption.Name</option> | |||||
| } | |||||
| </select> | |||||
| </div> | |||||
| <div class="m-1"> | |||||
| <small>Prompt</small> | |||||
| <select id="Prompt" class="form-control form-select input-control" required="required" autocomplete="off"> | |||||
| <option value="" disabled selected hidden>Please Select</option> | |||||
| @foreach (var promptOption in Model.Options.Prompts) | |||||
| { | |||||
| <option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option> | |||||
| } | |||||
| </select> | |||||
| <textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea> | |||||
| </div> | |||||
| <div class="d-flex flex-grow-1"></div> | |||||
| <div id="session-details" class="m-1"></div> | |||||
| <div class="m-1"> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column h-100 w-75"> | |||||
| <div class="section-head"> | |||||
| </div> | |||||
| <div id="scroll-container" class="section-content border"> | |||||
| <div id="output-container" class="d-flex flex-column gap-1 p-1"> | |||||
| </div> | |||||
| </div> | |||||
| <div class="section-foot"> | |||||
| <div class="input-group mt-2"> | |||||
| <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea> | |||||
| <div class="d-flex flex-column"> | |||||
| <div class="d-flex flex-fill"> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button> | |||||
| </div> | |||||
| <div class="d-flex"> | |||||
| <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off"> | |||||
| <i class="bi-x-circle"></i> | |||||
| </button> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off"> | |||||
| <i class="bi-trash3"></i> | |||||
| </button> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| @{ await Html.RenderPartialAsync("_ChatTemplates"); } | |||||
| @section Scripts { | |||||
| <script src="~/js/sessionconnectionchat.js"></script> | |||||
| <script> | |||||
| createConnectionSessionChat(Enums.LLamaExecutorType.Stateless); | |||||
| </script> | |||||
| } | |||||
| @@ -1,34 +0,0 @@ | |||||
| using LLama.Web.Common; | |||||
| using LLama.Web.Models; | |||||
| using LLama.Web.Services; | |||||
| using Microsoft.AspNetCore.Mvc; | |||||
| using Microsoft.AspNetCore.Mvc.RazorPages; | |||||
| using Microsoft.Extensions.Options; | |||||
| namespace LLama.Web.Pages | |||||
| { | |||||
| public class StatelessModel : PageModel | |||||
| { | |||||
| private readonly ILogger<StatelessModel> _logger; | |||||
| private readonly ConnectionSessionService _modelSessionService; | |||||
| public StatelessModel(ILogger<StatelessModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService) | |||||
| { | |||||
| _logger = logger; | |||||
| Options = options.Value; | |||||
| _modelSessionService = modelSessionService; | |||||
| } | |||||
| public LLamaOptions Options { get; set; } | |||||
| public void OnGet() | |||||
| { | |||||
| } | |||||
| public async Task<IActionResult> OnPostCancel(CancelModel model) | |||||
| { | |||||
| await _modelSessionService.CancelAsync(model.ConnectionId); | |||||
| return new JsonResult(default); | |||||
| } | |||||
| } | |||||
| } | |||||
| @@ -1,4 +0,0 @@ | |||||
| .section-content { | |||||
| flex: 1; | |||||
| overflow-y: scroll; | |||||
| } | |||||
| @@ -1,10 +1,121 @@ | |||||
| @page | @page | ||||
| @using LLama.Web.Common; | |||||
| @model IndexModel | @model IndexModel | ||||
| @{ | @{ | ||||
| ViewData["Title"] = "Home page"; | |||||
| ViewData["Title"] = "Inference Demo"; | |||||
| } | } | ||||
| <div class="text-center"> | |||||
| <h1 class="display-4">Welcome</h1> | |||||
| <p>Learn about <a href="https://docs.microsoft.com/aspnet/core">building Web apps with ASP.NET Core</a>.</p> | |||||
| @Html.AntiForgeryToken() | |||||
| <div class="d-flex flex-row h-100 pt-1 pb-1"> | |||||
| <div class="d-flex flex-column h-100 border me-1 w-25"> | |||||
| <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center"> | |||||
| <div> | |||||
| <span>@ViewData["Title"]</span> | |||||
| </div> | |||||
| <div> | |||||
| <small>Socket: <b id="socket">Disconnected</b></small> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column overflow-auto"> | |||||
| <form id="SessionParameters"> | |||||
| <div class="d-flex flex-column m-1"> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>Model</small> | |||||
| @Html.DropDownListFor(m => m.SessionOptions.Model, new SelectList(Model.Options.Models, "Name", "Name"), new { @class = "form-control prompt-control" ,required="required", autocomplete="off"}) | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>Inference Type</small> | |||||
| @Html.DropDownListFor(m => m.SessionOptions.ExecutorType, Html.GetEnumSelectList<LLamaExecutorType>(), new { @class = "form-control prompt-control" ,required="required", autocomplete="off"}) | |||||
| </div> | |||||
| <nav> | |||||
| <div class="nav nav-tabs" id="nav-tab" role="tablist"> | |||||
| <button class="nav-link active w-50" id="nav-prompt-tab" data-bs-toggle="tab" data-bs-target="#nav-prompt" type="button" role="tab">Prompt</button> | |||||
| <button class="nav-link w-50" id="nav-params-tab" data-bs-toggle="tab" data-bs-target="#nav-params" type="button" role="tab">Parameters</button> | |||||
| </div> | |||||
| </nav> | |||||
| <div class="tab-content" id="nav-tabContent"> | |||||
| <div class="tab-pane fade show active" id="nav-prompt" role="tabpanel" aria-labelledby="nav-prompt-tab"> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>Prompt</small> | |||||
| @Html.TextAreaFor(m => Model.SessionOptions.Prompt, new { @type="text", @class = "form-control prompt-control", rows=8}) | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>AntiPrompts</small> | |||||
| @Html.TextBoxFor(m => Model.SessionOptions.AntiPrompt, new { @type="text", @class = "form-control prompt-control"}) | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>OutputFilter</small> | |||||
| @Html.TextBoxFor(m => Model.SessionOptions.OutputFilter, new { @type="text", @class = "form-control prompt-control"}) | |||||
| </div> | |||||
| </div> | |||||
| <div class="tab-pane fade" id="nav-params" role="tabpanel" aria-labelledby="nav-params-tab"> | |||||
| @{ | |||||
| await Html.RenderPartialAsync("_Parameters", Model.InferenceOptions); | |||||
| } | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </form> | |||||
| </div> | |||||
| <div class="d-flex flex-grow-1"></div> | |||||
| <div id="session-details" class="m-1"></div> | |||||
| <div class="m-1"> | |||||
| <button class="btn btn-outline-success w-100" type="button" id="load"> | |||||
| <div class="d-flex align-items-center justify-content-center"> | |||||
| <img class="spinner me-2" style="display:none" src="~/image/loading.gif" width="20" /> | |||||
| Begin Session | |||||
| </div> | |||||
| </button> | |||||
| <button class="btn btn-outline-danger w-100" type="button" id="unload" style="display:none">End Session</button> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column h-100 w-75"> | |||||
| <div class="section-head"> | |||||
| </div> | |||||
| <div id="scroll-container" class="section-content border"> | |||||
| <div id="output-container" class="d-flex flex-column gap-1 p-1"> | |||||
| </div> | |||||
| </div> | |||||
| <div class="section-foot"> | |||||
| <div class="input-group mt-2"> | |||||
| <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea> | |||||
| <div class="d-flex flex-column"> | |||||
| <div class="d-flex flex-fill"> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button> | |||||
| </div> | |||||
| <div class="d-flex"> | |||||
| <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off"> | |||||
| <i class="bi-x-circle"></i> | |||||
| </button> | |||||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off"> | |||||
| <i class="bi-trash3"></i> | |||||
| </button> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| </div> | </div> | ||||
| @{ | |||||
| await Html.RenderPartialAsync("_ChatTemplates"); | |||||
| } | |||||
| @section Scripts { | |||||
| <script src="~/js/sessionconnectionchat.js"></script> | |||||
| <script> | |||||
| createConnectionSessionChat(); | |||||
| </script> | |||||
| } | |||||
| @@ -1,5 +1,7 @@ | |||||
| using Microsoft.AspNetCore.Mvc; | |||||
| using LLama.Web.Common; | |||||
| using Microsoft.AspNetCore.Mvc; | |||||
| using Microsoft.AspNetCore.Mvc.RazorPages; | using Microsoft.AspNetCore.Mvc.RazorPages; | ||||
| using Microsoft.Extensions.Options; | |||||
| namespace LLama.Web.Pages | namespace LLama.Web.Pages | ||||
| { | { | ||||
| @@ -7,14 +9,33 @@ namespace LLama.Web.Pages | |||||
| { | { | ||||
| private readonly ILogger<IndexModel> _logger; | private readonly ILogger<IndexModel> _logger; | ||||
| public IndexModel(ILogger<IndexModel> logger) | |||||
| public IndexModel(ILogger<IndexModel> logger, IOptions<LLamaOptions> options) | |||||
| { | { | ||||
| _logger = logger; | _logger = logger; | ||||
| Options = options.Value; | |||||
| } | } | ||||
| public LLamaOptions Options { get; set; } | |||||
| [BindProperty] | |||||
| public Common.SessionOptions SessionOptions { get; set; } | |||||
| [BindProperty] | |||||
| public InferenceOptions InferenceOptions { get; set; } | |||||
| public void OnGet() | public void OnGet() | ||||
| { | { | ||||
| SessionOptions = new Common.SessionOptions | |||||
| { | |||||
| Prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.", | |||||
| AntiPrompt = "User:", | |||||
| // OutputFilter = "User:, Response:" | |||||
| }; | |||||
| InferenceOptions = new InferenceOptions | |||||
| { | |||||
| Temperature = 0.8f | |||||
| }; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -12,7 +12,7 @@ | |||||
| <img src="~/image/human.png" width="60"/> | <img src="~/image/human.png" width="60"/> | ||||
| </div> | </div> | ||||
| <div class="d-flex flex-column flex-fill justify-content-between"> | <div class="d-flex flex-column flex-fill justify-content-between"> | ||||
| <span class="w-100" style="resize:none" >{{text}}</span> | |||||
| <span class="content" style="resize:none" >{{text}}</span> | |||||
| <div class="d-flex justify-content-end"> | <div class="d-flex justify-content-end"> | ||||
| <i>{{date}}</i> | <i>{{date}}</i> | ||||
| </div> | </div> | ||||
| @@ -26,9 +26,7 @@ | |||||
| <img src="~/image/robot.png" width="60"/> | <img src="~/image/robot.png" width="60"/> | ||||
| </div> | </div> | ||||
| <div id="{{id}}" class="d-flex flex-column flex-fill justify-content-between"> | <div id="{{id}}" class="d-flex flex-column flex-fill justify-content-between"> | ||||
| <span class="content"> | |||||
| <img src="~/image/loading.gif" width="30" /> | |||||
| </span> | |||||
| <span class="content"><img src="~/image/loading.gif" width="30" /></span> | |||||
| <div class="d-flex justify-content-end"> | <div class="d-flex justify-content-end"> | ||||
| <div class="d-flex flex-column align-items-end"> | <div class="d-flex flex-column align-items-end"> | ||||
| <i class="date"></i> | <i class="date"></i> | ||||
| @@ -41,20 +39,6 @@ | |||||
| </div> | </div> | ||||
| </script> | </script> | ||||
| <script id="sessionDetailsTemplate" type="text/html"> | |||||
| <div> | |||||
| <small>Session Details </small> | |||||
| </div> | |||||
| <div> | |||||
| <i>Model: </i> | |||||
| <span>{{model}}</span> | |||||
| </div> | |||||
| <div> | |||||
| <i>Prompt: </i> | |||||
| <span>{{prompt}}</span> | |||||
| </div> | |||||
| <div> | |||||
| <i>Parameters: </i> | |||||
| <span>{{parameter}}</span> | |||||
| </div> | |||||
| <script id="signatureTemplate" type="text/html"> | |||||
| <span>{{content}}</span> | |||||
| </script> | </script> | ||||
| @@ -3,7 +3,7 @@ | |||||
| <head> | <head> | ||||
| <meta charset="utf-8" /> | <meta charset="utf-8" /> | ||||
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | ||||
| <title>@ViewData["Title"] - LLama.Web</title> | |||||
| <title>@ViewData["Title"] - LLamaSharp.Web</title> | |||||
| <link rel="stylesheet" href="~/lib/bootstrap/dist/css/bootstrap.min.css" /> | <link rel="stylesheet" href="~/lib/bootstrap/dist/css/bootstrap.min.css" /> | ||||
| <link href="~/lib/bootstrap/dist/css/bootstrap-icons.css" rel="stylesheet" /> | <link href="~/lib/bootstrap/dist/css/bootstrap-icons.css" rel="stylesheet" /> | ||||
| <link rel="stylesheet" href="~/css/site.css" asp-append-version="true" /> | <link rel="stylesheet" href="~/css/site.css" asp-append-version="true" /> | ||||
| @@ -13,24 +13,26 @@ | |||||
| <header> | <header> | ||||
| <nav class="navbar navbar-expand-sm navbar-toggleable-sm navbar-light bg-white border-bottom box-shadow "> | <nav class="navbar navbar-expand-sm navbar-toggleable-sm navbar-light bg-white border-bottom box-shadow "> | ||||
| <div class="container"> | <div class="container"> | ||||
| <a class="navbar-brand" asp-area="" asp-page="/Index">LLama.Web</a> | |||||
| <a class="navbar-brand" asp-area="" asp-page="/Index">LLamaSharp.Web</a> | |||||
| <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target=".navbar-collapse" aria-controls="navbarSupportedContent" | <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target=".navbar-collapse" aria-controls="navbarSupportedContent" | ||||
| aria-expanded="false" aria-label="Toggle navigation"> | aria-expanded="false" aria-label="Toggle navigation"> | ||||
| <span class="navbar-toggler-icon"></span> | <span class="navbar-toggler-icon"></span> | ||||
| </button> | </button> | ||||
| <div class="navbar-collapse collapse d-sm-inline-flex justify-content-between"> | <div class="navbar-collapse collapse d-sm-inline-flex justify-content-between"> | ||||
| <ul class="navbar-nav flex-grow-1"> | |||||
| <ul class="navbar-nav flex-grow-1 justify-content-between"> | |||||
| <li class="nav-item"> | <li class="nav-item"> | ||||
| <a class="nav-link text-dark" asp-area="" asp-page="/Index">Home</a> | |||||
| <a class="nav-link text-dark" asp-page="/Index"></a> | |||||
| </li> | </li> | ||||
| <li class="nav-item"> | <li class="nav-item"> | ||||
| <a class="nav-link text-dark" asp-area="" asp-page="/Executor/Interactive">Interactive</a> | |||||
| </li> | |||||
| <li class="nav-item"> | |||||
| <a class="nav-link text-dark" asp-area="" asp-page="/Executor/Instruct">Instruct</a> | |||||
| </li> | |||||
| <li class="nav-item"> | |||||
| <a class="nav-link text-dark" asp-area="" asp-page="/Executor/Stateless">Stateless</a> | |||||
| <a class="nav-link text-dark" href="https://github.com/SciSharp/LLamaSharp" target="_blank"> | |||||
| <div class="d-flex flex-row align-items-center"> | |||||
| <h5 class="mb-0"> | |||||
| <i class="bi bi-github"></i> | |||||
| <span>LLamaSharp</span> | |||||
| </h5> | |||||
| </div> | |||||
| </a> | |||||
| </li> | </li> | ||||
| </ul> | </ul> | ||||
| </div> | </div> | ||||
| @@ -38,14 +40,14 @@ | |||||
| </nav> | </nav> | ||||
| </header> | </header> | ||||
| <main class="container" role="main" > | |||||
| @RenderBody() | |||||
| </main> | |||||
| <main class="container" role="main"> | |||||
| @RenderBody() | |||||
| </main> | |||||
| <footer class="border-top footer text-muted"> | <footer class="border-top footer text-muted"> | ||||
| <div class="container"> | <div class="container"> | ||||
| © 2023 - LLama.Web | |||||
| © 2023 - LLamaSharp.Web | |||||
| </div> | </div> | ||||
| </footer> | </footer> | ||||
| @@ -0,0 +1,137 @@ | |||||
| @page | |||||
| @using LLama.Common; | |||||
| @model LLama.Abstractions.IInferenceParams | |||||
| } | |||||
| <div class="d-flex flex-row gap-3"> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>MaxTokens</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.MaxTokens, new { @type="range", @class = "slider", min="-1", max="2048", step="1" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>TokensKeep</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.TokensKeep, new { @type="range", @class = "slider", min="0", max="2048", step="1" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-row gap-3"> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>TopK</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.TopK, new { @type="range", @class = "slider", min="-1", max="100", step="1" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>TopP</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.TopP, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-row gap-3"> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>TypicalP</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.TypicalP, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>Temperature</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.Temperature, new { @type="range", @class = "slider", min="0.0", max="1.5", step="0.01" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-row gap-3"> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>RepeatPenalty</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.RepeatPenalty, new { @type="range", @class = "slider", min="0.0", max="2.0", step="0.01" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>RepeatLastTokensCount</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.RepeatLastTokensCount, new { @type="range", @class = "slider", min="0", max="2048", step="1" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-row gap-3"> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>FrequencyPenalty</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.FrequencyPenalty, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>PresencePenalty</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.PresencePenalty, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-row gap-3"> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>TfsZ</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.TfsZ, new { @type="range", @class = "slider",min="0.0", max="1.0", step="0.01" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>-</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| <input class="slider" type="range" value="0" disabled /> | |||||
| <label></label> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>Sampler Type</small> | |||||
| @Html.DropDownListFor(m => m.Mirostat, Html.GetEnumSelectList<MirostatType>(), new { @class = "form-control form-select" }) | |||||
| </div> | |||||
| <div class="d-flex flex-row gap-3"> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>MirostatTau</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.MirostatTau, new { @type="range", @class = "slider", min="0.0", max="10.0", step="0.01" }) | |||||
| <label>0</label> | |||||
| </div> | |||||
| </div> | |||||
| <div class="d-flex flex-column mb-2"> | |||||
| <small>MirostatEta</small> | |||||
| <div class="d-flex flex-row slider-container"> | |||||
| @Html.TextBoxFor(m => m.MirostatEta, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" }) | |||||
| <label>0.0</label> | |||||
| </div> | |||||
| </div> | |||||
| </div> | |||||
| @@ -1,6 +1,7 @@ | |||||
| using LLama.Web.Common; | using LLama.Web.Common; | ||||
| using LLama.Web.Hubs; | using LLama.Web.Hubs; | ||||
| using LLama.Web.Services; | using LLama.Web.Services; | ||||
| using Microsoft.Extensions.DependencyInjection; | |||||
| namespace LLama.Web | namespace LLama.Web | ||||
| { | { | ||||
| @@ -20,7 +21,9 @@ namespace LLama.Web | |||||
| .BindConfiguration(nameof(LLamaOptions)); | .BindConfiguration(nameof(LLamaOptions)); | ||||
| // Services DI | // Services DI | ||||
| builder.Services.AddSingleton<ConnectionSessionService>(); | |||||
| builder.Services.AddHostedService<ModelLoaderService>(); | |||||
| builder.Services.AddSingleton<IModelService, ModelService>(); | |||||
| builder.Services.AddSingleton<IModelSessionService, ModelSessionService>(); | |||||
| var app = builder.Build(); | var app = builder.Build(); | ||||
| @@ -1,94 +0,0 @@ | |||||
| using LLama.Abstractions; | |||||
| using LLama.Web.Common; | |||||
| using LLama.Web.Models; | |||||
| using Microsoft.Extensions.Options; | |||||
| using System.Collections.Concurrent; | |||||
| using System.Drawing; | |||||
| namespace LLama.Web.Services | |||||
| { | |||||
| /// <summary> | |||||
| /// Example Service for handling a model session for a websockets connection lifetime | |||||
| /// Each websocket connection will create its own unique session and context allowing you to use multiple tabs to compare prompts etc | |||||
| /// </summary> | |||||
| public class ConnectionSessionService : IModelSessionService | |||||
| { | |||||
| private readonly LLamaOptions _options; | |||||
| private readonly ILogger<ConnectionSessionService> _logger; | |||||
| private readonly ConcurrentDictionary<string, ModelSession> _modelSessions; | |||||
| public ConnectionSessionService(ILogger<ConnectionSessionService> logger, IOptions<LLamaOptions> options) | |||||
| { | |||||
| _logger = logger; | |||||
| _options = options.Value; | |||||
| _modelSessions = new ConcurrentDictionary<string, ModelSession>(); | |||||
| } | |||||
| public Task<ModelSession> GetAsync(string connectionId) | |||||
| { | |||||
| _modelSessions.TryGetValue(connectionId, out var modelSession); | |||||
| return Task.FromResult(modelSession); | |||||
| } | |||||
| public Task<IServiceResult<ModelSession>> CreateAsync(LLamaExecutorType executorType, string connectionId, string modelName, string promptName, string parameterName) | |||||
| { | |||||
| var modelOption = _options.Models.FirstOrDefault(x => x.Name == modelName); | |||||
| if (modelOption is null) | |||||
| return Task.FromResult(ServiceResult.FromError<ModelSession>($"Model option '{modelName}' not found")); | |||||
| var promptOption = _options.Prompts.FirstOrDefault(x => x.Name == promptName); | |||||
| if (promptOption is null) | |||||
| return Task.FromResult(ServiceResult.FromError<ModelSession>($"Prompt option '{promptName}' not found")); | |||||
| var parameterOption = _options.Parameters.FirstOrDefault(x => x.Name == parameterName); | |||||
| if (parameterOption is null) | |||||
| return Task.FromResult(ServiceResult.FromError<ModelSession>($"Parameter option '{parameterName}' not found")); | |||||
| //Max instance | |||||
| var currentInstances = _modelSessions.Count(x => x.Value.ModelName == modelOption.Name); | |||||
| if (modelOption.MaxInstances > -1 && currentInstances >= modelOption.MaxInstances) | |||||
| return Task.FromResult(ServiceResult.FromError<ModelSession>("Maximum model instances reached")); | |||||
| // Create model | |||||
| var llamaModel = new LLamaContext(modelOption); | |||||
| // Create executor | |||||
| ILLamaExecutor executor = executorType switch | |||||
| { | |||||
| LLamaExecutorType.Interactive => new InteractiveExecutor(llamaModel), | |||||
| LLamaExecutorType.Instruct => new InstructExecutor(llamaModel), | |||||
| LLamaExecutorType.Stateless => new StatelessExecutor(llamaModel), | |||||
| _ => default | |||||
| }; | |||||
| // Create session | |||||
| var modelSession = new ModelSession(executor, modelOption, promptOption, parameterOption); | |||||
| if (!_modelSessions.TryAdd(connectionId, modelSession)) | |||||
| return Task.FromResult(ServiceResult.FromError<ModelSession>("Failed to create model session")); | |||||
| return Task.FromResult(ServiceResult.FromValue(modelSession)); | |||||
| } | |||||
| public Task<bool> RemoveAsync(string connectionId) | |||||
| { | |||||
| if (_modelSessions.TryRemove(connectionId, out var modelSession)) | |||||
| { | |||||
| modelSession.CancelInfer(); | |||||
| modelSession.Dispose(); | |||||
| return Task.FromResult(true); | |||||
| } | |||||
| return Task.FromResult(false); | |||||
| } | |||||
| public Task<bool> CancelAsync(string connectionId) | |||||
| { | |||||
| if (_modelSessions.TryGetValue(connectionId, out var modelSession)) | |||||
| { | |||||
| modelSession.CancelInfer(); | |||||
| return Task.FromResult(true); | |||||
| } | |||||
| return Task.FromResult(false); | |||||
| } | |||||
| } | |||||
| } | |||||
| @@ -1,4 +1,5 @@ | |||||
| using LLama.Web.Common; | using LLama.Web.Common; | ||||
| using LLama.Web.Models; | |||||
| namespace LLama.Web.Services | namespace LLama.Web.Services | ||||
| { | { | ||||
| @@ -1,16 +1,88 @@ | |||||
| using LLama.Abstractions; | |||||
| using LLama.Web.Common; | |||||
| using LLama.Web.Common; | |||||
| using LLama.Web.Models; | using LLama.Web.Models; | ||||
| namespace LLama.Web.Services | namespace LLama.Web.Services | ||||
| { | { | ||||
| public interface IModelSessionService | public interface IModelSessionService | ||||
| { | { | ||||
| /// <summary> | |||||
| /// Gets the ModelSession with the specified Id. | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <returns>The ModelSession if exists, otherwise null</returns> | |||||
| Task<ModelSession> GetAsync(string sessionId); | Task<ModelSession> GetAsync(string sessionId); | ||||
| Task<IServiceResult<ModelSession>> CreateAsync(LLamaExecutorType executorType, string sessionId, string modelName, string promptName, string parameterName); | |||||
| Task<bool> RemoveAsync(string sessionId); | |||||
| Task<bool> CancelAsync(string sessionId); | |||||
| } | |||||
| /// <summary> | |||||
| /// Gets all ModelSessions | |||||
| /// </summary> | |||||
| /// <returns>A collection oa all Model instances</returns> | |||||
| Task<IEnumerable<ModelSession>> GetAllAsync(); | |||||
| /// <summary> | |||||
| /// Creates a new ModelSession | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <param name="sessionOptions">The session configuration.</param> | |||||
| /// <param name="inferenceOptions">The default inference configuration, will be used for all inference where no infer configuration is supplied.</param> | |||||
| /// <param name="cancellationToken">The cancellation token.</param> | |||||
| /// <returns></returns> | |||||
| /// <exception cref="System.Exception"> | |||||
| /// Session with id {sessionId} already exists | |||||
| /// or | |||||
| /// Failed to create model session | |||||
| /// </exception> | |||||
| Task<ModelSession> CreateAsync(string sessionId, Common.SessionOptions sessionOptions, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default); | |||||
| /// <summary> | |||||
| /// Closes the session | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <returns></returns> | |||||
| Task<bool> CloseAsync(string sessionId); | |||||
| /// <summary> | |||||
| /// Runs inference on the current ModelSession | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <param name="prompt">The prompt.</param> | |||||
| /// <param name="inferenceConfig">The inference configuration, if null session default is used</param> | |||||
| /// <param name="cancellationToken">The cancellation token.</param> | |||||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||||
| IAsyncEnumerable<TokenModel> InferAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default); | |||||
| /// <summary> | |||||
| /// Runs inference on the current ModelSession | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <param name="prompt">The prompt.</param> | |||||
| /// <param name="inferenceOptions">The inference configuration, if null session default is used</param> | |||||
| /// <param name="cancellationToken">The cancellation token.</param> | |||||
| /// <returns>Streaming async result of <see cref="System.String" /></returns> | |||||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||||
| IAsyncEnumerable<string> InferTextAsync(string sessionId, string prompt, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default); | |||||
| /// <summary> | |||||
| /// Queues inference on the current ModelSession | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <param name="prompt">The prompt.</param> | |||||
| /// <param name="inferenceOptions">The inference configuration, if null session default is used</param> | |||||
| /// <param name="cancellationToken">The cancellation token.</param> | |||||
| /// <returns>Completed inference result as string</returns> | |||||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||||
| Task<string> InferTextCompleteAsync(string sessionId, string prompt, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default); | |||||
| /// <summary> | |||||
| /// Cancels the current inference action. | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <returns></returns> | |||||
| Task<bool> CancelAsync(string sessionId); | |||||
| } | |||||
| } | } | ||||
| @@ -0,0 +1,42 @@ | |||||
| namespace LLama.Web.Services | |||||
| { | |||||
| /// <summary> | |||||
| /// Service for managing loading/preloading of models at app startup | |||||
| /// </summary> | |||||
| /// <typeparam name="T">Type used to identify contexts</typeparam> | |||||
| /// <seealso cref="Microsoft.Extensions.Hosting.IHostedService" /> | |||||
| public class ModelLoaderService : IHostedService | |||||
| { | |||||
| private readonly IModelService _modelService; | |||||
| /// <summary> | |||||
| /// Initializes a new instance of the <see cref="ModelLoaderService"/> class. | |||||
| /// </summary> | |||||
| /// <param name="modelService">The model service.</param> | |||||
| public ModelLoaderService(IModelService modelService) | |||||
| { | |||||
| _modelService = modelService; | |||||
| } | |||||
| /// <summary> | |||||
| /// Triggered when the application host is ready to start the service. | |||||
| /// </summary> | |||||
| /// <param name="cancellationToken">Indicates that the start process has been aborted.</param> | |||||
| public async Task StartAsync(CancellationToken cancellationToken) | |||||
| { | |||||
| await _modelService.LoadModels(); | |||||
| } | |||||
| /// <summary> | |||||
| /// Triggered when the application host is performing a graceful shutdown. | |||||
| /// </summary> | |||||
| /// <param name="cancellationToken">Indicates that the shutdown process should no longer be graceful.</param> | |||||
| public async Task StopAsync(CancellationToken cancellationToken) | |||||
| { | |||||
| await _modelService.UnloadModels(); | |||||
| } | |||||
| } | |||||
| } | |||||
| @@ -1,5 +1,6 @@ | |||||
| using LLama.Web.Async; | using LLama.Web.Async; | ||||
| using LLama.Web.Common; | using LLama.Web.Common; | ||||
| using LLama.Web.Models; | |||||
| using System.Collections.Concurrent; | using System.Collections.Concurrent; | ||||
| namespace LLama.Web.Services | namespace LLama.Web.Services | ||||
| @@ -0,0 +1,216 @@ | |||||
| using LLama.Web.Async; | |||||
| using LLama.Web.Common; | |||||
| using LLama.Web.Models; | |||||
| using System.Collections.Concurrent; | |||||
| using System.Diagnostics; | |||||
| using System.Runtime.CompilerServices; | |||||
| namespace LLama.Web.Services | |||||
| { | |||||
| /// <summary> | |||||
| /// Example Service for handling a model session for a websockets connection lifetime | |||||
| /// Each websocket connection will create its own unique session and context allowing you to use multiple tabs to compare prompts etc | |||||
| /// </summary> | |||||
| public class ModelSessionService : IModelSessionService | |||||
| { | |||||
| private readonly AsyncGuard<string> _sessionGuard; | |||||
| private readonly IModelService _modelService; | |||||
| private readonly ConcurrentDictionary<string, ModelSession> _modelSessions; | |||||
| /// <summary> | |||||
| /// Initializes a new instance of the <see cref="ModelSessionService{T}"/> class. | |||||
| /// </summary> | |||||
| /// <param name="modelService">The model service.</param> | |||||
| /// <param name="modelSessionStateService">The model session state service.</param> | |||||
| public ModelSessionService(IModelService modelService) | |||||
| { | |||||
| _modelService = modelService; | |||||
| _sessionGuard = new AsyncGuard<string>(); | |||||
| _modelSessions = new ConcurrentDictionary<string, ModelSession>(); | |||||
| } | |||||
| /// <summary> | |||||
| /// Gets the ModelSession with the specified Id. | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <returns>The ModelSession if exists, otherwise null</returns> | |||||
| public Task<ModelSession> GetAsync(string sessionId) | |||||
| { | |||||
| return Task.FromResult(_modelSessions.TryGetValue(sessionId, out var session) ? session : null); | |||||
| } | |||||
| /// <summary> | |||||
| /// Gets all ModelSessions | |||||
| /// </summary> | |||||
| /// <returns>A collection oa all Model instances</returns> | |||||
| public Task<IEnumerable<ModelSession>> GetAllAsync() | |||||
| { | |||||
| return Task.FromResult<IEnumerable<ModelSession>>(_modelSessions.Values); | |||||
| } | |||||
| /// <summary> | |||||
| /// Creates a new ModelSession | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <param name="sessionConfig">The session configuration.</param> | |||||
| /// <param name="inferenceConfig">The default inference configuration, will be used for all inference where no infer configuration is supplied.</param> | |||||
| /// <param name="cancellationToken">The cancellation token.</param> | |||||
| /// <returns></returns> | |||||
| /// <exception cref="System.Exception"> | |||||
| /// Session with id {sessionId} already exists | |||||
| /// or | |||||
| /// Failed to create model session | |||||
| /// </exception> | |||||
| public async Task<ModelSession> CreateAsync(string sessionId, Common.SessionOptions sessionConfig, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default) | |||||
| { | |||||
| if (_modelSessions.TryGetValue(sessionId, out _)) | |||||
| throw new Exception($"Session with id {sessionId} already exists"); | |||||
| // Create context | |||||
| var (model, context) = await _modelService.GetOrCreateModelAndContext(sessionConfig.Model, sessionId); | |||||
| // Create session | |||||
| var modelSession = new ModelSession(model, context, sessionId, sessionConfig, inferenceConfig); | |||||
| if (!_modelSessions.TryAdd(sessionId, modelSession)) | |||||
| throw new Exception($"Failed to create model session"); | |||||
| // Run initial Prompt | |||||
| await modelSession.InitializePrompt(inferenceConfig, cancellationToken); | |||||
| return modelSession; | |||||
| } | |||||
| /// <summary> | |||||
| /// Closes the session | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <returns></returns> | |||||
| public async Task<bool> CloseAsync(string sessionId) | |||||
| { | |||||
| if (_modelSessions.TryRemove(sessionId, out var modelSession)) | |||||
| { | |||||
| modelSession.CancelInfer(); | |||||
| return await _modelService.RemoveContext(modelSession.ModelName, sessionId); | |||||
| } | |||||
| return false; | |||||
| } | |||||
| /// <summary> | |||||
| /// Runs inference on the current ModelSession | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <param name="prompt">The prompt.</param> | |||||
| /// <param name="inferenceConfig">The inference configuration, if null session default is used</param> | |||||
| /// <param name="cancellationToken">The cancellation token.</param> | |||||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||||
| public async IAsyncEnumerable<TokenModel> InferAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) | |||||
| { | |||||
| if (!_sessionGuard.Guard(sessionId)) | |||||
| throw new Exception($"Inference is already running for this session"); | |||||
| try | |||||
| { | |||||
| if (!_modelSessions.TryGetValue(sessionId, out var modelSession)) | |||||
| yield break; | |||||
| // Send begin of response | |||||
| var stopwatch = Stopwatch.GetTimestamp(); | |||||
| yield return new TokenModel(default, default, TokenType.Begin); | |||||
| // Send content of response | |||||
| await foreach (var token in modelSession.InferAsync(prompt, inferenceConfig, cancellationToken).ConfigureAwait(false)) | |||||
| { | |||||
| yield return new TokenModel(default, token); | |||||
| } | |||||
| // Send end of response | |||||
| var elapsedTime = GetElapsed(stopwatch); | |||||
| var endTokenType = modelSession.IsInferCanceled() ? TokenType.Cancel : TokenType.End; | |||||
| var signature = endTokenType == TokenType.Cancel | |||||
| ? $"Inference cancelled after {elapsedTime / 1000:F0} seconds" | |||||
| : $"Inference completed in {elapsedTime / 1000:F0} seconds"; | |||||
| yield return new TokenModel(default, signature, endTokenType); | |||||
| } | |||||
| finally | |||||
| { | |||||
| _sessionGuard.Release(sessionId); | |||||
| } | |||||
| } | |||||
| /// <summary> | |||||
| /// Runs inference on the current ModelSession | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <param name="prompt">The prompt.</param> | |||||
| /// <param name="inferenceConfig">The inference configuration, if null session default is used</param> | |||||
| /// <param name="cancellationToken">The cancellation token.</param> | |||||
| /// <returns>Streaming async result of <see cref="System.String" /></returns> | |||||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||||
| public IAsyncEnumerable<string> InferTextAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default) | |||||
| { | |||||
| async IAsyncEnumerable<string> InferTextInternal() | |||||
| { | |||||
| await foreach (var token in InferAsync(sessionId, prompt, inferenceConfig, cancellationToken).ConfigureAwait(false)) | |||||
| { | |||||
| if (token.TokenType == TokenType.Content) | |||||
| yield return token.Content; | |||||
| } | |||||
| } | |||||
| return InferTextInternal(); | |||||
| } | |||||
| /// <summary> | |||||
| /// Runs inference on the current ModelSession | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <param name="prompt">The prompt.</param> | |||||
| /// <param name="inferenceConfig">The inference configuration, if null session default is used</param> | |||||
| /// <param name="cancellationToken">The cancellation token.</param> | |||||
| /// <returns>Completed inference result as string</returns> | |||||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||||
| public async Task<string> InferTextCompleteAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default) | |||||
| { | |||||
| var inferResult = await InferAsync(sessionId, prompt, inferenceConfig, cancellationToken) | |||||
| .Where(x => x.TokenType == TokenType.Content) | |||||
| .Select(x => x.Content) | |||||
| .ToListAsync(cancellationToken: cancellationToken); | |||||
| return string.Concat(inferResult); | |||||
| } | |||||
| /// <summary> | |||||
| /// Cancels the current inference action. | |||||
| /// </summary> | |||||
| /// <param name="sessionId">The session identifier.</param> | |||||
| /// <returns></returns> | |||||
| public Task<bool> CancelAsync(string sessionId) | |||||
| { | |||||
| if (_modelSessions.TryGetValue(sessionId, out var modelSession)) | |||||
| { | |||||
| modelSession.CancelInfer(); | |||||
| return Task.FromResult(true); | |||||
| } | |||||
| return Task.FromResult(false); | |||||
| } | |||||
| /// <summary> | |||||
| /// Gets the elapsed time in milliseconds. | |||||
| /// </summary> | |||||
| /// <param name="timestamp">The timestamp.</param> | |||||
| /// <returns></returns> | |||||
| private static int GetElapsed(long timestamp) | |||||
| { | |||||
| return (int)Stopwatch.GetElapsedTime(timestamp).TotalMilliseconds; | |||||
| } | |||||
| } | |||||
| } | |||||
| @@ -7,48 +7,34 @@ | |||||
| }, | }, | ||||
| "AllowedHosts": "*", | "AllowedHosts": "*", | ||||
| "LLamaOptions": { | "LLamaOptions": { | ||||
| "ModelLoadType": "Single", | |||||
| "Models": [ | "Models": [ | ||||
| { | { | ||||
| "Name": "WizardLM-7B", | "Name": "WizardLM-7B", | ||||
| "MaxInstances": 2, | |||||
| "MaxInstances": 20, | |||||
| "ModelPath": "D:\\Repositories\\AI\\Models\\wizardLM-7B.ggmlv3.q4_0.bin", | "ModelPath": "D:\\Repositories\\AI\\Models\\wizardLM-7B.ggmlv3.q4_0.bin", | ||||
| "ContextSize": 2048 | |||||
| } | |||||
| ], | |||||
| "Parameters": [ | |||||
| { | |||||
| "Name": "Default", | |||||
| "Temperature": 0.6 | |||||
| } | |||||
| ], | |||||
| "Prompts": [ | |||||
| { | |||||
| "Name": "None", | |||||
| "Prompt": "" | |||||
| }, | |||||
| { | |||||
| "Name": "Alpaca", | |||||
| "Path": "D:\\Repositories\\AI\\Prompts\\alpaca.txt", | |||||
| "AntiPrompt": [ | |||||
| "User:" | |||||
| ], | |||||
| "OutputFilter": [ | |||||
| "Response:", | |||||
| "User:" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "Name": "ChatWithBob", | |||||
| "Path": "D:\\Repositories\\AI\\Prompts\\chat-with-bob.txt", | |||||
| "AntiPrompt": [ | |||||
| "User:" | |||||
| ], | |||||
| "OutputFilter": [ | |||||
| "Bob:", | |||||
| "User:" | |||||
| ] | |||||
| "ContextSize": 2048, | |||||
| "BatchSize": 2048, | |||||
| "Threads": 4, | |||||
| "GpuLayerCount": 6, | |||||
| "UseMemorymap": true, | |||||
| "UseMemoryLock": false, | |||||
| "MainGpu": 0, | |||||
| "LowVram": false, | |||||
| "Seed": 1686349486, | |||||
| "UseFp16Memory": true, | |||||
| "Perplexity": false, | |||||
| "LoraAdapter": "", | |||||
| "LoraBase": "", | |||||
| "EmbeddingMode": false, | |||||
| "TensorSplits": null, | |||||
| "GroupedQueryAttention": 1, | |||||
| "RmsNormEpsilon": 0.000005, | |||||
| "RopeFrequencyBase": 10000.0, | |||||
| "RopeFrequencyScale": 1.0, | |||||
| "MulMatQ": false, | |||||
| "Encoding": "UTF-8" | |||||
| } | } | ||||
| ] | ] | ||||
| } | } | ||||
| } | } | ||||
| @@ -22,13 +22,30 @@ footer { | |||||
| @media (min-width: 768px) { | @media (min-width: 768px) { | ||||
| html { | |||||
| font-size: 16px; | |||||
| } | |||||
| html { | |||||
| font-size: 16px; | |||||
| } | |||||
| } | } | ||||
| .btn:focus, .btn:active:focus, .btn-link.nav-link:focus, .form-control:focus, .form-check-input:focus { | .btn:focus, .btn:active:focus, .btn-link.nav-link:focus, .form-control:focus, .form-check-input:focus { | ||||
| box-shadow: 0 0 0 0.1rem white, 0 0 0 0.25rem #258cfb; | |||||
| box-shadow: 0 0 0 0.1rem white, 0 0 0 0.25rem #258cfb; | |||||
| } | |||||
| #scroll-container { | |||||
| flex: 1; | |||||
| overflow-y: scroll; | |||||
| } | |||||
| #output-container .content { | |||||
| white-space: break-spaces; | |||||
| } | } | ||||
| .slider-container > .slider { | |||||
| width: 100%; | |||||
| } | |||||
| .slider-container > label { | |||||
| width: 50px; | |||||
| text-align: center; | |||||
| } | |||||
| @@ -1,26 +1,26 @@ | |||||
| const createConnectionSessionChat = (LLamaExecutorType) => { | |||||
| const createConnectionSessionChat = () => { | |||||
| const outputErrorTemplate = $("#outputErrorTemplate").html(); | const outputErrorTemplate = $("#outputErrorTemplate").html(); | ||||
| const outputInfoTemplate = $("#outputInfoTemplate").html(); | const outputInfoTemplate = $("#outputInfoTemplate").html(); | ||||
| const outputUserTemplate = $("#outputUserTemplate").html(); | const outputUserTemplate = $("#outputUserTemplate").html(); | ||||
| const outputBotTemplate = $("#outputBotTemplate").html(); | const outputBotTemplate = $("#outputBotTemplate").html(); | ||||
| const sessionDetailsTemplate = $("#sessionDetailsTemplate").html(); | |||||
| const signatureTemplate = $("#signatureTemplate").html(); | |||||
| let connectionId; | |||||
| let inferenceSession; | |||||
| const connection = new signalR.HubConnectionBuilder().withUrl("/SessionConnectionHub").build(); | const connection = new signalR.HubConnectionBuilder().withUrl("/SessionConnectionHub").build(); | ||||
| const scrollContainer = $("#scroll-container"); | const scrollContainer = $("#scroll-container"); | ||||
| const outputContainer = $("#output-container"); | const outputContainer = $("#output-container"); | ||||
| const chatInput = $("#input"); | const chatInput = $("#input"); | ||||
| const onStatus = (connection, status) => { | const onStatus = (connection, status) => { | ||||
| connectionId = connection; | |||||
| if (status == Enums.SessionConnectionStatus.Connected) { | if (status == Enums.SessionConnectionStatus.Connected) { | ||||
| $("#socket").text("Connected").addClass("text-success"); | $("#socket").text("Connected").addClass("text-success"); | ||||
| } | } | ||||
| else if (status == Enums.SessionConnectionStatus.Loaded) { | else if (status == Enums.SessionConnectionStatus.Loaded) { | ||||
| loaderHide(); | |||||
| enableControls(); | enableControls(); | ||||
| $("#session-details").html(Mustache.render(sessionDetailsTemplate, { model: getSelectedModel(), prompt: getSelectedPrompt(), parameter: getSelectedParameter() })); | |||||
| $("#load").hide(); | |||||
| $("#unload").show(); | |||||
| onInfo(`New model session successfully started`) | onInfo(`New model session successfully started`) | ||||
| } | } | ||||
| } | } | ||||
| @@ -36,30 +36,31 @@ const createConnectionSessionChat = (LLamaExecutorType) => { | |||||
| let responseContent; | let responseContent; | ||||
| let responseContainer; | let responseContainer; | ||||
| let responseFirstFragment; | |||||
| let responseFirstToken; | |||||
| const onResponse = (response) => { | const onResponse = (response) => { | ||||
| if (!response) | if (!response) | ||||
| return; | return; | ||||
| if (response.isFirst) { | |||||
| outputContainer.append(Mustache.render(outputBotTemplate, response)); | |||||
| responseContainer = $(`#${response.id}`); | |||||
| if (response.tokenType == Enums.TokenType.Begin) { | |||||
| const uniqueId = randomString(); | |||||
| outputContainer.append(Mustache.render(outputBotTemplate, { id: uniqueId, ...response })); | |||||
| responseContainer = $(`#${uniqueId}`); | |||||
| responseContent = responseContainer.find(".content"); | responseContent = responseContainer.find(".content"); | ||||
| responseFirstFragment = true; | |||||
| responseFirstToken = true; | |||||
| scrollToBottom(true); | scrollToBottom(true); | ||||
| return; | return; | ||||
| } | } | ||||
| if (response.isLast) { | |||||
| if (response.tokenType == Enums.TokenType.End || response.tokenType == Enums.TokenType.Cancel) { | |||||
| enableControls(); | enableControls(); | ||||
| responseContainer.find(".signature").append(response.content); | |||||
| responseContainer.find(".signature").append(Mustache.render(signatureTemplate, response)); | |||||
| scrollToBottom(); | scrollToBottom(); | ||||
| } | } | ||||
| else { | else { | ||||
| if (responseFirstFragment) { | |||||
| if (responseFirstToken) { | |||||
| responseContent.empty(); | responseContent.empty(); | ||||
| responseFirstFragment = false; | |||||
| responseFirstToken = false; | |||||
| responseContainer.find(".date").append(getDateTime()); | responseContainer.find(".date").append(getDateTime()); | ||||
| } | } | ||||
| responseContent.append(response.content); | responseContent.append(response.content); | ||||
| @@ -67,45 +68,88 @@ const createConnectionSessionChat = (LLamaExecutorType) => { | |||||
| } | } | ||||
| } | } | ||||
| const sendPrompt = async () => { | const sendPrompt = async () => { | ||||
| const text = chatInput.val(); | const text = chatInput.val(); | ||||
| if (text) { | if (text) { | ||||
| chatInput.val(null); | |||||
| disableControls(); | disableControls(); | ||||
| outputContainer.append(Mustache.render(outputUserTemplate, { text: text, date: getDateTime() })); | outputContainer.append(Mustache.render(outputUserTemplate, { text: text, date: getDateTime() })); | ||||
| await connection.invoke('SendPrompt', text); | |||||
| chatInput.val(null); | |||||
| inferenceSession = await connection | |||||
| .stream("SendPrompt", text, serializeFormToJson('SessionParameters')) | |||||
| .subscribe({ | |||||
| next: onResponse, | |||||
| complete: onResponse, | |||||
| error: onError, | |||||
| }); | |||||
| scrollToBottom(true); | scrollToBottom(true); | ||||
| } | } | ||||
| } | } | ||||
| const cancelPrompt = async () => { | const cancelPrompt = async () => { | ||||
| await ajaxPostJsonAsync('?handler=Cancel', { connectionId: connectionId }); | |||||
| if (inferenceSession) | |||||
| inferenceSession.dispose(); | |||||
| } | } | ||||
| const loadModel = async () => { | const loadModel = async () => { | ||||
| const modelName = getSelectedModel(); | |||||
| const promptName = getSelectedPrompt(); | |||||
| const parameterName = getSelectedParameter(); | |||||
| if (!modelName || !promptName || !parameterName) { | |||||
| onError("Please select a valid Model, Parameter and Prompt"); | |||||
| return; | |||||
| } | |||||
| const sessionParams = serializeFormToJson('SessionParameters'); | |||||
| loaderShow(); | |||||
| disableControls(); | |||||
| disablePromptControls(); | |||||
| $("#load").attr("disabled", "disabled"); | |||||
| // TODO: Split parameters sets | |||||
| await connection.invoke('LoadModel', sessionParams, sessionParams); | |||||
| } | |||||
| const unloadModel = async () => { | |||||
| disableControls(); | disableControls(); | ||||
| await connection.invoke('LoadModel', LLamaExecutorType, modelName, promptName, parameterName); | |||||
| enablePromptControls(); | |||||
| $("#load").removeAttr("disabled"); | |||||
| } | } | ||||
| const serializeFormToJson = (form) => { | |||||
| const formDataJson = {}; | |||||
| const formData = new FormData(document.getElementById(form)); | |||||
| formData.forEach((value, key) => { | |||||
| if (key.includes(".")) | |||||
| key = key.split(".")[1]; | |||||
| // Convert number strings to numbers | |||||
| if (!isNaN(value) && value.trim() !== "") { | |||||
| formDataJson[key] = parseFloat(value); | |||||
| } | |||||
| // Convert boolean strings to booleans | |||||
| else if (value === "true" || value === "false") { | |||||
| formDataJson[key] = (value === "true"); | |||||
| } | |||||
| else { | |||||
| formDataJson[key] = value; | |||||
| } | |||||
| }); | |||||
| return formDataJson; | |||||
| } | |||||
| const enableControls = () => { | const enableControls = () => { | ||||
| $(".input-control").removeAttr("disabled"); | $(".input-control").removeAttr("disabled"); | ||||
| } | } | ||||
| const disableControls = () => { | const disableControls = () => { | ||||
| $(".input-control").attr("disabled", "disabled"); | $(".input-control").attr("disabled", "disabled"); | ||||
| } | } | ||||
// Restores the prompt-editing UI: swaps the unload button back to the load
// button, re-enables the prompt controls and switches to the prompt tab.
const enablePromptControls = () => {
    $("#unload").hide();
    $("#load").show();
    $(".prompt-control").removeAttr("disabled");
    activatePromptTab();
}
// Locks the prompt-editing UI and switches focus to the parameters tab.
const disablePromptControls = () => {
    $(".prompt-control").attr("disabled", "disabled");
    activateParamsTab();
}
// Removes all rendered output from the output container.
const clearOutput = () => outputContainer.empty();
| @@ -117,27 +161,14 @@ const createConnectionSessionChat = (LLamaExecutorType) => { | |||||
| customPrompt.text(selectedValue); | customPrompt.text(selectedValue); | ||||
| } | } | ||||
// Value of the currently selected option in the #Model dropdown.
const getSelectedModel = () => $("option:selected", "#Model").val();
// Value of the currently selected option in the #Parameter dropdown.
const getSelectedParameter = () => $("option:selected", "#Parameter").val();
// Value of the currently selected option in the #Prompt dropdown.
const getSelectedPrompt = () => $("option:selected", "#Prompt").val();
// Current wall-clock time as a locale-formatted string.
const getDateTime = () => new Date().toLocaleString();
// Short pseudo-random identifier: base-36 digits of Math.random()
// with the leading "0." stripped. Not cryptographically secure.
const randomString = () => Math.random().toString(36).slice(2);
| const scrollToBottom = (force) => { | const scrollToBottom = (force) => { | ||||
| const scrollTop = scrollContainer.scrollTop(); | const scrollTop = scrollContainer.scrollTop(); | ||||
| @@ -151,10 +182,25 @@ const createConnectionSessionChat = (LLamaExecutorType) => { | |||||
| } | } | ||||
| } | } | ||||
// Programmatically switches the UI to the prompt tab.
const activatePromptTab = () => $("#nav-prompt-tab").trigger("click");
// Programmatically switches the UI to the parameters tab.
const activateParamsTab = () => $("#nav-params-tab").trigger("click");
// Shows the busy spinner.
const loaderShow = () => $(".spinner").show();
// Hides the busy spinner.
const loaderHide = () => $(".spinner").hide();
// Map UI functions: wire each button id to its click handler.
const buttonHandlers = {
    load: loadModel,
    unload: unloadModel,
    send: sendPrompt,
    clear: clearOutput,
    cancel: cancelPrompt
};
for (const [id, handler] of Object.entries(buttonHandlers)) {
    $(`#${id}`).on("click", handler);
}
| @@ -165,7 +211,10 @@ const createConnectionSessionChat = (LLamaExecutorType) => { | |||||
| sendPrompt(); | sendPrompt(); | ||||
| } | } | ||||
| }); | }); | ||||
// Keep each slider's value label (the next sibling element) in sync with the
// slider; trigger once immediately so labels show the initial values.
$(".slider").on("input", function () {
    const slider = $(this);
    slider.next().text(slider.val());
}).trigger("input");
| // Map signalr functions | // Map signalr functions | ||||
| @@ -40,11 +40,17 @@ const Enums = { | |||||
| Loaded: 4, | Loaded: 4, | ||||
| Connected: 10 | Connected: 10 | ||||
| }), | }), | ||||
| LLamaExecutorType: Object.freeze({ | |||||
| ExecutorType: Object.freeze({ | |||||
| Interactive: 0, | Interactive: 0, | ||||
| Instruct: 1, | Instruct: 1, | ||||
| Stateless: 2 | Stateless: 2 | ||||
| }), | }), | ||||
// Token lifecycle states. NOTE(review): values are sparse (0, 2, 4, 10) —
// presumably they mirror a server-side enum; confirm before renumbering.
// Key declaration order matters: GetName indexes Object.keys positionally.
TokenType: Object.freeze({
    Begin: 0,
    Content: 2,
    End: 4,
    Cancel: 10
}),
| GetName: (enumType, enumKey) => { | GetName: (enumType, enumKey) => { | ||||
| return Object.keys(enumType)[enumKey] | return Object.keys(enumType)[enumKey] | ||||
| }, | }, | ||||