| @@ -0,0 +1,107 @@ | |||
| using System.Collections.Concurrent; | |||
| namespace LLama.Web.Async | |||
| { | |||
/// <summary>
/// Creates an async/thread-safe guard helper
/// </summary>
/// <seealso cref="AsyncGuard{T}" />
| public class AsyncGuard : AsyncGuard<byte> | |||
| { | |||
| private readonly byte _key; | |||
| private readonly ConcurrentDictionary<byte, bool> _lockData; | |||
| /// <summary> | |||
| /// Initializes a new instance of the <see cref="AsyncGuard"/> class. | |||
| /// </summary> | |||
| public AsyncGuard() | |||
| { | |||
| _key = 0; | |||
| _lockData = new ConcurrentDictionary<byte, bool>(); | |||
| } | |||
| /// <summary> | |||
| /// Guards this instance. | |||
| /// </summary> | |||
/// <returns>true if able to enter the guard; false if it is already guarded</returns>
| public bool Guard() | |||
| { | |||
| return _lockData.TryAdd(_key, true); | |||
| } | |||
/// <summary>
/// Releases the guard.
/// </summary>
/// <returns>true if the guard was released; false if it was not currently held</returns>
| public bool Release() | |||
| { | |||
| return _lockData.TryRemove(_key, out _); | |||
| } | |||
| /// <summary> | |||
| /// Determines whether this instance is guarded. | |||
| /// </summary> | |||
| /// <returns> | |||
| /// <c>true</c> if this instance is guarded; otherwise, <c>false</c>. | |||
| /// </returns> | |||
| public bool IsGuarded() | |||
| { | |||
| return _lockData.ContainsKey(_key); | |||
| } | |||
| } | |||
| public class AsyncGuard<T> | |||
| { | |||
| private readonly ConcurrentDictionary<T, bool> _lockData; | |||
| /// <summary> | |||
| /// Initializes a new instance of the <see cref="AsyncGuard{T}"/> class. | |||
| /// </summary> | |||
| public AsyncGuard() | |||
| { | |||
| _lockData = new ConcurrentDictionary<T, bool>(); | |||
| } | |||
| /// <summary> | |||
| /// Guards the specified value. | |||
| /// </summary> | |||
| /// <param name="value">The value.</param> | |||
| /// <returns>true if able to enter a guard for this value, false if this value is already guarded</returns> | |||
| public bool Guard(T value) | |||
| { | |||
| return _lockData.TryAdd(value, true); | |||
| } | |||
/// <summary>
/// Releases the guard on the specified value.
/// </summary>
/// <param name="value">The value.</param>
/// <returns>true if the guard was released; false if the value was not guarded</returns>
| public bool Release(T value) | |||
| { | |||
| return _lockData.TryRemove(value, out _); | |||
| } | |||
| /// <summary> | |||
| /// Determines whether the specified value is guarded. | |||
| /// </summary> | |||
| /// <param name="value">The value.</param> | |||
| /// <returns> | |||
| /// <c>true</c> if the specified value is guarded; otherwise, <c>false</c>. | |||
| /// </returns> | |||
| public bool IsGuarded(T value) | |||
| { | |||
| return _lockData.ContainsKey(value); | |||
| } | |||
| } | |||
| } | |||
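For context, a minimal usage sketch (the surrounding InferenceRunner type and its RunOnceAsync method are hypothetical, not part of this change): Guard() and Release() are paired in a try/finally so overlapping callers are rejected rather than queued.

using System;
using System.Threading.Tasks;
using LLama.Web.Async;

// Hypothetical caller, for illustration only.
public class InferenceRunner
{
    private readonly AsyncGuard _guard = new AsyncGuard();

    public async Task<bool> RunOnceAsync(Func<Task> work)
    {
        // TryAdd fails while the key is present, so only one caller
        // may enter at a time; concurrent callers get false immediately.
        if (!_guard.Guard())
            return false;

        try
        {
            await work();
            return true;
        }
        finally
        {
            // Always release, even if the work throws.
            _guard.Release();
        }
    }
}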
| @@ -0,0 +1,101 @@ | |||
| using LLama.Common; | |||
| using LLama.Abstractions; | |||
| using LLama.Native; | |||
| namespace LLama.Web.Common | |||
| { | |||
| public class InferenceOptions : IInferenceParams | |||
| { | |||
| /// <summary> | |||
| /// number of tokens to keep from initial prompt | |||
| /// </summary> | |||
| public int TokensKeep { get; set; } = 0; | |||
/// <summary>
/// how many new tokens to predict (n_predict); set to -1 to generate indefinitely
/// until the response completes.
/// </summary>
| public int MaxTokens { get; set; } = -1; | |||
| /// <summary> | |||
| /// logit bias for specific tokens | |||
| /// </summary> | |||
| public Dictionary<int, float>? LogitBias { get; set; } = null; | |||
| /// <summary> | |||
| /// Sequences where the model will stop generating further tokens. | |||
| /// </summary> | |||
| public IEnumerable<string> AntiPrompts { get; set; } = Array.Empty<string>(); | |||
| /// <summary> | |||
| /// path to file for saving/loading model eval state | |||
| /// </summary> | |||
| public string PathSession { get; set; } = string.Empty; | |||
| /// <summary> | |||
| /// string to suffix user inputs with | |||
| /// </summary> | |||
| public string InputSuffix { get; set; } = string.Empty; | |||
| /// <summary> | |||
| /// string to prefix user inputs with | |||
| /// </summary> | |||
| public string InputPrefix { get; set; } = string.Empty; | |||
| /// <summary> | |||
| /// 0 or lower to use vocab size | |||
| /// </summary> | |||
| public int TopK { get; set; } = 40; | |||
| /// <summary> | |||
| /// 1.0 = disabled | |||
| /// </summary> | |||
| public float TopP { get; set; } = 0.95f; | |||
| /// <summary> | |||
| /// 1.0 = disabled | |||
| /// </summary> | |||
| public float TfsZ { get; set; } = 1.0f; | |||
| /// <summary> | |||
| /// 1.0 = disabled | |||
| /// </summary> | |||
| public float TypicalP { get; set; } = 1.0f; | |||
| /// <summary> | |||
| /// 1.0 = disabled | |||
| /// </summary> | |||
| public float Temperature { get; set; } = 0.8f; | |||
| /// <summary> | |||
| /// 1.0 = disabled | |||
| /// </summary> | |||
| public float RepeatPenalty { get; set; } = 1.1f; | |||
| /// <summary> | |||
| /// last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n) | |||
| /// </summary> | |||
| public int RepeatLastTokensCount { get; set; } = 64; | |||
| /// <summary> | |||
| /// frequency penalty coefficient | |||
| /// 0.0 = disabled | |||
| /// </summary> | |||
| public float FrequencyPenalty { get; set; } = .0f; | |||
| /// <summary> | |||
| /// presence penalty coefficient | |||
| /// 0.0 = disabled | |||
| /// </summary> | |||
| public float PresencePenalty { get; set; } = .0f; | |||
/// <summary>
/// Mirostat sampling operates on tokens instead of words.
/// The algorithm is described in the paper https://arxiv.org/abs/2007.14966.
/// 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0
/// </summary>
| public MirostatType Mirostat { get; set; } = MirostatType.Disable; | |||
| /// <summary> | |||
| /// target entropy | |||
| /// </summary> | |||
| public float MirostatTau { get; set; } = 5.0f; | |||
| /// <summary> | |||
| /// learning rate | |||
| /// </summary> | |||
| public float MirostatEta { get; set; } = 0.1f; | |||
| /// <summary> | |||
| /// consider newlines as a repeatable token (penalize_nl) | |||
| /// </summary> | |||
| public bool PenalizeNL { get; set; } = true; | |||
| /// <summary> | |||
| /// A grammar to constrain possible tokens | |||
| /// </summary> | |||
| public SafeLLamaGrammarHandle Grammar { get; set; } = null; | |||
| } | |||
| } | |||
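As a rough illustration of how these defaults are meant to be overridden (values here are arbitrary, and the MirostatType.Mirostat2 member name is assumed from the 0/1/2 mapping documented above):

var inferenceConfig = new InferenceOptions
{
    MaxTokens = 512,                   // stop after at most 512 new tokens
    Temperature = 0.4f,                // lower than the 0.8 default => more deterministic
    Mirostat = MirostatType.Mirostat2, // assumed enum member for "mirostat 2.0"
    MirostatTau = 5.0f,                // target entropy
    AntiPrompts = new[] { "User:" }    // stop when the model starts a user turn
};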
| @@ -4,18 +4,9 @@ | |||
| { | |||
| public ModelLoadType ModelLoadType { get; set; } | |||
| public List<ModelOptions> Models { get; set; } | |||
| public List<PromptOptions> Prompts { get; set; } = new List<PromptOptions>(); | |||
| public List<ParameterOptions> Parameters { get; set; } = new List<ParameterOptions>(); | |||
| public void Initialize() | |||
| { | |||
| foreach (var prompt in Prompts) | |||
| { | |||
| if (File.Exists(prompt.Path)) | |||
| { | |||
| prompt.Prompt = File.ReadAllText(prompt.Path).Trim(); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -1,105 +0,0 @@ | |||
| using LLama.Common; | |||
| using LLama.Abstractions; | |||
| using LLama.Native; | |||
| namespace LLama.Web.Common | |||
| { | |||
| public class ParameterOptions : IInferenceParams | |||
| { | |||
| public string Name { get; set; } | |||
| /// <summary> | |||
| /// number of tokens to keep from initial prompt | |||
| /// </summary> | |||
| public int TokensKeep { get; set; } = 0; | |||
/// <summary>
/// how many new tokens to predict (n_predict); set to -1 to generate indefinitely
/// until the response completes.
/// </summary>
| public int MaxTokens { get; set; } = -1; | |||
| /// <summary> | |||
| /// logit bias for specific tokens | |||
| /// </summary> | |||
| public Dictionary<int, float>? LogitBias { get; set; } = null; | |||
| /// <summary> | |||
| /// Sequences where the model will stop generating further tokens. | |||
| /// </summary> | |||
| public IEnumerable<string> AntiPrompts { get; set; } = Array.Empty<string>(); | |||
| /// <summary> | |||
| /// path to file for saving/loading model eval state | |||
| /// </summary> | |||
| public string PathSession { get; set; } = string.Empty; | |||
| /// <summary> | |||
| /// string to suffix user inputs with | |||
| /// </summary> | |||
| public string InputSuffix { get; set; } = string.Empty; | |||
| /// <summary> | |||
| /// string to prefix user inputs with | |||
| /// </summary> | |||
| public string InputPrefix { get; set; } = string.Empty; | |||
| /// <summary> | |||
| /// 0 or lower to use vocab size | |||
| /// </summary> | |||
| public int TopK { get; set; } = 40; | |||
| /// <summary> | |||
| /// 1.0 = disabled | |||
| /// </summary> | |||
| public float TopP { get; set; } = 0.95f; | |||
| /// <summary> | |||
| /// 1.0 = disabled | |||
| /// </summary> | |||
| public float TfsZ { get; set; } = 1.0f; | |||
| /// <summary> | |||
| /// 1.0 = disabled | |||
| /// </summary> | |||
| public float TypicalP { get; set; } = 1.0f; | |||
| /// <summary> | |||
| /// 1.0 = disabled | |||
| /// </summary> | |||
| public float Temperature { get; set; } = 0.8f; | |||
| /// <summary> | |||
| /// 1.0 = disabled | |||
| /// </summary> | |||
| public float RepeatPenalty { get; set; } = 1.1f; | |||
| /// <summary> | |||
| /// last n tokens to penalize (0 = disable penalty, -1 = context size) (repeat_last_n) | |||
| /// </summary> | |||
| public int RepeatLastTokensCount { get; set; } = 64; | |||
| /// <summary> | |||
| /// frequency penalty coefficient | |||
| /// 0.0 = disabled | |||
| /// </summary> | |||
| public float FrequencyPenalty { get; set; } = .0f; | |||
| /// <summary> | |||
| /// presence penalty coefficient | |||
| /// 0.0 = disabled | |||
| /// </summary> | |||
| public float PresencePenalty { get; set; } = .0f; | |||
/// <summary>
/// Mirostat sampling operates on tokens instead of words.
/// The algorithm is described in the paper https://arxiv.org/abs/2007.14966.
/// 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0
/// </summary>
| public MirostatType Mirostat { get; set; } = MirostatType.Disable; | |||
| /// <summary> | |||
| /// target entropy | |||
| /// </summary> | |||
| public float MirostatTau { get; set; } = 5.0f; | |||
| /// <summary> | |||
| /// learning rate | |||
| /// </summary> | |||
| public float MirostatEta { get; set; } = 0.1f; | |||
| /// <summary> | |||
| /// consider newlines as a repeatable token (penalize_nl) | |||
| /// </summary> | |||
| public bool PenalizeNL { get; set; } = true; | |||
| /// <summary> | |||
| /// A grammar to constrain possible tokens | |||
| /// </summary> | |||
| public SafeLLamaGrammarHandle Grammar { get; set; } = null; | |||
| } | |||
| } | |||
| @@ -1,11 +0,0 @@ | |||
| namespace LLama.Web.Common | |||
| { | |||
| public class PromptOptions | |||
| { | |||
| public string Name { get; set; } | |||
| public string Path { get; set; } | |||
| public string Prompt { get; set; } | |||
| public List<string> AntiPrompt { get; set; } | |||
| public List<string> OutputFilter { get; set; } | |||
| } | |||
| } | |||
| @@ -0,0 +1,14 @@ | |||
| namespace LLama.Web.Common | |||
| { | |||
| public class SessionOptions | |||
| { | |||
| public string Model { get; set; } | |||
| public string Prompt { get; set; } | |||
| public string AntiPrompt { get; set; } | |||
| public List<string> AntiPrompts { get; set; } | |||
| public string OutputFilter { get; set; } | |||
| public List<string> OutputFilters { get; set; } | |||
| public LLamaExecutorType ExecutorType { get; set; } | |||
| } | |||
| } | |||
| @@ -0,0 +1,54 @@ | |||
| using LLama.Web.Common; | |||
| namespace LLama.Web | |||
| { | |||
public static class Extensions
| { | |||
| /// <summary> | |||
| /// Combines the AntiPrompts list and AntiPrompt csv | |||
| /// </summary> | |||
| /// <param name="sessionConfig">The session configuration.</param> | |||
| /// <returns>Combined AntiPrompts with duplicates removed</returns> | |||
| public static List<string> GetAntiPrompts(this Common.SessionOptions sessionConfig) | |||
| { | |||
| return CombineCSV(sessionConfig.AntiPrompts, sessionConfig.AntiPrompt); | |||
| } | |||
| /// <summary> | |||
| /// Combines the OutputFilters list and OutputFilter csv | |||
| /// </summary> | |||
| /// <param name="sessionConfig">The session configuration.</param> | |||
| /// <returns>Combined OutputFilters with duplicates removed</returns> | |||
| public static List<string> GetOutputFilters(this Common.SessionOptions sessionConfig) | |||
| { | |||
| return CombineCSV(sessionConfig.OutputFilters, sessionConfig.OutputFilter); | |||
| } | |||
| /// <summary> | |||
| /// Combines a string list and a csv and removes duplicates | |||
| /// </summary> | |||
| /// <param name="list">The list.</param> | |||
| /// <param name="csv">The CSV.</param> | |||
| /// <returns>Combined list with duplicates removed</returns> | |||
private static List<string> CombineCSV(List<string> list, string csv)
{
    // Guard against a null list; calling Concat(null) would throw.
    var results = list is null || list.Count == 0
        ? CommaSeparatedToList(csv)
        : CommaSeparatedToList(csv).Concat(list);
    return results
        .Distinct()
        .ToList();
}

private static List<string> CommaSeparatedToList(string value)
{
    if (string.IsNullOrEmpty(value))
        return new List<string>();

    return value.Split(",", StringSplitOptions.RemoveEmptyEntries)
        .Select(x => x.Trim())
        .ToList();
| } | |||
| } | |||
| } | |||
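A short sketch of what the merge produces (illustrative values only):

var sessionConfig = new Common.SessionOptions
{
    AntiPrompt = "User:, Assistant:",          // csv field
    AntiPrompts = new List<string> { "User:" } // list field
};

var antiPrompts = sessionConfig.GetAntiPrompts();
// Result: ["User:", "Assistant:"] - trimmed, merged, duplicates removed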
| @@ -6,7 +6,6 @@ namespace LLama.Web.Hubs | |||
| public interface ISessionClient | |||
| { | |||
| Task OnStatus(string connectionId, SessionConnectionStatus status); | |||
| Task OnResponse(ResponseFragment fragment); | |||
| Task OnError(string error); | |||
| } | |||
| } | |||
| @@ -2,16 +2,15 @@ | |||
| using LLama.Web.Models; | |||
| using LLama.Web.Services; | |||
| using Microsoft.AspNetCore.SignalR; | |||
| using System.Diagnostics; | |||
| namespace LLama.Web.Hubs | |||
| { | |||
| public class SessionConnectionHub : Hub<ISessionClient> | |||
| { | |||
| private readonly ILogger<SessionConnectionHub> _logger; | |||
| private readonly ConnectionSessionService _modelSessionService; | |||
| private readonly IModelSessionService _modelSessionService; | |||
| public SessionConnectionHub(ILogger<SessionConnectionHub> logger, ConnectionSessionService modelSessionService) | |||
| public SessionConnectionHub(ILogger<SessionConnectionHub> logger, IModelSessionService modelSessionService) | |||
| { | |||
| _logger = logger; | |||
| _modelSessionService = modelSessionService; | |||
| @@ -27,29 +26,27 @@ namespace LLama.Web.Hubs | |||
| } | |||
| public override async Task OnDisconnectedAsync(Exception? exception) | |||
| public override async Task OnDisconnectedAsync(Exception exception) | |||
| { | |||
| _logger.Log(LogLevel.Information, "[OnDisconnectedAsync], Id: {0}", Context.ConnectionId); | |||
// Remove the connection's session on disconnect
| await _modelSessionService.RemoveAsync(Context.ConnectionId); | |||
| await _modelSessionService.CloseAsync(Context.ConnectionId); | |||
| await base.OnDisconnectedAsync(exception); | |||
| } | |||
| [HubMethodName("LoadModel")] | |||
| public async Task OnLoadModel(LLamaExecutorType executorType, string modelName, string promptName, string parameterName) | |||
| public async Task OnLoadModel(Common.SessionOptions sessionConfig, InferenceOptions inferenceConfig) | |||
| { | |||
| _logger.Log(LogLevel.Information, "[OnLoadModel] - Load new model, Connection: {0}, Model: {1}, Prompt: {2}, Parameter: {3}", Context.ConnectionId, modelName, promptName, parameterName); | |||
| // Remove existing connections session | |||
| await _modelSessionService.RemoveAsync(Context.ConnectionId); | |||
| _logger.Log(LogLevel.Information, "[OnLoadModel] - Load new model, Connection: {0}", Context.ConnectionId); | |||
| await _modelSessionService.CloseAsync(Context.ConnectionId); | |||
| // Create model session | |||
| var modelSessionResult = await _modelSessionService.CreateAsync(executorType, Context.ConnectionId, modelName, promptName, parameterName); | |||
| if (modelSessionResult.HasError) | |||
| var modelSession = await _modelSessionService.CreateAsync(Context.ConnectionId, sessionConfig, inferenceConfig); | |||
| if (modelSession is null) | |||
| { | |||
| await Clients.Caller.OnError(modelSessionResult.Error); | |||
| await Clients.Caller.OnError("Failed to create model session"); | |||
| return; | |||
| } | |||
| @@ -59,40 +56,12 @@ namespace LLama.Web.Hubs | |||
| [HubMethodName("SendPrompt")] | |||
| public async Task OnSendPrompt(string prompt) | |||
| public IAsyncEnumerable<TokenModel> OnSendPrompt(string prompt, InferenceOptions inferConfig, CancellationToken cancellationToken) | |||
| { | |||
| _logger.Log(LogLevel.Information, "[OnSendPrompt] - New prompt received, Connection: {0}", Context.ConnectionId); | |||
| // Get connections session | |||
| var modelSession = await _modelSessionService.GetAsync(Context.ConnectionId); | |||
| if (modelSession is null) | |||
| { | |||
| await Clients.Caller.OnError("No model has been loaded"); | |||
| return; | |||
| } | |||
| // Create unique response id | |||
| var responseId = Guid.NewGuid().ToString(); | |||
| // Send begin of response | |||
| await Clients.Caller.OnResponse(new ResponseFragment(responseId, isFirst: true)); | |||
| // Send content of response | |||
| var stopwatch = Stopwatch.GetTimestamp(); | |||
| await foreach (var fragment in modelSession.InferAsync(prompt, CancellationTokenSource.CreateLinkedTokenSource(Context.ConnectionAborted))) | |||
| { | |||
| await Clients.Caller.OnResponse(new ResponseFragment(responseId, fragment)); | |||
| } | |||
| // Send end of response | |||
| var elapsedTime = Stopwatch.GetElapsedTime(stopwatch); | |||
| var signature = modelSession.IsInferCanceled() | |||
| ? $"Inference cancelled after {elapsedTime.TotalSeconds:F0} seconds" | |||
| : $"Inference completed in {elapsedTime.TotalSeconds:F0} seconds"; | |||
| await Clients.Caller.OnResponse(new ResponseFragment(responseId, signature, isLast: true)); | |||
| _logger.Log(LogLevel.Information, "[OnSendPrompt] - Inference complete, Connection: {0}, Elapsed: {1}, Canceled: {2}", Context.ConnectionId, elapsedTime, modelSession.IsInferCanceled()); | |||
var linkedCancellationToken = CancellationTokenSource.CreateLinkedTokenSource(Context.ConnectionAborted, cancellationToken);
return _modelSessionService.InferAsync(Context.ConnectionId, prompt, inferConfig, linkedCancellationToken.Token);
| } | |||
| } | |||
| } | |||
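Since OnSendPrompt now returns IAsyncEnumerable<TokenModel>, a .NET client can consume it as a SignalR server-to-client stream. A minimal sketch (the hub URL is an assumption; a browser client would use the equivalent connection.stream(...) call):

using Microsoft.AspNetCore.SignalR.Client;

var connection = new HubConnectionBuilder()
    .WithUrl("https://localhost:5001/SessionConnectionHub") // assumed endpoint
    .Build();

await connection.StartAsync();

// Tokens are yielded by the hub as the model generates them.
await foreach (var token in connection.StreamAsync<TokenModel>(
    "SendPrompt", "What is an apple?", new InferenceOptions(), CancellationToken.None))
{
    Console.Write(token.Content);
}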
| @@ -14,4 +14,8 @@ | |||
| <Folder Include="wwwroot\image\" /> | |||
| </ItemGroup> | |||
| <ItemGroup> | |||
| <PackageReference Include="System.Linq.Async" Version="6.0.1" /> | |||
| </ItemGroup> | |||
| </Project> | |||
| @@ -2,12 +2,12 @@ | |||
| using LLama.Web.Common; | |||
| using System.Collections.Concurrent; | |||
| namespace LLama.Web | |||
| namespace LLama.Web.Models | |||
| { | |||
| /// <summary> | |||
| /// Wrapper class for LLamaSharp LLamaWeights | |||
| /// </summary> | |||
| /// <seealso cref="System.IDisposable" /> | |||
| /// <seealso cref="IDisposable" /> | |||
| public class LLamaModel : IDisposable | |||
| { | |||
| private readonly ModelOptions _config; | |||
| @@ -3,46 +3,97 @@ using LLama.Web.Common; | |||
| namespace LLama.Web.Models | |||
| { | |||
| public class ModelSession : IDisposable | |||
| public class ModelSession | |||
| { | |||
| private bool _isFirstInteraction = true; | |||
| private ModelOptions _modelOptions; | |||
| private PromptOptions _promptOptions; | |||
| private ParameterOptions _inferenceOptions; | |||
| private ITextStreamTransform _outputTransform; | |||
| private ILLamaExecutor _executor; | |||
| private readonly string _sessionId; | |||
| private readonly LLamaModel _model; | |||
| private readonly LLamaContext _context; | |||
| private readonly ILLamaExecutor _executor; | |||
| private readonly Common.SessionOptions _sessionParams; | |||
| private readonly ITextStreamTransform _outputTransform; | |||
| private readonly InferenceOptions _defaultInferenceConfig; | |||
| private CancellationTokenSource _cancellationTokenSource; | |||
| public ModelSession(ILLamaExecutor executor, ModelOptions modelOptions, PromptOptions promptOptions, ParameterOptions parameterOptions) | |||
| public ModelSession(LLamaModel model, LLamaContext context, string sessionId, Common.SessionOptions sessionOptions, InferenceOptions inferenceOptions = null) | |||
| { | |||
| _executor = executor; | |||
| _modelOptions = modelOptions; | |||
| _promptOptions = promptOptions; | |||
| _inferenceOptions = parameterOptions; | |||
| _inferenceOptions.AntiPrompts = _promptOptions.AntiPrompt?.Concat(_inferenceOptions.AntiPrompts ?? Enumerable.Empty<string>()).Distinct() ?? _inferenceOptions.AntiPrompts; | |||
| if (_promptOptions.OutputFilter?.Count > 0) | |||
| _outputTransform = new LLamaTransforms.KeywordTextOutputStreamTransform(_promptOptions.OutputFilter, redundancyLength: 5); | |||
| _model = model; | |||
| _context = context; | |||
| _sessionId = sessionId; | |||
| _sessionParams = sessionOptions; | |||
| _defaultInferenceConfig = inferenceOptions ?? new InferenceOptions(); | |||
| _outputTransform = CreateOutputFilter(_sessionParams); | |||
| _executor = CreateExecutor(_model, _context, _sessionParams); | |||
| } | |||
| public string ModelName | |||
| /// <summary> | |||
| /// Gets the session identifier. | |||
| /// </summary> | |||
| public string SessionId => _sessionId; | |||
| /// <summary> | |||
| /// Gets the name of the model. | |||
| /// </summary> | |||
| public string ModelName => _sessionParams.Model; | |||
| /// <summary> | |||
| /// Gets the context. | |||
| /// </summary> | |||
| public LLamaContext Context => _context; | |||
| /// <summary> | |||
| /// Gets the session configuration. | |||
| /// </summary> | |||
| public Common.SessionOptions SessionConfig => _sessionParams; | |||
| /// <summary> | |||
| /// Gets the inference parameters. | |||
| /// </summary> | |||
| public InferenceOptions InferenceParams => _defaultInferenceConfig; | |||
| /// <summary> | |||
| /// Initializes the prompt. | |||
| /// </summary> | |||
| /// <param name="inferenceConfig">The inference configuration.</param> | |||
| /// <param name="cancellationToken">The cancellation token.</param> | |||
| internal async Task InitializePrompt(InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default) | |||
| { | |||
| get { return _modelOptions.Name; } | |||
| if (_sessionParams.ExecutorType == LLamaExecutorType.Stateless) | |||
| return; | |||
| if (string.IsNullOrEmpty(_sessionParams.Prompt)) | |||
| return; | |||
| // Run Initial prompt | |||
| var inferenceParams = ConfigureInferenceParams(inferenceConfig); | |||
| _cancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); | |||
await foreach (var _ in _executor.InferAsync(_sessionParams.Prompt, inferenceParams, _cancellationTokenSource.Token))
{
    // We don't need the response to the initial prompt, so exit on the first token
    break;
}
| } | |||
| public IAsyncEnumerable<string> InferAsync(string message, CancellationTokenSource cancellationTokenSource) | |||
| /// <summary> | |||
| /// Runs inference on the model context | |||
| /// </summary> | |||
| /// <param name="message">The message.</param> | |||
| /// <param name="inferenceConfig">The inference configuration.</param> | |||
| /// <param name="cancellationToken">The cancellation token.</param> | |||
| /// <returns></returns> | |||
| internal IAsyncEnumerable<string> InferAsync(string message, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default) | |||
| { | |||
| _cancellationTokenSource = cancellationTokenSource; | |||
| if (_isFirstInteraction) | |||
| { | |||
| _isFirstInteraction = false; | |||
| message = _promptOptions.Prompt + message; | |||
| } | |||
| var inferenceParams = ConfigureInferenceParams(inferenceConfig); | |||
| _cancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); | |||
| var inferenceStream = _executor.InferAsync(message, inferenceParams, _cancellationTokenSource.Token); | |||
| if (_outputTransform is not null) | |||
| return _outputTransform.TransformAsync(_executor.InferAsync(message, _inferenceOptions, _cancellationTokenSource.Token)); | |||
| return _outputTransform.TransformAsync(inferenceStream); | |||
| return _executor.InferAsync(message, _inferenceOptions, _cancellationTokenSource.Token); | |||
| return inferenceStream; | |||
| } | |||
| @@ -56,13 +107,36 @@ namespace LLama.Web.Models | |||
| return _cancellationTokenSource.IsCancellationRequested; | |||
| } | |||
| public void Dispose() | |||
| /// <summary> | |||
| /// Configures the inference parameters. | |||
| /// </summary> | |||
| /// <param name="inferenceConfig">The inference configuration.</param> | |||
| private IInferenceParams ConfigureInferenceParams(InferenceOptions inferenceConfig) | |||
| { | |||
| var inferenceParams = inferenceConfig ?? _defaultInferenceConfig; | |||
| inferenceParams.AntiPrompts = _sessionParams.GetAntiPrompts(); | |||
| return inferenceParams; | |||
| } | |||
| private ITextStreamTransform CreateOutputFilter(Common.SessionOptions sessionConfig) | |||
| { | |||
| _inferenceOptions = null; | |||
| _outputTransform = null; | |||
| var outputFilters = sessionConfig.GetOutputFilters(); | |||
| if (outputFilters.Count > 0) | |||
| return new LLamaTransforms.KeywordTextOutputStreamTransform(outputFilters); | |||
| _executor?.Context.Dispose(); | |||
| _executor = null; | |||
| return null; | |||
| } | |||
| private ILLamaExecutor CreateExecutor(LLamaModel model, LLamaContext context, Common.SessionOptions sessionConfig) | |||
| { | |||
return sessionConfig.ExecutorType switch
{
    LLamaExecutorType.Interactive => new InteractiveExecutor(context),
    LLamaExecutorType.Instruct => new InstructExecutor(context),
    LLamaExecutorType.Stateless => new StatelessExecutor(model.LLamaWeights, model.ModelParams),
    _ => default // unknown executor type; the caller treats a null session as a failure
};
| } | |||
| } | |||
| } | |||
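For illustration, the per-call configuration wins over the session default, while anti-prompts are always re-applied from the session config. A sketch assuming a model/context pair is already loaded (these members are internal to the web project):

var session = new ModelSession(model, context, "session-1", sessionOptions);

// Uses the default InferenceOptions captured at construction.
await foreach (var text in session.InferAsync("Hello"))
    Console.Write(text);

// Supplies a one-off configuration for this call only.
var oneOff = new InferenceOptions { Temperature = 0.2f };
await foreach (var text in session.InferAsync("Hello again", oneOff))
    Console.Write(text);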
| @@ -1,18 +0,0 @@ | |||
| namespace LLama.Web.Models | |||
| { | |||
| public class ResponseFragment | |||
| { | |||
| public ResponseFragment(string id, string content = null, bool isFirst = false, bool isLast = false) | |||
| { | |||
| Id = id; | |||
| IsLast = isLast; | |||
| IsFirst = isFirst; | |||
| Content = content; | |||
| } | |||
| public string Id { get; set; } | |||
| public string Content { get; set; } | |||
| public bool IsLast { get; set; } | |||
| public bool IsFirst { get; set; } | |||
| } | |||
| } | |||
| @@ -0,0 +1,24 @@ | |||
| namespace LLama.Web.Models | |||
| { | |||
| public class TokenModel | |||
| { | |||
| public TokenModel(string id, string content = null, TokenType tokenType = TokenType.Content) | |||
| { | |||
| Id = id; | |||
| Content = content; | |||
| TokenType = tokenType; | |||
| } | |||
| public string Id { get; set; } | |||
| public string Content { get; set; } | |||
| public TokenType TokenType { get; set; } | |||
| } | |||
| public enum TokenType | |||
| { | |||
| Begin = 0, | |||
| Content = 2, | |||
| End = 4, | |||
| Cancel = 10 | |||
| } | |||
| } | |||
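A consumer of the stream would typically branch on TokenType to mark the phases of a response. A minimal sketch, assuming tokenStream is the IAsyncEnumerable<TokenModel> returned by the hub:

await foreach (var token in tokenStream)
{
    switch (token.TokenType)
    {
        case TokenType.Begin:
            Console.WriteLine($"-- response {token.Id} started --");
            break;
        case TokenType.Content:
            Console.Write(token.Content);
            break;
        case TokenType.End:
        case TokenType.Cancel:
            // End and Cancel both terminate the stream.
            Console.WriteLine($"\n-- {token.TokenType} --");
            break;
    }
}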
| @@ -1,96 +0,0 @@ | |||
| @page | |||
| @model InstructModel | |||
| @{ | |||
| } | |||
| @Html.AntiForgeryToken() | |||
| <div class="d-flex flex-row h-100 pt-1 pb-1"> | |||
| <div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto"> | |||
| <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center"> | |||
| <h4>Instruct</h4> | |||
| <div> | |||
| <span>Hub: <b id="socket">Disconnected</b></span> | |||
| </div> | |||
| </div> | |||
| <div class="m-1"> | |||
| <small>Model</small> | |||
| <select id="Model" class="form-control form-select input-control" required="required" autocomplete="off"> | |||
| <option value="" disabled selected hidden>Please Select</option> | |||
| @foreach (var modelOption in Model.Options.Models) | |||
| { | |||
| <option value="@modelOption.Name">@modelOption.Name</option> | |||
| } | |||
| </select> | |||
| </div> | |||
| <div class="m-1"> | |||
| <small>Parameters</small> | |||
| <select id="Parameter" class="form-control form-select input-control" required="required" autocomplete="off"> | |||
| <option value="" disabled selected hidden>Please Select</option> | |||
| @foreach (var parameterOption in Model.Options.Parameters) | |||
| { | |||
| <option value="@parameterOption.Name">@parameterOption.Name</option> | |||
| } | |||
| </select> | |||
| </div> | |||
| <div class="m-1"> | |||
| <small>Prompt</small> | |||
| <select id="Prompt" class="form-control form-select input-control" required="required" autocomplete="off"> | |||
| <option value="" disabled selected hidden>Please Select</option> | |||
| @foreach (var promptOption in Model.Options.Prompts) | |||
| { | |||
| <option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option> | |||
| } | |||
| </select> | |||
| <textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea> | |||
| </div> | |||
| <div class="d-flex flex-grow-1"></div> | |||
| <div id="session-details" class="m-1"></div> | |||
| <div class="m-1"> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column h-100 w-75"> | |||
| <div class="section-head"> | |||
| </div> | |||
| <div id="scroll-container" class="section-content border"> | |||
| <div id="output-container" class="d-flex flex-column gap-1 p-1"> | |||
| </div> | |||
| </div> | |||
| <div class="section-foot"> | |||
| <div class="input-group mt-2"> | |||
| <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea> | |||
| <div class="d-flex flex-column"> | |||
| <div class="d-flex flex-fill"> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button> | |||
| </div> | |||
| <div class="d-flex"> | |||
| <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off"> | |||
| <i class="bi-x-circle"></i> | |||
| </button> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off"> | |||
| <i class="bi-trash3"></i> | |||
| </button> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| @{ await Html.RenderPartialAsync("_ChatTemplates"); } | |||
| @section Scripts { | |||
| <script src="~/js/sessionconnectionchat.js"></script> | |||
| <script> | |||
| createConnectionSessionChat(Enums.LLamaExecutorType.Instruct); | |||
| </script> | |||
| } | |||
| @@ -1,34 +0,0 @@ | |||
| using LLama.Web.Common; | |||
| using LLama.Web.Models; | |||
| using LLama.Web.Services; | |||
| using Microsoft.AspNetCore.Mvc; | |||
| using Microsoft.AspNetCore.Mvc.RazorPages; | |||
| using Microsoft.Extensions.Options; | |||
| namespace LLama.Web.Pages | |||
| { | |||
| public class InstructModel : PageModel | |||
| { | |||
| private readonly ILogger<InstructModel> _logger; | |||
| private readonly ConnectionSessionService _modelSessionService; | |||
| public InstructModel(ILogger<InstructModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService) | |||
| { | |||
| _logger = logger; | |||
| Options = options.Value; | |||
| _modelSessionService = modelSessionService; | |||
| } | |||
| public LLamaOptions Options { get; set; } | |||
| public void OnGet() | |||
| { | |||
| } | |||
| public async Task<IActionResult> OnPostCancel(CancelModel model) | |||
| { | |||
| await _modelSessionService.CancelAsync(model.ConnectionId); | |||
| return new JsonResult(default); | |||
| } | |||
| } | |||
| } | |||
| @@ -1,4 +0,0 @@ | |||
| .section-content { | |||
| flex: 1; | |||
| overflow-y: scroll; | |||
| } | |||
| @@ -1,96 +0,0 @@ | |||
| @page | |||
| @model InteractiveModel | |||
| @{ | |||
| } | |||
| @Html.AntiForgeryToken() | |||
| <div class="d-flex flex-row h-100 pt-1 pb-1"> | |||
| <div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto"> | |||
| <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center"> | |||
| <h4>Interactive</h4> | |||
| <div> | |||
| <span>Hub: <b id="socket">Disconnected</b></span> | |||
| </div> | |||
| </div> | |||
| <div class="m-1"> | |||
| <small>Model</small> | |||
| <select id="Model" class="form-control form-select input-control" required="required" autocomplete="off"> | |||
| <option value="" disabled selected hidden>Please Select</option> | |||
| @foreach (var modelOption in Model.Options.Models) | |||
| { | |||
| <option value="@modelOption.Name">@modelOption.Name</option> | |||
| } | |||
| </select> | |||
| </div> | |||
| <div class="m-1"> | |||
| <small>Parameters</small> | |||
| <select id="Parameter" class="form-control form-select input-control" required="required" autocomplete="off"> | |||
| <option value="" disabled selected hidden>Please Select</option> | |||
| @foreach (var parameterOption in Model.Options.Parameters) | |||
| { | |||
| <option value="@parameterOption.Name">@parameterOption.Name</option> | |||
| } | |||
| </select> | |||
| </div> | |||
| <div class="m-1"> | |||
| <small>Prompt</small> | |||
| <select id="Prompt" class="form-control form-select input-control" required="required" autocomplete="off"> | |||
| <option value="" disabled selected hidden>Please Select</option> | |||
| @foreach (var promptOption in Model.Options.Prompts) | |||
| { | |||
| <option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option> | |||
| } | |||
| </select> | |||
| <textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea> | |||
| </div> | |||
| <div class="d-flex flex-grow-1"></div> | |||
| <div id="session-details" class="m-1"></div> | |||
| <div class="m-1"> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column h-100 w-75"> | |||
| <div class="section-head"> | |||
| </div> | |||
| <div id="scroll-container" class="section-content border"> | |||
| <div id="output-container" class="d-flex flex-column gap-1 p-1"> | |||
| </div> | |||
| </div> | |||
| <div class="section-foot"> | |||
| <div class="input-group mt-2"> | |||
| <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea> | |||
| <div class="d-flex flex-column"> | |||
| <div class="d-flex flex-fill"> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button> | |||
| </div> | |||
| <div class="d-flex"> | |||
| <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off"> | |||
| <i class="bi-x-circle"></i> | |||
| </button> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off"> | |||
| <i class="bi-trash3"></i> | |||
| </button> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| @{ await Html.RenderPartialAsync("_ChatTemplates");} | |||
| @section Scripts { | |||
| <script src="~/js/sessionconnectionchat.js"></script> | |||
| <script> | |||
| createConnectionSessionChat(Enums.LLamaExecutorType.Interactive); | |||
| </script> | |||
| } | |||
| @@ -1,34 +0,0 @@ | |||
| using LLama.Web.Common; | |||
| using LLama.Web.Models; | |||
| using LLama.Web.Services; | |||
| using Microsoft.AspNetCore.Mvc; | |||
| using Microsoft.AspNetCore.Mvc.RazorPages; | |||
| using Microsoft.Extensions.Options; | |||
| namespace LLama.Web.Pages | |||
| { | |||
| public class InteractiveModel : PageModel | |||
| { | |||
| private readonly ILogger<InteractiveModel> _logger; | |||
| private readonly ConnectionSessionService _modelSessionService; | |||
| public InteractiveModel(ILogger<InteractiveModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService) | |||
| { | |||
| _logger = logger; | |||
| Options = options.Value; | |||
| _modelSessionService = modelSessionService; | |||
| } | |||
| public LLamaOptions Options { get; set; } | |||
| public void OnGet() | |||
| { | |||
| } | |||
| public async Task<IActionResult> OnPostCancel(CancelModel model) | |||
| { | |||
| await _modelSessionService.CancelAsync(model.ConnectionId); | |||
| return new JsonResult(default); | |||
| } | |||
| } | |||
| } | |||
| @@ -1,4 +0,0 @@ | |||
| .section-content { | |||
| flex: 1; | |||
| overflow-y: scroll; | |||
| } | |||
| @@ -1,97 +0,0 @@ | |||
| @page | |||
| @model StatelessModel | |||
| @{ | |||
| } | |||
| @Html.AntiForgeryToken() | |||
| <div class="d-flex flex-row h-100 pt-1 pb-1"> | |||
| <div class="d-flex flex-column h-100 border me-1 w-25 overflow-auto"> | |||
| <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center"> | |||
| <h4>Stateless</h4> | |||
| <div> | |||
| <span>Hub: <b id="socket">Disconnected</b></span> | |||
| </div> | |||
| </div> | |||
| <div class="m-1"> | |||
| <small>Model</small> | |||
| <select id="Model" class="form-control form-select input-control" required="required" autocomplete="off"> | |||
| <option value="" disabled selected hidden>Please Select</option> | |||
| @foreach (var modelOption in Model.Options.Models) | |||
| { | |||
| <option value="@modelOption.Name">@modelOption.Name</option> | |||
| } | |||
| </select> | |||
| </div> | |||
| <div class="m-1"> | |||
| <small>Parameters</small> | |||
| <select id="Parameter" class="form-control form-select input-control" required="required" autocomplete="off"> | |||
| <option value="" disabled selected hidden>Please Select</option> | |||
| @foreach (var parameterOption in Model.Options.Parameters) | |||
| { | |||
| <option value="@parameterOption.Name">@parameterOption.Name</option> | |||
| } | |||
| </select> | |||
| </div> | |||
| <div class="m-1"> | |||
| <small>Prompt</small> | |||
| <select id="Prompt" class="form-control form-select input-control" required="required" autocomplete="off"> | |||
| <option value="" disabled selected hidden>Please Select</option> | |||
| @foreach (var promptOption in Model.Options.Prompts) | |||
| { | |||
| <option value="@promptOption.Name" data-prompt="@promptOption.Prompt">@promptOption.Name</option> | |||
| } | |||
| </select> | |||
| <textarea id="PromptText" class="form-control mt-1" rows="12" disabled="disabled" style="font-size:13px;resize:none"></textarea> | |||
| </div> | |||
| <div class="d-flex flex-grow-1"></div> | |||
| <div id="session-details" class="m-1"></div> | |||
| <div class="m-1"> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="load">Create Session</button> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column h-100 w-75"> | |||
| <div class="section-head"> | |||
| </div> | |||
| <div id="scroll-container" class="section-content border"> | |||
| <div id="output-container" class="d-flex flex-column gap-1 p-1"> | |||
| </div> | |||
| </div> | |||
| <div class="section-foot"> | |||
| <div class="input-group mt-2"> | |||
| <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea> | |||
| <div class="d-flex flex-column"> | |||
| <div class="d-flex flex-fill"> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button> | |||
| </div> | |||
| <div class="d-flex"> | |||
| <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off"> | |||
| <i class="bi-x-circle"></i> | |||
| </button> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off"> | |||
| <i class="bi-trash3"></i> | |||
| </button> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| @{ await Html.RenderPartialAsync("_ChatTemplates"); } | |||
| @section Scripts { | |||
| <script src="~/js/sessionconnectionchat.js"></script> | |||
| <script> | |||
| createConnectionSessionChat(Enums.LLamaExecutorType.Stateless); | |||
| </script> | |||
| } | |||
| @@ -1,34 +0,0 @@ | |||
| using LLama.Web.Common; | |||
| using LLama.Web.Models; | |||
| using LLama.Web.Services; | |||
| using Microsoft.AspNetCore.Mvc; | |||
| using Microsoft.AspNetCore.Mvc.RazorPages; | |||
| using Microsoft.Extensions.Options; | |||
| namespace LLama.Web.Pages | |||
| { | |||
| public class StatelessModel : PageModel | |||
| { | |||
| private readonly ILogger<StatelessModel> _logger; | |||
| private readonly ConnectionSessionService _modelSessionService; | |||
| public StatelessModel(ILogger<StatelessModel> logger, IOptions<LLamaOptions> options, ConnectionSessionService modelSessionService) | |||
| { | |||
| _logger = logger; | |||
| Options = options.Value; | |||
| _modelSessionService = modelSessionService; | |||
| } | |||
| public LLamaOptions Options { get; set; } | |||
| public void OnGet() | |||
| { | |||
| } | |||
| public async Task<IActionResult> OnPostCancel(CancelModel model) | |||
| { | |||
| await _modelSessionService.CancelAsync(model.ConnectionId); | |||
| return new JsonResult(default); | |||
| } | |||
| } | |||
| } | |||
| @@ -1,4 +0,0 @@ | |||
| .section-content { | |||
| flex: 1; | |||
| overflow-y: scroll; | |||
| } | |||
| @@ -1,10 +1,121 @@ | |||
| @page | |||
| @using LLama.Web.Common; | |||
| @model IndexModel | |||
| @{ | |||
| ViewData["Title"] = "Home page"; | |||
| ViewData["Title"] = "Inference Demo"; | |||
| } | |||
| <div class="text-center"> | |||
| <h1 class="display-4">Welcome</h1> | |||
| <p>Learn about <a href="https://docs.microsoft.com/aspnet/core">building Web apps with ASP.NET Core</a>.</p> | |||
| @Html.AntiForgeryToken() | |||
| <div class="d-flex flex-row h-100 pt-1 pb-1"> | |||
| <div class="d-flex flex-column h-100 border me-1 w-25"> | |||
| <div class="d-flex flex-row justify-content-between border-bottom p-1 align-items-center"> | |||
| <div> | |||
| <span>@ViewData["Title"]</span> | |||
| </div> | |||
| <div> | |||
| <small>Socket: <b id="socket">Disconnected</b></small> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column overflow-auto"> | |||
| <form id="SessionParameters"> | |||
| <div class="d-flex flex-column m-1"> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>Model</small> | |||
| @Html.DropDownListFor(m => m.SessionOptions.Model, new SelectList(Model.Options.Models, "Name", "Name"), new { @class = "form-control prompt-control" ,required="required", autocomplete="off"}) | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>Inference Type</small> | |||
| @Html.DropDownListFor(m => m.SessionOptions.ExecutorType, Html.GetEnumSelectList<LLamaExecutorType>(), new { @class = "form-control prompt-control" ,required="required", autocomplete="off"}) | |||
| </div> | |||
| <nav> | |||
| <div class="nav nav-tabs" id="nav-tab" role="tablist"> | |||
| <button class="nav-link active w-50" id="nav-prompt-tab" data-bs-toggle="tab" data-bs-target="#nav-prompt" type="button" role="tab">Prompt</button> | |||
| <button class="nav-link w-50" id="nav-params-tab" data-bs-toggle="tab" data-bs-target="#nav-params" type="button" role="tab">Parameters</button> | |||
| </div> | |||
| </nav> | |||
| <div class="tab-content" id="nav-tabContent"> | |||
| <div class="tab-pane fade show active" id="nav-prompt" role="tabpanel" aria-labelledby="nav-prompt-tab"> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>Prompt</small> | |||
| @Html.TextAreaFor(m => Model.SessionOptions.Prompt, new { @type="text", @class = "form-control prompt-control", rows=8}) | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>AntiPrompts</small> | |||
| @Html.TextBoxFor(m => Model.SessionOptions.AntiPrompt, new { @type="text", @class = "form-control prompt-control"}) | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>OutputFilter</small> | |||
| @Html.TextBoxFor(m => Model.SessionOptions.OutputFilter, new { @type="text", @class = "form-control prompt-control"}) | |||
| </div> | |||
| </div> | |||
| <div class="tab-pane fade" id="nav-params" role="tabpanel" aria-labelledby="nav-params-tab"> | |||
| @{ | |||
| await Html.RenderPartialAsync("_Parameters", Model.InferenceOptions); | |||
| } | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </form> | |||
| </div> | |||
| <div class="d-flex flex-grow-1"></div> | |||
| <div id="session-details" class="m-1"></div> | |||
| <div class="m-1"> | |||
| <button class="btn btn-outline-success w-100" type="button" id="load"> | |||
| <div class="d-flex align-items-center justify-content-center"> | |||
| <img class="spinner me-2" style="display:none" src="~/image/loading.gif" width="20" /> | |||
| Begin Session | |||
| </div> | |||
| </button> | |||
| <button class="btn btn-outline-danger w-100" type="button" id="unload" style="display:none">End Session</button> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column h-100 w-75"> | |||
| <div class="section-head"> | |||
| </div> | |||
| <div id="scroll-container" class="section-content border"> | |||
| <div id="output-container" class="d-flex flex-column gap-1 p-1"> | |||
| </div> | |||
| </div> | |||
| <div class="section-foot"> | |||
| <div class="input-group mt-2"> | |||
| <textarea id="input" type="text" class="form-control" value="what is a tree?" style="resize:none" rows="4">What is an apple?</textarea> | |||
| <div class="d-flex flex-column"> | |||
| <div class="d-flex flex-fill"> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="send" disabled="disabled" autocomplete="off">Send Message</button> | |||
| </div> | |||
| <div class="d-flex"> | |||
| <button class="btn btn-outline-secondary w-100" type="button" id="cancel" autocomplete="off"> | |||
| <i class="bi-x-circle"></i> | |||
| </button> | |||
| <button class="btn btn-outline-secondary input-control w-100" type="button" id="clear" disabled="disabled" autocomplete="off"> | |||
| <i class="bi-trash3"></i> | |||
| </button> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| @{ | |||
| await Html.RenderPartialAsync("_ChatTemplates"); | |||
| } | |||
| @section Scripts { | |||
| <script src="~/js/sessionconnectionchat.js"></script> | |||
| <script> | |||
| createConnectionSessionChat(); | |||
| </script> | |||
| } | |||
| @@ -1,5 +1,7 @@ | |||
| using Microsoft.AspNetCore.Mvc; | |||
| using LLama.Web.Common; | |||
| using Microsoft.AspNetCore.Mvc; | |||
| using Microsoft.AspNetCore.Mvc.RazorPages; | |||
| using Microsoft.Extensions.Options; | |||
| namespace LLama.Web.Pages | |||
| { | |||
| @@ -7,14 +9,33 @@ namespace LLama.Web.Pages | |||
| { | |||
| private readonly ILogger<IndexModel> _logger; | |||
| public IndexModel(ILogger<IndexModel> logger) | |||
| public IndexModel(ILogger<IndexModel> logger, IOptions<LLamaOptions> options) | |||
| { | |||
| _logger = logger; | |||
| Options = options.Value; | |||
| } | |||
| public LLamaOptions Options { get; set; } | |||
| [BindProperty] | |||
| public Common.SessionOptions SessionOptions { get; set; } | |||
| [BindProperty] | |||
| public InferenceOptions InferenceOptions { get; set; } | |||
| public void OnGet() | |||
| { | |||
| SessionOptions = new Common.SessionOptions | |||
| { | |||
| Prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.", | |||
| AntiPrompt = "User:", | |||
| // OutputFilter = "User:, Response:" | |||
| }; | |||
| InferenceOptions = new InferenceOptions | |||
| { | |||
| Temperature = 0.8f | |||
| }; | |||
| } | |||
| } | |||
| } | |||
| @@ -12,7 +12,7 @@ | |||
| <img src="~/image/human.png" width="60"/> | |||
| </div> | |||
| <div class="d-flex flex-column flex-fill justify-content-between"> | |||
| <span class="w-100" style="resize:none" >{{text}}</span> | |||
| <span class="content" style="resize:none" >{{text}}</span> | |||
| <div class="d-flex justify-content-end"> | |||
| <i>{{date}}</i> | |||
| </div> | |||
| @@ -26,9 +26,7 @@ | |||
| <img src="~/image/robot.png" width="60"/> | |||
| </div> | |||
| <div id="{{id}}" class="d-flex flex-column flex-fill justify-content-between"> | |||
| <span class="content"> | |||
| <img src="~/image/loading.gif" width="30" /> | |||
| </span> | |||
| <span class="content"><img src="~/image/loading.gif" width="30" /></span> | |||
| <div class="d-flex justify-content-end"> | |||
| <div class="d-flex flex-column align-items-end"> | |||
| <i class="date"></i> | |||
| @@ -41,20 +39,6 @@ | |||
| </div> | |||
| </script> | |||
| <script id="sessionDetailsTemplate" type="text/html"> | |||
| <div> | |||
| <small>Session Details </small> | |||
| </div> | |||
| <div> | |||
| <i>Model: </i> | |||
| <span>{{model}}</span> | |||
| </div> | |||
| <div> | |||
| <i>Prompt: </i> | |||
| <span>{{prompt}}</span> | |||
| </div> | |||
| <div> | |||
| <i>Parameters: </i> | |||
| <span>{{parameter}}</span> | |||
| </div> | |||
| <script id="signatureTemplate" type="text/html"> | |||
| <span>{{content}}</span> | |||
| </script> | |||
| @@ -3,7 +3,7 @@ | |||
| <head> | |||
| <meta charset="utf-8" /> | |||
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |||
| <title>@ViewData["Title"] - LLama.Web</title> | |||
| <title>@ViewData["Title"] - LLamaSharp.Web</title> | |||
| <link rel="stylesheet" href="~/lib/bootstrap/dist/css/bootstrap.min.css" /> | |||
| <link href="~/lib/bootstrap/dist/css/bootstrap-icons.css" rel="stylesheet" /> | |||
| <link rel="stylesheet" href="~/css/site.css" asp-append-version="true" /> | |||
| @@ -13,24 +13,26 @@ | |||
| <header> | |||
| <nav class="navbar navbar-expand-sm navbar-toggleable-sm navbar-light bg-white border-bottom box-shadow "> | |||
| <div class="container"> | |||
| <a class="navbar-brand" asp-area="" asp-page="/Index">LLama.Web</a> | |||
| <a class="navbar-brand" asp-area="" asp-page="/Index">LLamaSharp.Web</a> | |||
| <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target=".navbar-collapse" aria-controls="navbarSupportedContent" | |||
| aria-expanded="false" aria-label="Toggle navigation"> | |||
| <span class="navbar-toggler-icon"></span> | |||
| </button> | |||
| <div class="navbar-collapse collapse d-sm-inline-flex justify-content-between"> | |||
| <ul class="navbar-nav flex-grow-1"> | |||
| <ul class="navbar-nav flex-grow-1 justify-content-between"> | |||
| <li class="nav-item"> | |||
| <a class="nav-link text-dark" asp-area="" asp-page="/Index">Home</a> | |||
| <a class="nav-link text-dark" asp-page="/Index"></a> | |||
| </li> | |||
| <li class="nav-item"> | |||
| <a class="nav-link text-dark" asp-area="" asp-page="/Executor/Interactive">Interactive</a> | |||
| </li> | |||
| <li class="nav-item"> | |||
| <a class="nav-link text-dark" asp-area="" asp-page="/Executor/Instruct">Instruct</a> | |||
| </li> | |||
| <li class="nav-item"> | |||
| <a class="nav-link text-dark" asp-area="" asp-page="/Executor/Stateless">Stateless</a> | |||
| <a class="nav-link text-dark" href="https://github.com/SciSharp/LLamaSharp" target="_blank"> | |||
| <div class="d-flex flex-row align-items-center"> | |||
| <h5 class="mb-0"> | |||
| <i class="bi bi-github"></i> | |||
| <span>LLamaSharp</span> | |||
| </h5> | |||
| </div> | |||
| </a> | |||
| </li> | |||
| </ul> | |||
| </div> | |||
| @@ -38,14 +40,14 @@ | |||
| </nav> | |||
| </header> | |||
| <main class="container" role="main" > | |||
| @RenderBody() | |||
| </main> | |||
| <main class="container" role="main"> | |||
| @RenderBody() | |||
| </main> | |||
| <footer class="border-top footer text-muted"> | |||
| <div class="container"> | |||
| © 2023 - LLama.Web | |||
| © 2023 - LLamaSharp.Web | |||
| </div> | |||
| </footer> | |||
| @@ -0,0 +1,137 @@ | |||
@using LLama.Common;
@model LLama.Abstractions.IInferenceParams
| <div class="d-flex flex-row gap-3"> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>MaxTokens</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.MaxTokens, new { @type="range", @class = "slider", min="-1", max="2048", step="1" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>TokensKeep</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.TokensKeep, new { @type="range", @class = "slider", min="0", max="2048", step="1" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-row gap-3"> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>TopK</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.TopK, new { @type="range", @class = "slider", min="-1", max="100", step="1" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>TopP</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.TopP, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-row gap-3"> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>TypicalP</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.TypicalP, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>Temperature</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.Temperature, new { @type="range", @class = "slider", min="0.0", max="1.5", step="0.01" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-row gap-3"> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>RepeatPenalty</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.RepeatPenalty, new { @type="range", @class = "slider", min="0.0", max="2.0", step="0.01" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>RepeatLastTokensCount</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.RepeatLastTokensCount, new { @type="range", @class = "slider", min="0", max="2048", step="1" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-row gap-3"> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>FrequencyPenalty</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.FrequencyPenalty, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>PresencePenalty</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.PresencePenalty, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-row gap-3"> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>TfsZ</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.TfsZ, new { @type="range", @class = "slider",min="0.0", max="1.0", step="0.01" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>-</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| <input class="slider" type="range" value="0" disabled /> | |||
| <label></label> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>Sampler Type</small> | |||
| @Html.DropDownListFor(m => m.Mirostat, Html.GetEnumSelectList<MirostatType>(), new { @class = "form-control form-select" }) | |||
| </div> | |||
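| @* The dropdown above binds Mirostat to a MirostatType enum via Html.GetEnumSelectList. A hedged sketch of the enum shape it is assumed to enumerate (member names inferred from the Mirostat sliders below, not confirmed by this diff): | |||
|    public enum MirostatType | |||
|    { | |||
|        Disable = 0,   // standard sampling using the sliders above | |||
|        Mirostat = 1,  // Mirostat v1, driven by MirostatTau/MirostatEta | |||
|        Mirostat2 = 2  // Mirostat v2 | |||
|    } | |||
| *@ | |||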
| <div class="d-flex flex-row gap-3"> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>MirostatTau</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.MirostatTau, new { @type="range", @class = "slider", min="0.0", max="10.0", step="0.01" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| <div class="d-flex flex-column mb-2"> | |||
| <small>MirostatEta</small> | |||
| <div class="d-flex flex-row slider-container"> | |||
| @Html.TextBoxFor(m => m.MirostatEta, new { @type="range", @class = "slider", min="0.0", max="1.0", step="0.01" }) | |||
| <label>0</label> | |||
| </div> | |||
| </div> | |||
| </div> | |||
| @@ -1,6 +1,7 @@ | |||
| using LLama.Web.Common; | |||
| using LLama.Web.Hubs; | |||
| using LLama.Web.Services; | |||
| using Microsoft.Extensions.DependencyInjection; | |||
| namespace LLama.Web | |||
| { | |||
| @@ -20,7 +21,9 @@ namespace LLama.Web | |||
| .BindConfiguration(nameof(LLamaOptions)); | |||
| // Services DI | |||
| builder.Services.AddHostedService<ModelLoaderService>(); | |||
| builder.Services.AddSingleton<IModelService, ModelService>(); | |||
| builder.Services.AddSingleton<IModelSessionService, ModelSessionService>(); | |||
| var app = builder.Build(); | |||
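| For context, the registrations above are consumed through ordinary constructor injection; a minimal, hypothetical consumer sketch (the page model name is illustrative and not part of this PR): | |||
|    using Microsoft.AspNetCore.Mvc.RazorPages; | |||
|    public class SessionModel : PageModel | |||
|    { | |||
|        private readonly IModelSessionService _modelSessionService; | |||
|        // Resolved from the singleton registered in Program.cs above | |||
|        public SessionModel(IModelSessionService modelSessionService) | |||
|        { | |||
|            _modelSessionService = modelSessionService; | |||
|        } | |||
|    } | |||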
| @@ -1,94 +0,0 @@ | |||
| using LLama.Abstractions; | |||
| using LLama.Web.Common; | |||
| using LLama.Web.Models; | |||
| using Microsoft.Extensions.Options; | |||
| using System.Collections.Concurrent; | |||
| using System.Drawing; | |||
| namespace LLama.Web.Services | |||
| { | |||
| /// <summary> | |||
| /// Example Service for handling a model session for a websockets connection lifetime | |||
| /// Each websocket connection will create its own unique session and context allowing you to use multiple tabs to compare prompts etc | |||
| /// </summary> | |||
| public class ConnectionSessionService : IModelSessionService | |||
| { | |||
| private readonly LLamaOptions _options; | |||
| private readonly ILogger<ConnectionSessionService> _logger; | |||
| private readonly ConcurrentDictionary<string, ModelSession> _modelSessions; | |||
| public ConnectionSessionService(ILogger<ConnectionSessionService> logger, IOptions<LLamaOptions> options) | |||
| { | |||
| _logger = logger; | |||
| _options = options.Value; | |||
| _modelSessions = new ConcurrentDictionary<string, ModelSession>(); | |||
| } | |||
| public Task<ModelSession> GetAsync(string connectionId) | |||
| { | |||
| _modelSessions.TryGetValue(connectionId, out var modelSession); | |||
| return Task.FromResult(modelSession); | |||
| } | |||
| public Task<IServiceResult<ModelSession>> CreateAsync(LLamaExecutorType executorType, string connectionId, string modelName, string promptName, string parameterName) | |||
| { | |||
| var modelOption = _options.Models.FirstOrDefault(x => x.Name == modelName); | |||
| if (modelOption is null) | |||
| return Task.FromResult(ServiceResult.FromError<ModelSession>($"Model option '{modelName}' not found")); | |||
| var promptOption = _options.Prompts.FirstOrDefault(x => x.Name == promptName); | |||
| if (promptOption is null) | |||
| return Task.FromResult(ServiceResult.FromError<ModelSession>($"Prompt option '{promptName}' not found")); | |||
| var parameterOption = _options.Parameters.FirstOrDefault(x => x.Name == parameterName); | |||
| if (parameterOption is null) | |||
| return Task.FromResult(ServiceResult.FromError<ModelSession>($"Parameter option '{parameterName}' not found")); | |||
| //Max instance | |||
| var currentInstances = _modelSessions.Count(x => x.Value.ModelName == modelOption.Name); | |||
| if (modelOption.MaxInstances > -1 && currentInstances >= modelOption.MaxInstances) | |||
| return Task.FromResult(ServiceResult.FromError<ModelSession>("Maximum model instances reached")); | |||
| // Create model | |||
| var llamaModel = new LLamaContext(modelOption); | |||
| // Create executor | |||
| ILLamaExecutor executor = executorType switch | |||
| { | |||
| LLamaExecutorType.Interactive => new InteractiveExecutor(llamaModel), | |||
| LLamaExecutorType.Instruct => new InstructExecutor(llamaModel), | |||
| LLamaExecutorType.Stateless => new StatelessExecutor(llamaModel), | |||
| _ => default | |||
| }; | |||
| // Create session | |||
| var modelSession = new ModelSession(executor, modelOption, promptOption, parameterOption); | |||
| if (!_modelSessions.TryAdd(connectionId, modelSession)) | |||
| return Task.FromResult(ServiceResult.FromError<ModelSession>("Failed to create model session")); | |||
| return Task.FromResult(ServiceResult.FromValue(modelSession)); | |||
| } | |||
| public Task<bool> RemoveAsync(string connectionId) | |||
| { | |||
| if (_modelSessions.TryRemove(connectionId, out var modelSession)) | |||
| { | |||
| modelSession.CancelInfer(); | |||
| modelSession.Dispose(); | |||
| return Task.FromResult(true); | |||
| } | |||
| return Task.FromResult(false); | |||
| } | |||
| public Task<bool> CancelAsync(string connectionId) | |||
| { | |||
| if (_modelSessions.TryGetValue(connectionId, out var modelSession)) | |||
| { | |||
| modelSession.CancelInfer(); | |||
| return Task.FromResult(true); | |||
| } | |||
| return Task.FromResult(false); | |||
| } | |||
| } | |||
| } | |||
| @@ -1,4 +1,5 @@ | |||
| using LLama.Web.Common; | |||
| using LLama.Web.Models; | |||
| namespace LLama.Web.Services | |||
| { | |||
| @@ -1,16 +1,88 @@ | |||
| using LLama.Web.Common; | |||
| using LLama.Web.Models; | |||
| namespace LLama.Web.Services | |||
| { | |||
| public interface IModelSessionService | |||
| { | |||
| /// <summary> | |||
| /// Gets the ModelSession with the specified Id. | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <returns>The ModelSession if it exists, otherwise null</returns> | |||
| Task<ModelSession> GetAsync(string sessionId); | |||
| /// <summary> | |||
| /// Gets all ModelSessions | |||
| /// </summary> | |||
| /// <returns>A collection of all ModelSession instances</returns> | |||
| Task<IEnumerable<ModelSession>> GetAllAsync(); | |||
| /// <summary> | |||
| /// Creates a new ModelSession | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <param name="sessionOptions">The session configuration.</param> | |||
| /// <param name="inferenceOptions">The default inference configuration, will be used for all inference where no infer configuration is supplied.</param> | |||
| /// <param name="cancellationToken">The cancellation token.</param> | |||
| /// <returns></returns> | |||
| /// <exception cref="System.Exception"> | |||
| /// Session with id {sessionId} already exists | |||
| /// or | |||
| /// Failed to create model session | |||
| /// </exception> | |||
| Task<ModelSession> CreateAsync(string sessionId, Common.SessionOptions sessionOptions, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default); | |||
| /// <summary> | |||
| /// Closes the session | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <returns></returns> | |||
| Task<bool> CloseAsync(string sessionId); | |||
| /// <summary> | |||
| /// Runs inference on the current ModelSession | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <param name="prompt">The prompt.</param> | |||
| /// <param name="inferenceConfig">The inference configuration, if null session default is used</param> | |||
| /// <param name="cancellationToken">The cancellation token.</param> | |||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||
| IAsyncEnumerable<TokenModel> InferAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default); | |||
| /// <summary> | |||
| /// Runs inference on the current ModelSession, streaming the results as text | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <param name="prompt">The prompt.</param> | |||
| /// <param name="inferenceOptions">The inference configuration, if null session default is used</param> | |||
| /// <param name="cancellationToken">The cancellation token.</param> | |||
| /// <returns>Streaming async result of <see cref="System.String" /></returns> | |||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||
| IAsyncEnumerable<string> InferTextAsync(string sessionId, string prompt, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default); | |||
| /// <summary> | |||
| /// Runs inference on the current ModelSession and returns the completed result | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <param name="prompt">The prompt.</param> | |||
| /// <param name="inferenceOptions">The inference configuration, if null session default is used</param> | |||
| /// <param name="cancellationToken">The cancellation token.</param> | |||
| /// <returns>Completed inference result as string</returns> | |||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||
| Task<string> InferTextCompleteAsync(string sessionId, string prompt, InferenceOptions inferenceOptions = null, CancellationToken cancellationToken = default); | |||
| /// <summary> | |||
| /// Cancels the current inference action. | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <returns></returns> | |||
| Task<bool> CancelAsync(string sessionId); | |||
| } | |||
| } | |||
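| A minimal consumption sketch of the new interface, assuming an injected IModelSessionService instance and a populated SessionOptions (the session id and prompt are illustrative): | |||
|    var session = await modelSessionService.CreateAsync("session-1", sessionOptions); | |||
|    await foreach (var text in modelSessionService.InferTextAsync("session-1", "Hello")) | |||
|    { | |||
|        Console.Write(text); // tokens arrive as they are generated | |||
|    } | |||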
| @@ -0,0 +1,42 @@ | |||
| namespace LLama.Web.Services | |||
| { | |||
| /// <summary> | |||
| /// Service for managing loading/preloading of models at app startup | |||
| /// </summary> | |||
| /// <typeparam name="T">Type used to identify contexts</typeparam> | |||
| /// <seealso cref="Microsoft.Extensions.Hosting.IHostedService" /> | |||
| public class ModelLoaderService : IHostedService | |||
| { | |||
| private readonly IModelService _modelService; | |||
| /// <summary> | |||
| /// Initializes a new instance of the <see cref="ModelLoaderService"/> class. | |||
| /// </summary> | |||
| /// <param name="modelService">The model service.</param> | |||
| public ModelLoaderService(IModelService modelService) | |||
| { | |||
| _modelService = modelService; | |||
| } | |||
| /// <summary> | |||
| /// Triggered when the application host is ready to start the service. | |||
| /// </summary> | |||
| /// <param name="cancellationToken">Indicates that the start process has been aborted.</param> | |||
| public async Task StartAsync(CancellationToken cancellationToken) | |||
| { | |||
| await _modelService.LoadModels(); | |||
| } | |||
| /// <summary> | |||
| /// Triggered when the application host is performing a graceful shutdown. | |||
| /// </summary> | |||
| /// <param name="cancellationToken">Indicates that the shutdown process should no longer be graceful.</param> | |||
| public async Task StopAsync(CancellationToken cancellationToken) | |||
| { | |||
| await _modelService.UnloadModels(); | |||
| } | |||
| } | |||
| } | |||
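| IModelService itself is outside this excerpt; a hedged sketch of the surface implied by its call sites (LoadModels/UnloadModels above, GetOrCreateModelAndContext/RemoveContext in ModelSessionService below). Method names are grounded in those calls; parameter and return types are assumptions: | |||
|    public interface IModelService | |||
|    { | |||
|        Task LoadModels();     // preload configured models at startup | |||
|        Task UnloadModels();   // release loaded models at shutdown | |||
|        Task<(LLamaWeights, LLamaContext)> GetOrCreateModelAndContext(string modelName, string contextKey); | |||
|        Task<bool> RemoveContext(string modelName, string contextKey); | |||
|    } | |||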
| @@ -1,5 +1,6 @@ | |||
| using LLama.Web.Async; | |||
| using LLama.Web.Common; | |||
| using LLama.Web.Models; | |||
| using System.Collections.Concurrent; | |||
| namespace LLama.Web.Services | |||
| @@ -0,0 +1,216 @@ | |||
| using LLama.Web.Async; | |||
| using LLama.Web.Common; | |||
| using LLama.Web.Models; | |||
| using System.Collections.Concurrent; | |||
| using System.Diagnostics; | |||
| using System.Runtime.CompilerServices; | |||
| namespace LLama.Web.Services | |||
| { | |||
| /// <summary> | |||
| /// Example service for handling a model session over a websocket connection's lifetime. | |||
| /// Each websocket connection creates its own unique session and context, allowing you to use multiple tabs to compare prompts etc. | |||
| /// </summary> | |||
| public class ModelSessionService : IModelSessionService | |||
| { | |||
| private readonly AsyncGuard<string> _sessionGuard; | |||
| private readonly IModelService _modelService; | |||
| private readonly ConcurrentDictionary<string, ModelSession> _modelSessions; | |||
| /// <summary> | |||
| /// Initializes a new instance of the <see cref="ModelSessionService{T}"/> class. | |||
| /// </summary> | |||
| /// <param name="modelService">The model service.</param> | |||
| /// <param name="modelSessionStateService">The model session state service.</param> | |||
| public ModelSessionService(IModelService modelService) | |||
| { | |||
| _modelService = modelService; | |||
| _sessionGuard = new AsyncGuard<string>(); | |||
| _modelSessions = new ConcurrentDictionary<string, ModelSession>(); | |||
| } | |||
| /// <summary> | |||
| /// Gets the ModelSession with the specified Id. | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <returns>The ModelSession if it exists, otherwise null</returns> | |||
| public Task<ModelSession> GetAsync(string sessionId) | |||
| { | |||
| return Task.FromResult(_modelSessions.TryGetValue(sessionId, out var session) ? session : null); | |||
| } | |||
| /// <summary> | |||
| /// Gets all ModelSessions | |||
| /// </summary> | |||
| /// <returns>A collection of all ModelSession instances</returns> | |||
| public Task<IEnumerable<ModelSession>> GetAllAsync() | |||
| { | |||
| return Task.FromResult<IEnumerable<ModelSession>>(_modelSessions.Values); | |||
| } | |||
| /// <summary> | |||
| /// Creates a new ModelSession | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <param name="sessionConfig">The session configuration.</param> | |||
| /// <param name="inferenceConfig">The default inference configuration, will be used for all inference where no infer configuration is supplied.</param> | |||
| /// <param name="cancellationToken">The cancellation token.</param> | |||
| /// <returns></returns> | |||
| /// <exception cref="System.Exception"> | |||
| /// Session with id {sessionId} already exists | |||
| /// or | |||
| /// Failed to create model session | |||
| /// </exception> | |||
| public async Task<ModelSession> CreateAsync(string sessionId, Common.SessionOptions sessionConfig, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default) | |||
| { | |||
| if (_modelSessions.TryGetValue(sessionId, out _)) | |||
| throw new Exception($"Session with id {sessionId} already exists"); | |||
| // Create context | |||
| var (model, context) = await _modelService.GetOrCreateModelAndContext(sessionConfig.Model, sessionId); | |||
| // Create session | |||
| var modelSession = new ModelSession(model, context, sessionId, sessionConfig, inferenceConfig); | |||
| if (!_modelSessions.TryAdd(sessionId, modelSession)) | |||
| throw new Exception($"Failed to create model session"); | |||
| // Run initial Prompt | |||
| await modelSession.InitializePrompt(inferenceConfig, cancellationToken); | |||
| return modelSession; | |||
| } | |||
| /// <summary> | |||
| /// Closes the session | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <returns></returns> | |||
| public async Task<bool> CloseAsync(string sessionId) | |||
| { | |||
| if (_modelSessions.TryRemove(sessionId, out var modelSession)) | |||
| { | |||
| modelSession.CancelInfer(); | |||
| return await _modelService.RemoveContext(modelSession.ModelName, sessionId); | |||
| } | |||
| return false; | |||
| } | |||
| /// <summary> | |||
| /// Runs inference on the current ModelSession | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <param name="prompt">The prompt.</param> | |||
| /// <param name="inferenceConfig">The inference configuration, if null session default is used</param> | |||
| /// <param name="cancellationToken">The cancellation token.</param> | |||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||
| public async IAsyncEnumerable<TokenModel> InferAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) | |||
| { | |||
| if (!_sessionGuard.Guard(sessionId)) | |||
| throw new Exception($"Inference is already running for this session"); | |||
| try | |||
| { | |||
| if (!_modelSessions.TryGetValue(sessionId, out var modelSession)) | |||
| yield break; | |||
| // Send begin of response | |||
| var timestamp = Stopwatch.GetTimestamp(); | |||
| yield return new TokenModel(default, default, TokenType.Begin); | |||
| // Send content of response | |||
| await foreach (var token in modelSession.InferAsync(prompt, inferenceConfig, cancellationToken).ConfigureAwait(false)) | |||
| { | |||
| yield return new TokenModel(default, token); | |||
| } | |||
| // Send end of response | |||
| var elapsedTime = GetElapsed(timestamp); | |||
| var endTokenType = modelSession.IsInferCanceled() ? TokenType.Cancel : TokenType.End; | |||
| var signature = endTokenType == TokenType.Cancel | |||
| ? $"Inference cancelled after {elapsedTime / 1000:F0} seconds" | |||
| : $"Inference completed in {elapsedTime / 1000:F0} seconds"; | |||
| yield return new TokenModel(default, signature, endTokenType); | |||
| } | |||
| finally | |||
| { | |||
| _sessionGuard.Release(sessionId); | |||
| } | |||
| } | |||
| /// <summary> | |||
| /// Runs inference on the current ModelSession, streaming the results as text | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <param name="prompt">The prompt.</param> | |||
| /// <param name="inferenceConfig">The inference configuration, if null session default is used</param> | |||
| /// <param name="cancellationToken">The cancellation token.</param> | |||
| /// <returns>Streaming async result of <see cref="System.String" /></returns> | |||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||
| public IAsyncEnumerable<string> InferTextAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default) | |||
| { | |||
| async IAsyncEnumerable<string> InferTextInternal() | |||
| { | |||
| await foreach (var token in InferAsync(sessionId, prompt, inferenceConfig, cancellationToken).ConfigureAwait(false)) | |||
| { | |||
| if (token.TokenType == TokenType.Content) | |||
| yield return token.Content; | |||
| } | |||
| } | |||
| return InferTextInternal(); | |||
| } | |||
| /// <summary> | |||
| /// Runs inference on the current ModelSession and returns the completed result | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <param name="prompt">The prompt.</param> | |||
| /// <param name="inferenceConfig">The inference configuration, if null session default is used</param> | |||
| /// <param name="cancellationToken">The cancellation token.</param> | |||
| /// <returns>Completed inference result as string</returns> | |||
| /// <exception cref="System.Exception">Inference is already running for this session</exception> | |||
| public async Task<string> InferTextCompleteAsync(string sessionId, string prompt, InferenceOptions inferenceConfig = null, CancellationToken cancellationToken = default) | |||
| { | |||
| var inferResult = await InferAsync(sessionId, prompt, inferenceConfig, cancellationToken) | |||
| .Where(x => x.TokenType == TokenType.Content) | |||
| .Select(x => x.Content) | |||
| .ToListAsync(cancellationToken: cancellationToken); | |||
| return string.Concat(inferResult); | |||
| } | |||
| /// <summary> | |||
| /// Cancels the current inference action. | |||
| /// </summary> | |||
| /// <param name="sessionId">The session identifier.</param> | |||
| /// <returns></returns> | |||
| public Task<bool> CancelAsync(string sessionId) | |||
| { | |||
| if (_modelSessions.TryGetValue(sessionId, out var modelSession)) | |||
| { | |||
| modelSession.CancelInfer(); | |||
| return Task.FromResult(true); | |||
| } | |||
| return Task.FromResult(false); | |||
| } | |||
| /// <summary> | |||
| /// Gets the elapsed time in milliseconds. | |||
| /// </summary> | |||
| /// <param name="timestamp">The timestamp.</param> | |||
| /// <returns></returns> | |||
| private static int GetElapsed(long timestamp) | |||
| { | |||
| return (int)Stopwatch.GetElapsedTime(timestamp).TotalMilliseconds; | |||
| } | |||
| } | |||
| } | |||
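| The client script later in this diff opens "/SessionConnectionHub" and calls connection.stream("SendPrompt", ...). A hedged sketch of the hub method shape that would serve that stream; the hub implementation is not part of this excerpt, the signature is inferred from the client call, and using the connection id as the session id is an assumption: | |||
|    using Microsoft.AspNetCore.SignalR; | |||
|    public class SessionConnectionHub : Hub | |||
|    { | |||
|        private readonly IModelSessionService _modelSessionService; | |||
|        public SessionConnectionHub(IModelSessionService modelSessionService) | |||
|        { | |||
|            _modelSessionService = modelSessionService; | |||
|        } | |||
|        // SignalR exposes the IAsyncEnumerable to the client as a stream subscription | |||
|        public IAsyncEnumerable<TokenModel> SendPrompt(string prompt, InferenceOptions options, CancellationToken cancellationToken) | |||
|        { | |||
|            return _modelSessionService.InferAsync(Context.ConnectionId, prompt, options, cancellationToken); | |||
|        } | |||
|    } | |||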
| @@ -7,48 +7,34 @@ | |||
| }, | |||
| "AllowedHosts": "*", | |||
| "LLamaOptions": { | |||
| "ModelLoadType": "Single", | |||
| "Models": [ | |||
| { | |||
| "Name": "WizardLM-7B", | |||
| "MaxInstances": 2, | |||
| "MaxInstances": 20, | |||
| "ModelPath": "D:\\Repositories\\AI\\Models\\wizardLM-7B.ggmlv3.q4_0.bin", | |||
| "ContextSize": 2048 | |||
| } | |||
| ], | |||
| "Parameters": [ | |||
| { | |||
| "Name": "Default", | |||
| "Temperature": 0.6 | |||
| } | |||
| ], | |||
| "Prompts": [ | |||
| { | |||
| "Name": "None", | |||
| "Prompt": "" | |||
| }, | |||
| { | |||
| "Name": "Alpaca", | |||
| "Path": "D:\\Repositories\\AI\\Prompts\\alpaca.txt", | |||
| "AntiPrompt": [ | |||
| "User:" | |||
| ], | |||
| "OutputFilter": [ | |||
| "Response:", | |||
| "User:" | |||
| ] | |||
| }, | |||
| { | |||
| "Name": "ChatWithBob", | |||
| "Path": "D:\\Repositories\\AI\\Prompts\\chat-with-bob.txt", | |||
| "AntiPrompt": [ | |||
| "User:" | |||
| ], | |||
| "OutputFilter": [ | |||
| "Bob:", | |||
| "User:" | |||
| ] | |||
| "ContextSize": 2048, | |||
| "BatchSize": 2048, | |||
| "Threads": 4, | |||
| "GpuLayerCount": 6, | |||
| "UseMemorymap": true, | |||
| "UseMemoryLock": false, | |||
| "MainGpu": 0, | |||
| "LowVram": false, | |||
| "Seed": 1686349486, | |||
| "UseFp16Memory": true, | |||
| "Perplexity": false, | |||
| "LoraAdapter": "", | |||
| "LoraBase": "", | |||
| "EmbeddingMode": false, | |||
| "TensorSplits": null, | |||
| "GroupedQueryAttention": 1, | |||
| "RmsNormEpsilon": 0.000005, | |||
| "RopeFrequencyBase": 10000.0, | |||
| "RopeFrequencyScale": 1.0, | |||
| "MulMatQ": false, | |||
| "Encoding": "UTF-8" | |||
| } | |||
| ] | |||
| } | |||
| } | |||
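| These model keys bind through the LLamaOptions configuration registered in Program.cs (BindConfiguration(nameof(LLamaOptions))). A hedged fragment of the model options class implied by the JSON; the class name and property types are inferred from the values, not confirmed by this diff: | |||
|    public class ModelOptions | |||
|    { | |||
|        public string Name { get; set; } | |||
|        public int MaxInstances { get; set; } | |||
|        public string ModelPath { get; set; } | |||
|        public int ContextSize { get; set; } | |||
|        public int BatchSize { get; set; } | |||
|        public int Threads { get; set; } | |||
|        public int GpuLayerCount { get; set; } | |||
|        public bool UseMemorymap { get; set; } | |||
|        public int Seed { get; set; } | |||
|        public string Encoding { get; set; } | |||
|        // ...the remaining keys (UseMemoryLock, MainGpu, RopeFrequencyBase, etc.) follow the same pattern | |||
|    } | |||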
| @@ -22,13 +22,30 @@ footer { | |||
| @media (min-width: 768px) { | |||
| html { | |||
| font-size: 16px; | |||
| } | |||
| } | |||
| .btn:focus, .btn:active:focus, .btn-link.nav-link:focus, .form-control:focus, .form-check-input:focus { | |||
| box-shadow: 0 0 0 0.1rem white, 0 0 0 0.25rem #258cfb; | |||
| } | |||
| #scroll-container { | |||
| flex: 1; | |||
| overflow-y: scroll; | |||
| } | |||
| #output-container .content { | |||
| white-space: break-spaces; | |||
| } | |||
| .slider-container > .slider { | |||
| width: 100%; | |||
| } | |||
| .slider-container > label { | |||
| width: 50px; | |||
| text-align: center; | |||
| } | |||
| @@ -1,26 +1,26 @@ | |||
| const createConnectionSessionChat = () => { | |||
| const outputErrorTemplate = $("#outputErrorTemplate").html(); | |||
| const outputInfoTemplate = $("#outputInfoTemplate").html(); | |||
| const outputUserTemplate = $("#outputUserTemplate").html(); | |||
| const outputBotTemplate = $("#outputBotTemplate").html(); | |||
| const sessionDetailsTemplate = $("#sessionDetailsTemplate").html(); | |||
| const signatureTemplate = $("#signatureTemplate").html(); | |||
| let connectionId; | |||
| let inferenceSession; | |||
| const connection = new signalR.HubConnectionBuilder().withUrl("/SessionConnectionHub").build(); | |||
| const scrollContainer = $("#scroll-container"); | |||
| const outputContainer = $("#output-container"); | |||
| const chatInput = $("#input"); | |||
| const onStatus = (connection, status) => { | |||
| connectionId = connection; | |||
| if (status == Enums.SessionConnectionStatus.Connected) { | |||
| $("#socket").text("Connected").addClass("text-success"); | |||
| } | |||
| else if (status == Enums.SessionConnectionStatus.Loaded) { | |||
| loaderHide(); | |||
| enableControls(); | |||
| $("#session-details").html(Mustache.render(sessionDetailsTemplate, { model: getSelectedModel(), prompt: getSelectedPrompt(), parameter: getSelectedParameter() })); | |||
| $("#load").hide(); | |||
| $("#unload").show(); | |||
| onInfo(`New model session successfully started`) | |||
| } | |||
| } | |||
| @@ -36,30 +36,31 @@ const createConnectionSessionChat = (LLamaExecutorType) => { | |||
| let responseContent; | |||
| let responseContainer; | |||
| let responseFirstToken; | |||
| const onResponse = (response) => { | |||
| if (!response) | |||
| return; | |||
| if (response.tokenType == Enums.TokenType.Begin) { | |||
| const uniqueId = randomString(); | |||
| outputContainer.append(Mustache.render(outputBotTemplate, { id: uniqueId, ...response })); | |||
| responseContainer = $(`#${uniqueId}`); | |||
| responseContent = responseContainer.find(".content"); | |||
| responseFirstToken = true; | |||
| scrollToBottom(true); | |||
| return; | |||
| } | |||
| if (response.tokenType == Enums.TokenType.End || response.tokenType == Enums.TokenType.Cancel) { | |||
| enableControls(); | |||
| responseContainer.find(".signature").append(response.content); | |||
| responseContainer.find(".signature").append(Mustache.render(signatureTemplate, response)); | |||
| scrollToBottom(); | |||
| } | |||
| else { | |||
| if (responseFirstToken) { | |||
| responseContent.empty(); | |||
| responseFirstToken = false; | |||
| responseContainer.find(".date").append(getDateTime()); | |||
| } | |||
| responseContent.append(response.content); | |||
| @@ -67,45 +68,88 @@ const createConnectionSessionChat = (LLamaExecutorType) => { | |||
| } | |||
| } | |||
| const sendPrompt = async () => { | |||
| const text = chatInput.val(); | |||
| if (text) { | |||
| chatInput.val(null); | |||
| disableControls(); | |||
| outputContainer.append(Mustache.render(outputUserTemplate, { text: text, date: getDateTime() })); | |||
| inferenceSession = await connection | |||
| .stream("SendPrompt", text, serializeFormToJson('SessionParameters')) | |||
| .subscribe({ | |||
| next: onResponse, | |||
| complete: onResponse, | |||
| error: onError, | |||
| }); | |||
| scrollToBottom(true); | |||
| } | |||
| } | |||
| const cancelPrompt = async () => { | |||
| if (inferenceSession) | |||
| inferenceSession.dispose(); | |||
| } | |||
| const loadModel = async () => { | |||
| const modelName = getSelectedModel(); | |||
| const promptName = getSelectedPrompt(); | |||
| const parameterName = getSelectedParameter(); | |||
| if (!modelName || !promptName || !parameterName) { | |||
| onError("Please select a valid Model, Parameter and Prompt"); | |||
| return; | |||
| } | |||
| const sessionParams = serializeFormToJson('SessionParameters'); | |||
| loaderShow(); | |||
| disableControls(); | |||
| disablePromptControls(); | |||
| $("#load").attr("disabled", "disabled"); | |||
| // TODO: Split parameters sets | |||
| await connection.invoke('LoadModel', sessionParams, sessionParams); | |||
| } | |||
| const unloadModel = async () => { | |||
| disableControls(); | |||
| enablePromptControls(); | |||
| $("#load").removeAttr("disabled"); | |||
| } | |||
| const serializeFormToJson = (form) => { | |||
| const formDataJson = {}; | |||
| const formData = new FormData(document.getElementById(form)); | |||
| formData.forEach((value, key) => { | |||
| if (key.includes(".")) | |||
| key = key.split(".")[1]; | |||
| // Convert number strings to numbers | |||
| if (!isNaN(value) && value.trim() !== "") { | |||
| formDataJson[key] = parseFloat(value); | |||
| } | |||
| // Convert boolean strings to booleans | |||
| else if (value === "true" || value === "false") { | |||
| formDataJson[key] = (value === "true"); | |||
| } | |||
| else { | |||
| formDataJson[key] = value; | |||
| } | |||
| }); | |||
| return formDataJson; | |||
| } | |||
| const enableControls = () => { | |||
| $(".input-control").removeAttr("disabled"); | |||
| } | |||
| const disableControls = () => { | |||
| $(".input-control").attr("disabled", "disabled"); | |||
| } | |||
| const enablePromptControls = () => { | |||
| $("#load").show(); | |||
| $("#unload").hide(); | |||
| $(".prompt-control").removeAttr("disabled"); | |||
| activatePromptTab(); | |||
| } | |||
| const disablePromptControls = () => { | |||
| $(".prompt-control").attr("disabled", "disabled"); | |||
| activateParamsTab(); | |||
| } | |||
| const clearOutput = () => { | |||
| outputContainer.empty(); | |||
| } | |||
| @@ -117,27 +161,14 @@ const createConnectionSessionChat = (LLamaExecutorType) => { | |||
| customPrompt.text(selectedValue); | |||
| } | |||
| const getSelectedModel = () => { | |||
| return $("option:selected", "#Model").val(); | |||
| } | |||
| const getSelectedParameter = () => { | |||
| return $("option:selected", "#Parameter").val(); | |||
| } | |||
| const getSelectedPrompt = () => { | |||
| return $("option:selected", "#Prompt").val(); | |||
| } | |||
| const getDateTime = () => { | |||
| const dateTime = new Date(); | |||
| return dateTime.toLocaleString(); | |||
| } | |||
| const randomString = () => { | |||
| return Math.random().toString(36).slice(2); | |||
| } | |||
| const scrollToBottom = (force) => { | |||
| const scrollTop = scrollContainer.scrollTop(); | |||
| @@ -151,10 +182,25 @@ const createConnectionSessionChat = (LLamaExecutorType) => { | |||
| } | |||
| } | |||
| const activatePromptTab = () => { | |||
| $("#nav-prompt-tab").trigger("click"); | |||
| } | |||
| const activateParamsTab = () => { | |||
| $("#nav-params-tab").trigger("click"); | |||
| } | |||
| const loaderShow = () => { | |||
| $(".spinner").show(); | |||
| } | |||
| const loaderHide = () => { | |||
| $(".spinner").hide(); | |||
| } | |||
| // Map UI functions | |||
| $("#load").on("click", loadModel); | |||
| $("#unload").on("click", unloadModel); | |||
| $("#send").on("click", sendPrompt); | |||
| $("#clear").on("click", clearOutput); | |||
| $("#cancel").on("click", cancelPrompt); | |||
| @@ -165,7 +211,10 @@ const createConnectionSessionChat = (LLamaExecutorType) => { | |||
| sendPrompt(); | |||
| } | |||
| }); | |||
| $(".slider").on("input", function (e) { | |||
| const slider = $(this); | |||
| slider.next().text(slider.val()); | |||
| }).trigger("input"); | |||
| // Map signalr functions | |||
| @@ -40,11 +40,17 @@ const Enums = { | |||
| Loaded: 4, | |||
| Connected: 10 | |||
| }), | |||
| ExecutorType: Object.freeze({ | |||
| Interactive: 0, | |||
| Instruct: 1, | |||
| Stateless: 2 | |||
| }), | |||
| TokenType: Object.freeze({ | |||
| Begin: 0, | |||
| Content: 2, | |||
| End: 4, | |||
| Cancel: 10 | |||
| }), | |||
| GetName: (enumType, enumKey) => { | |||
| return Object.keys(enumType)[enumKey] | |||
| }, | |||