using LLama;
using LLama.Common;
using Microsoft.KernelMemory.AI;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace LLamaSharp.KernelMemory
{
    /// <summary>
    /// Provides text generation for LLamaSharp.
    /// </summary>
    /// <remarks>
    /// The instance loads the model weights and creates a context in its constructor,
    /// and releases both in <see cref="Dispose"/>.
    /// </remarks>
    public class LlamaSharpTextGeneration : ITextGeneration, IDisposable
    {
        private readonly LLamaSharpConfig _config;
        private readonly LLamaWeights _weights;
        private readonly InstructExecutor _executor;
        private readonly LLamaContext _context;

        /// <summary>
        /// Initializes a new instance of the <see cref="LlamaSharpTextGeneration"/> class.
        /// </summary>
        /// <param name="config">The configuration for LLamaSharp.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is <c>null</c>.</exception>
        public LlamaSharpTextGeneration(LLamaSharpConfig config)
        {
            // Fail fast with a descriptive exception; the model path is read from the
            // configuration immediately below, so a null configuration can never work.
            _config = config ?? throw new ArgumentNullException(nameof(config));

            var parameters = new ModelParams(_config.ModelPath)
            {
                // Fall back to fixed defaults for any setting the configuration leaves unset.
                ContextSize = _config.ContextSize ?? 1024,
                Seed = _config.Seed ?? 0,
                GpuLayerCount = _config.GpuLayerCount ?? 20
            };

            var weights = LLamaWeights.LoadFromFile(parameters);
            try
            {
                var context = weights.CreateContext(parameters);
                try
                {
                    _executor = new InstructExecutor(context);
                }
                catch
                {
                    // The caller never receives an instance to dispose when the
                    // constructor throws, so release the context here.
                    context.Dispose();
                    throw;
                }

                _context = context;
            }
            catch
            {
                // Same reasoning as above: do not leak the loaded weights on failure.
                weights.Dispose();
                throw;
            }

            _weights = weights;
        }

        /// <inheritdoc/>
        public void Dispose()
        {
            // The context was created from the weights, so it is released first.
            _context.Dispose();
            _weights.Dispose();
        }

        /// <inheritdoc/>
        public IAsyncEnumerable<string> GenerateTextAsync(string prompt, TextGenerationOptions options, CancellationToken cancellationToken = default)
        {
            // Translate the Kernel Memory options to LLamaSharp inference parameters and
            // hand the resulting stream from the executor straight back to the caller.
            return _executor.InferAsync(prompt, OptionsToParams(options), cancellationToken: cancellationToken);
        }

        /// <summary>
        /// Converts Kernel Memory <see cref="TextGenerationOptions"/> into LLamaSharp <see cref="InferenceParams"/>.
        /// </summary>
        /// <param name="options">The generation options supplied by the caller.</param>
        /// <returns>
        /// Inference parameters carrying the stop sequences, temperature, penalties and top-p
        /// from <paramref name="options"/>; the token limit defaults to 1024 when not specified.
        /// </returns>
        private static InferenceParams OptionsToParams(TextGenerationOptions options)
        {
            return new InferenceParams()
            {
                AntiPrompts = options.StopSequences,
                Temperature = (float)options.Temperature,
                MaxTokens = options.MaxTokens ?? 1024,
                FrequencyPenalty = (float)options.FrequencyPenalty,
                PresencePenalty = (float)options.PresencePenalty,
                TopP = (float)options.TopP,
            };
        }
    }
}