using System;
using System.Collections.Generic;
using System.Text;
namespace LLama.Transform
{
/// <summary>
/// The default tokenizer of LLamaSharp. This class is sealed and cannot be inherited.
/// </summary>
/// <remarks>
/// This class is stateful: it owns a single <see cref="StreamingTokenDecoder"/> for its whole lifetime, and
/// every <c>Detokenize</c> call adds tokens to that decoder before reading from it. Tokens fed to it earlier
/// can therefore affect what later calls return. If you use an instance in one session, do not reuse it for
/// another session unless that carry-over is intended.
/// </remarks>
public sealed class DefaultTokenizer: ITokenizer
{
    // Encoding supplied at construction; the same instance is handed to the streaming decoder.
    private readonly Encoding _encoding;

    // Decoder shared by every Detokenize call on this instance (the source of the class's state).
    private readonly StreamingTokenDecoder _tokenDecoder;

    /// <summary>
    /// Initialize a new tokenizer with the specified encoding.
    /// </summary>
    /// <param name="encoding">The encoding passed to the internal <see cref="StreamingTokenDecoder"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is <c>null</c>.</exception>
    public DefaultTokenizer(Encoding encoding)
    {
        // Fail fast with the parameter name instead of letting a null reach the decoder.
        if (encoding == null)
        {
            throw new ArgumentNullException(nameof(encoding));
        }

        _encoding = encoding;
        _tokenDecoder = new StreamingTokenDecoder(encoding);
    }

    /// <summary>
    /// Tokenize <paramref name="text"/> by delegating to <c>context.Tokenize</c>.
    /// This method does not touch the decoder state held by this instance.
    /// </summary>
    /// <param name="context">The context whose <c>Tokenize</c> method performs the work.</param>
    /// <param name="text">The text to tokenize.</param>
    /// <param name="addBos">Forwarded unchanged to <c>context.Tokenize</c> (presumably whether a beginning-of-sequence token is added; confirm against <c>LLamaContext</c>).</param>
    /// <param name="special">Forwarded unchanged to <c>context.Tokenize</c> (presumably whether special tokens are recognised; confirm against <c>LLamaContext</c>).</param>
    /// <returns>The sequence returned by <c>context.Tokenize</c>.</returns>
    public IEnumerable Tokenize(LLamaContext context, string text, bool addBos = true, bool special = false)
    {
        return context.Tokenize(text, addBos, special);
    }

    /// <summary>
    /// Add a single token to the internal streaming decoder and return what the decoder reads back.
    /// </summary>
    /// <param name="context">The context whose <c>NativeHandle.ModelHandle</c> is passed to the decoder.</param>
    /// <param name="token">The token to add to the decoder.</param>
    /// <returns>The string returned by the decoder's <c>Read</c> call after the token has been added.</returns>
    public string Detokenize(LLamaContext context, int token)
    {
        _tokenDecoder.Add(token, context.NativeHandle.ModelHandle);
        return _tokenDecoder.Read();
    }

    /// <summary>
    /// Add a sequence of tokens to the internal streaming decoder and return what the decoder reads back.
    /// </summary>
    /// <param name="context">The context whose <c>NativeHandle.ModelHandle</c> is passed to the decoder.</param>
    /// <param name="tokens">The tokens to add to the decoder.</param>
    /// <returns>The string returned by the decoder's <c>Read</c> call after the tokens have been added.</returns>
    public string Detokenize(LLamaContext context, IEnumerable tokens)
    {
        _tokenDecoder.AddRange(tokens, context.NativeHandle.ModelHandle);
        return _tokenDecoder.Read();
    }
}
}