using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Tensorflow;
using Tensorflow.Estimator;
using TensorFlowNET.Examples.Utility;
using static Tensorflow.Python;

namespace TensorFlowNET.Examples.Text.NER
{
    /// <summary>
    /// A NER model using Tensorflow (LSTM + CRF + chars embeddings).
    /// State-of-the-art performance (F1 score between 90 and 91).
    ///
    /// https://github.com/guillaumegenthial/sequence_tagging
    /// </summary>
    public class LstmCrfNer : IExample
    {
        // NOTE(review): the four members below are presumably required by IExample
        // and consumed by the example runner; their exact semantics are not visible here.
        public int Priority => 14;
        public bool Enabled { get; set; } = true;

        // NOTE(review): Run() currently imports the meta graph regardless of this flag.
        public bool ImportGraph { get; set; } = true;

        public string Name => "LSTM + CRF NER";

        // Hyper-parameters and file paths; assigned in PrepareData().
        HyperParams hp;

        // Tag vocabulary. Not populated yet: the loading code in PrepareData() is commented out.
        // Assumes a tag-string -> integer-id mapping, as the commented-out load_vocab call suggests; TODO confirm.
        Dictionary<string, int> vocab_tags = new Dictionary<string, int>();

        // Vocabulary sizes; left at their default (0) until the vocabulary loading is enabled.
        int nwords, nchars, ntags;

        // Datasets constructed in PrepareData().
        CoNLLDataset dev, train;

        /// <summary>
        /// Prepares the data, makes a fresh graph the default one, imports the meta graph
        /// from "graph/lstm_crf_ner.meta", runs the global variables initializer inside a
        /// session and prints one progress line per epoch. No training step is executed yet.
        /// </summary>
        /// <returns>Always <c>true</c>.</returns>
        public bool Run()
        {
            PrepareData();

            // Only the side effect (switching the default graph) is needed here,
            // so the returned graph is not bound to a local.
            tf.Graph().as_default();

            tf.train.import_meta_graph("graph/lstm_crf_ner.meta");

            var init = tf.global_variables_initializer();

            with(tf.Session(), sess =>
            {
                sess.run(init);

                foreach (var epoch in range(hp.epochs))
                {
                    print($"Epoch {epoch + 1} out of {hp.epochs}");
                }
            });

            return true;
        }

        /// <summary>
        /// Creates the hyper-parameters, fills in the data file paths and constructs the
        /// <c>dev</c> and <c>train</c> datasets.
        /// </summary>
        public void PrepareData()
        {
            hp = new HyperParams("LstmCrfNer")
            {
                epochs = 15,
                dropout = 0.5f,
                batch_size = 20,
                lr_method = "adam",
                lr = 0.001f,
                lr_decay = 0.9f,
                clip = false,
                epoch_no_imprv = 3,
                hidden_size_char = 100,
                hidden_size_lstm = 300
            };

            // The dev, test and train paths all point at the same "test.txt" file.
            hp.filepath_dev = hp.filepath_test = hp.filepath_train = Path.Combine(hp.data_root_dir, "test.txt");

            // Loads vocabulary, processing functions and embeddings
            hp.filepath_words = Path.Combine(hp.data_root_dir, "words.txt");
            hp.filepath_tags = Path.Combine(hp.data_root_dir, "tags.txt");
            hp.filepath_chars = Path.Combine(hp.data_root_dir, "chars.txt");

            // 1. vocabulary
            /*vocab_tags = load_vocab(hp.filepath_tags);

            nwords = vocab_words.Count;
            nchars = vocab_chars.Count;
            ntags = vocab_tags.Count;*/

            // 2. get processing functions that map str -> id
            dev = new CoNLLDataset(hp.filepath_dev, hp);
            train = new CoNLLDataset(hp.filepath_train, hp);
        }
    }
}