You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

TextApi.cs 1.2 kB

1234567891011121314151617181920212223242526272829303132333435
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using Tensorflow.Keras.Text;
  6. namespace Tensorflow.Keras
  7. {
  /// <summary>
  /// Entry points for text preprocessing: a factory for
  /// <see cref="Tensorflow.Keras.Text.Tokenizer"/> and a standalone word splitter.
  /// </summary>
  public class TextApi
  {
      /// <summary>
      /// Characters treated as token separators by default: ASCII punctuation
      /// (excluding the apostrophe), tab and newline.
      /// </summary>
      private const string DefaultFilter = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n";

      /// <summary>
      /// Creates a new <see cref="Tensorflow.Keras.Text.Tokenizer"/>, forwarding every
      /// argument unchanged to its constructor.
      /// </summary>
      /// <param name="num_words">Forwarded to the tokenizer; defaults to -1.</param>
      /// <param name="filters">Forwarded to the tokenizer; defaults to the built-in punctuation/whitespace filter.</param>
      /// <param name="lower">Forwarded to the tokenizer; defaults to <c>true</c>.</param>
      /// <param name="split">Forwarded to the tokenizer; defaults to a single space.</param>
      /// <param name="char_level">Forwarded to the tokenizer; defaults to <c>false</c>.</param>
      /// <param name="oov_token">Forwarded to the tokenizer; defaults to <c>null</c>.</param>
      /// <param name="analyzer">Forwarded to the tokenizer; defaults to <c>null</c>.</param>
      /// <returns>The newly constructed tokenizer.</returns>
      public Tensorflow.Keras.Text.Tokenizer Tokenizer(
          int num_words = -1,
          string filters = DefaultFilter,
          bool lower = true,
          char split = ' ',
          bool char_level = false,
          string oov_token = null,
          Func<string, IEnumerable<string>> analyzer = null)
      {
          return new Keras.Text.Tokenizer(num_words, filters, lower, split, char_level, oov_token, analyzer);
      }

      /// <summary>
      /// Splits <paramref name="text"/> into a sequence of non-empty tokens.
      /// </summary>
      /// <remarks>
      /// Every character listed in <paramref name="filters"/> is replaced by
      /// <paramref name="split"/> (rather than deleted), so words joined only by a
      /// filtered character, e.g. <c>"foo-bar"</c> or <c>"a\nb"</c>, become separate
      /// tokens. Empty tokens produced by consecutive separators are dropped.
      /// </remarks>
      /// <param name="text">The text to tokenize.</param>
      /// <param name="filters">Characters to treat as separators; <c>null</c> or empty disables filtering.</param>
      /// <param name="lower">When <c>true</c>, the text is lower-cased before splitting.</param>
      /// <param name="split">The separator character.</param>
      /// <returns>The tokens in their order of appearance.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
      public static IEnumerable<string> text_to_word_sequence(string text, string filters = DefaultFilter, bool lower = true, char split = ' ')
      {
          if (text == null)
          {
              throw new ArgumentNullException(nameof(text));
          }

          if (lower)
          {
              // Invariant casing keeps tokenization independent of the current thread culture.
              text = text.ToLowerInvariant();
          }

          if (!string.IsNullOrEmpty(filters))
          {
              var buffer = text.ToCharArray();
              for (var i = 0; i < buffer.Length; i++)
              {
                  // Map filtered characters onto the separator so they still delimit words.
                  if (filters.IndexOf(buffer[i]) >= 0)
                  {
                      buffer[i] = split;
                  }
              }

              text = new string(buffer);
          }

          // Runs of separators (including those introduced above) must not yield empty tokens.
          return text.Split(new[] { split }, StringSplitOptions.RemoveEmptyEntries);
      }
  }
  32. }