You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

DataHelpers.cs 2.1 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. using NumSharp.Core;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.IO;
  5. using System.Linq;
  6. using System.Text;
  7. using System.Text.RegularExpressions;
  8. namespace TensorFlowNET.Examples.CnnTextClassification
  9. {
  /// <summary>
  /// Helpers that prepare the MR polarity sentences and their labels for the CNN text classification example.
  /// </summary>
  public class DataHelpers
  {
      /// <summary>
      /// Fetches the positive and negative MR polarity files into the local
      /// "CnnTextClassification" directory, reads them line by line, cleans every
      /// sentence and builds one two-element label per sentence.
      /// </summary>
      /// <param name="positive_data_file">
      /// Source of the positive examples; passed as the first argument to <c>Utility.Web.Download</c>
      /// (presumably a URL - confirm against that helper).
      /// </param>
      /// <param name="negative_data_file">
      /// Source of the negative examples; passed as the first argument to <c>Utility.Web.Download</c>
      /// (presumably a URL - confirm against that helper).
      /// </param>
      /// <returns>
      /// A tuple of the cleaned sentences (all positive lines first, then all negative lines)
      /// and the result of concatenating the labels: <c>{ 0, 1 }</c> for each positive line
      /// followed by <c>{ 1, 0 }</c> for each negative line.
      /// </returns>
      public static (string[], NDArray) load_data_and_labels(string positive_data_file, string negative_data_file)
      {
          // Make sure the target folder exists before anything is written into it.
          Directory.CreateDirectory("CnnTextClassification");
          Utility.Web.Download(positive_data_file, "CnnTextClassification/rt-polarity.pos");
          Utility.Web.Download(negative_data_file, "CnnTextClassification/rt-polarity.neg");

          // One trimmed sentence per line of each local file.
          string[] positives = read_trimmed_lines("CnnTextClassification/rt-polarity.pos");
          string[] negatives = read_trimmed_lines("CnnTextClassification/rt-polarity.neg");

          // Positive sentences come first so that they line up with the label order below.
          string[] sentences = positives
              .Concat(negatives)
              .Select(sentence => clean_str(sentence))
              .ToArray();

          // Two-element label per sentence: { 0, 1 } marks positive, { 1, 0 } marks negative.
          int[][] labels_for_positives = positives.Select(_ => new int[2] { 0, 1 }).ToArray();
          int[][] labels_for_negatives = negatives.Select(_ => new int[2] { 1, 0 }).ToArray();
          var labels = np.concatenate(new int[][][] { labels_for_positives, labels_for_negatives });

          return (sentences, labels);
      }

      /// <summary>
      /// Reads every line of the given text file and trims surrounding whitespace from each one.
      /// </summary>
      /// <param name="path">Path of the file to read.</param>
      /// <returns>The trimmed lines, in file order.</returns>
      private static string[] read_trimmed_lines(string path)
      {
          return File.ReadAllLines(path)
              .Select(line => line.Trim())
              .ToArray();
      }

      /// <summary>
      /// Normalises a sentence: every character that is not an ASCII letter, a digit or one of
      /// <c>( ) , ! ? ' `</c> becomes a space, and a space is inserted before each <c>'s</c>.
      /// </summary>
      /// <param name="str">The raw sentence.</param>
      /// <returns>The cleaned sentence.</returns>
      private static string clean_str(string str)
      {
          var filtered = Regex.Replace(str, @"[^A-Za-z0-9(),!?\'\`]", " ");
          return Regex.Replace(filtered, @"\'s", " \'s");
      }
  }
  47. }

TensorFlow 框架的 .NET 版本，提供了丰富的特性和 API，可以借此很方便地在 .NET 平台下搭建深度学习训练与推理流程。