You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

KMeansClustering.cs 5.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. using NumSharp;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Diagnostics;
  5. using System.Linq;
  6. using System.Text;
  7. using Tensorflow;
  8. using Tensorflow.Clustering;
  9. using TensorFlowNET.Examples.Utility;
  10. using static Tensorflow.Python;
  11. namespace TensorFlowNET.Examples
  12. {
  /// <summary>
  /// Implement K-Means algorithm with TensorFlow.NET, and apply it to classify
  /// handwritten digit images.
  /// https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/2_BasicModels/kmeans.py
  /// </summary>
  /// <remarks>
  /// The K-Means graph is not built here: <see cref="Run"/> imports a pre-built
  /// meta graph (downloaded by <see cref="PrepareData"/>) and looks its
  /// operations up by name.
  /// </remarks>
  public class KMeansClustering : IExample
  {
      // Local location of the pre-built K-Means meta graph. Shared by PrepareData
      // (download target) and Run (import source) so the two cannot drift apart.
      private const string GraphDirectory = "graph";
      private const string GraphFileName = "kmeans.meta";

      /// <summary>Priority value exposed for this example (presumably used by the example runner for ordering; confirm against IExample).</summary>
      public int Priority => 8;

      /// <summary>Enabled flag exposed for this example; it is not read inside this class.</summary>
      public bool Enabled { get; set; } = true;

      /// <summary>Display name of this example.</summary>
      public string Name => "K-means Clustering";

      /// <summary>
      /// Import-graph flag exposed for this example. NOTE(review): it is not read
      /// inside this class; <see cref="Run"/> always imports the meta graph.
      /// </summary>
      public bool ImportGraph { get; set; } = true;

      /// <summary>Forwarded as <c>train_size</c> to <c>MnistDataSet.read_data_sets</c>.</summary>
      public int? train_size = null;

      /// <summary>Forwarded as <c>validation_size</c> to <c>MnistDataSet.read_data_sets</c>.</summary>
      public int validation_size = 5000;

      /// <summary>Forwarded as <c>test_size</c> to <c>MnistDataSet.read_data_sets</c>.</summary>
      public int? test_size = null;

      /// <summary>The number of samples per batch. NOTE(review): not referenced elsewhere in this class.</summary>
      public int batch_size = 1024;

      // MNIST data set loaded by PrepareData.
      Datasets mnist;

      // Training images (mnist.train.images); fed to the input placeholder on every step.
      NDArray full_data_x;

      int num_steps = 20; // Total steps to train
      int k = 25; // The number of clusters
      int num_classes = 10; // The 10 digits
      int num_features = 784; // Each image is 28x28 pixels (only referenced in comments below)

      /// <summary>
      /// Loads the data and the pre-built K-Means graph, runs <c>num_steps</c>
      /// training steps, assigns a digit label to every centroid and evaluates
      /// the resulting classifier on the MNIST test set.
      /// </summary>
      /// <returns><c>true</c> when the measured test accuracy is greater than 0.70.</returns>
      public bool Run()
      {
          PrepareData();

          var graph = tf.Graph().as_default();
          tf.train.import_meta_graph(GraphDirectory + "/" + GraphFileName);

          // Input images.
          // Graph-building equivalent: tf.placeholder(tf.float32, shape: new TensorShape(-1, num_features))
          Tensor X = graph.get_operation_by_name("Placeholder");

          // Labels (for assigning a label to a centroid and testing).
          // Graph-building equivalent: tf.placeholder(tf.float32, shape: new TensorShape(-1, num_classes))
          Tensor Y = graph.get_operation_by_name("Placeholder_1");

          // The K-Means training graph itself comes from the imported meta graph;
          // the operations below are looked up by their names in that graph.
          var init_vars = tf.global_variables_initializer();
          Tensor init_op = graph.get_operation_by_name("cond/Merge");
          var train_op = graph.get_operation_by_name("group_deps");
          Tensor avg_distance = graph.get_operation_by_name("Mean");
          Tensor cluster_idx = graph.get_operation_by_name("Squeeze_1");

          NDArray result = null;

          with(tf.Session(graph), sess =>
          {
              sess.run(init_vars, new FeedItem(X, full_data_x));
              sess.run(init_op, new FeedItem(X, full_data_x));

              // Training
              var sw = new Stopwatch();
              foreach (var i in range(1, num_steps + 1))
              {
                  sw.Restart();
                  result = sess.run(new ITensorOrOperation[] { train_op, avg_distance, cluster_idx }, new FeedItem(X, full_data_x));
                  sw.Stop();

                  // Report the first step and every fourth step.
                  if (i % 4 == 0 || i == 1)
                  {
                      print($"Step {i}, Avg Distance: {result[1]} Elapse: {sw.ElapsedMilliseconds}ms");
                  }
                  
              }

              // Output of the cluster_idx fetch from the last training step.
              var idx = result[2].Data<int>();

              // Assign a label to each centroid
              // Count total number of labels per centroid, using the label of each training
              // sample to their closest centroid (given by 'idx')
              var counts = np.zeros((k, num_classes), np.float32);

              // Restart, not Start: Start would resume from the elapsed time of the
              // last training step and inflate the duration reported below.
              sw.Restart();
              foreach (var i in range(idx.Length))
              {
                  var x = mnist.train.labels[i];
                  counts[idx[i]] += x;
              }
              
              sw.Stop();
              print($"Assign a label to each centroid took {sw.ElapsedMilliseconds}ms");

              // Assign the most frequent label to the centroid
              var labels_map_array = np.argmax(counts, 1);
              var labels_map = tf.convert_to_tensor(labels_map_array);

              // Evaluation ops
              // Lookup: centroid_id -> label
              var cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx);

              // Compute accuracy
              var correct_prediction = tf.equal(cluster_label, tf.cast(tf.argmax(Y, 1), tf.int32));
              var cast = tf.cast(correct_prediction, tf.float32);
              var accuracy_op = tf.reduce_mean(cast);

              // Test Model
              var (test_x, test_y) = (mnist.test.images, mnist.test.labels);
              result = sess.run(accuracy_op, new FeedItem(X, test_x), new FeedItem(Y, test_y));
              print($"Test Accuracy: {result}");
          });

          // After the session block, result holds the output of the accuracy run.
          return (float)result > 0.70;
      }

      /// <summary>
      /// Loads the MNIST data set (one-hot labels) with the configured sizes,
      /// keeps the training images for <see cref="Run"/>, and downloads the
      /// K-Means meta graph into the local graph directory.
      /// </summary>
      public void PrepareData()
      {
          mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, train_size: train_size, validation_size: validation_size, test_size: test_size);
          full_data_x = mnist.train.images;

          // download graph meta data
          string url = "https://raw.githubusercontent.com/SciSharp/TensorFlow.NET/master/graph/kmeans.meta";
          Web.Download(url, GraphDirectory, GraphFileName);
      }
  }
  106. }