You cannot select more than 25 topics. A topic must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

NaiveBayesClassifier.cs 8.2 kB

6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
6 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  4. using Tensorflow;
  5. using NumSharp;
  6. using System.Linq;
  7. using static Tensorflow.Python;
  8. namespace TensorFlowNET.Examples
  9. {
  10. /// <summary>
  11. /// https://github.com/nicolov/naive_bayes_tensorflow
  12. /// </summary>
  13. public class NaiveBayesClassifier : IExample
  14. {
  15. public int Priority => 6;
  16. public bool Enabled { get; set; } = true;
  17. public string Name => "Naive Bayes Classifier";
  18. public bool ImportGraph { get; set; } = false;
  19. public NDArray X, y;
  20. public Normal dist { get; set; }
  21. public bool Run()
  22. {
  23. PrepareData();
  24. fit(X, y);
  25. // Create a regular grid and classify each point
  26. float x_min = X.amin(0).Data<float>(0) - 0.5f;
  27. float y_min = X.amin(0).Data<float>(1) - 0.5f;
  28. float x_max = X.amax(0).Data<float>(0) + 0.5f;
  29. float y_max = X.amax(0).Data<float>(1) + 0.5f;
  30. var (xx, yy) = np.meshgrid(np.linspace(x_min, x_max, 30), np.linspace(y_min, y_max, 30));
  31. with(tf.Session(), sess =>
  32. {
  33. var samples = np.hstack<float>(xx.ravel().reshape(xx.size, 1), yy.ravel().reshape(yy.size, 1));
  34. var Z = sess.run(predict(samples));
  35. });
  36. return true;
  37. }
  38. public void fit(NDArray X, NDArray y)
  39. {
  40. var unique_y = y.unique<int>();
  41. var dic = new Dictionary<int, List<List<float>>>();
  42. // Init uy in dic
  43. foreach (int uy in unique_y.Data<int>())
  44. {
  45. dic.Add(uy, new List<List<float>>());
  46. }
  47. // Separate training points by class
  48. // Shape : nb_classes * nb_samples * nb_features
  49. int maxCount = 0;
  50. for (int i = 0; i < y.size; i++)
  51. {
  52. var curClass = y[i];
  53. var l = dic[curClass];
  54. var pair = new List<float>();
  55. pair.Add(X[i,0]);
  56. pair.Add(X[i, 1]);
  57. l.Add(pair);
  58. if (l.Count > maxCount)
  59. {
  60. maxCount = l.Count;
  61. }
  62. dic[curClass] = l;
  63. }
  64. float[,,] points = new float[dic.Count, maxCount, X.shape[1]];
  65. foreach (KeyValuePair<int, List<List<float>>> kv in dic)
  66. {
  67. int j = (int) kv.Key;
  68. for (int i = 0; i < maxCount; i++)
  69. {
  70. for (int k = 0; k < X.shape[1]; k++)
  71. {
  72. points[j, i, k] = kv.Value[i][k];
  73. }
  74. }
  75. }
  76. var points_by_class = np.array(points);
  77. // estimate mean and variance for each class / feature
  78. // shape : nb_classes * nb_features
  79. var cons = tf.constant(points_by_class);
  80. var tup = tf.nn.moments(cons, new int[]{1});
  81. var mean = tup.Item1;
  82. var variance = tup.Item2;
  83. // Create a 3x2 univariate normal distribution with the
  84. // Known mean and variance
  85. var dist = tf.distributions.Normal(mean, tf.sqrt(variance));
  86. this.dist = dist;
  87. }
  88. public Tensor predict (NDArray X)
  89. {
  90. if (dist == null)
  91. {
  92. throw new ArgumentNullException("cant not find the model (normal distribution)!");
  93. }
  94. int nb_classes = (int) dist.scale().shape[0];
  95. int nb_features = (int)dist.scale().shape[1];
  96. // Conditional probabilities log P(x|c) with shape
  97. // (nb_samples, nb_classes)
  98. var t1= ops.convert_to_tensor(X, TF_DataType.TF_FLOAT);
  99. var t2 = ops.convert_to_tensor(new int[] { 1, nb_classes });
  100. Tensor tile = tf.tile(t1, t2);
  101. var t3 = ops.convert_to_tensor(new int[] { -1, nb_classes, nb_features });
  102. Tensor r = tf.reshape(tile, t3);
  103. var cond_probs = tf.reduce_sum(dist.log_prob(r), 2);
  104. // uniform priors
  105. float[] tem = new float[nb_classes];
  106. for (int i = 0; i < tem.Length; i++)
  107. {
  108. tem[i] = 1.0f / nb_classes;
  109. }
  110. var priors = np.log(np.array<float>(tem));
  111. // posterior log probability, log P(c) + log P(x|c)
  112. var joint_likelihood = tf.add(ops.convert_to_tensor(priors, TF_DataType.TF_FLOAT), cond_probs);
  113. // normalize to get (log)-probabilities
  114. var norm_factor = tf.reduce_logsumexp(joint_likelihood, new int[] { 1 }, keepdims: true);
  115. var log_prob = joint_likelihood - norm_factor;
  116. // exp to get the actual probabilities
  117. return tf.exp(log_prob);
  118. }
  119. public void PrepareData()
  120. {
  121. #region Training data
  122. X = np.array(new float[,] {
  123. {5.1f, 3.5f}, {4.9f, 3.0f}, {4.7f, 3.2f}, {4.6f, 3.1f}, {5.0f, 3.6f}, {5.4f, 3.9f},
  124. {4.6f, 3.4f}, {5.0f, 3.4f}, {4.4f, 2.9f}, {4.9f, 3.1f}, {5.4f, 3.7f}, {4.8f, 3.4f},
  125. {4.8f, 3.0f}, {4.3f, 3.0f}, {5.8f, 4.0f}, {5.7f, 4.4f}, {5.4f, 3.9f}, {5.1f, 3.5f},
  126. {5.7f, 3.8f}, {5.1f, 3.8f}, {5.4f, 3.4f}, {5.1f, 3.7f}, {5.1f, 3.3f}, {4.8f, 3.4f},
  127. {5.0f, 3.0f}, {5.0f, 3.4f}, {5.2f, 3.5f}, {5.2f, 3.4f}, {4.7f, 3.2f}, {4.8f, 3.1f},
  128. {5.4f, 3.4f}, {5.2f, 4.1f}, {5.5f, 4.2f}, {4.9f, 3.1f}, {5.0f, 3.2f}, {5.5f, 3.5f},
  129. {4.9f, 3.6f}, {4.4f, 3.0f}, {5.1f, 3.4f}, {5.0f, 3.5f}, {4.5f, 2.3f}, {4.4f, 3.2f},
  130. {5.0f, 3.5f}, {5.1f, 3.8f}, {4.8f, 3.0f}, {5.1f, 3.8f}, {4.6f, 3.2f}, {5.3f, 3.7f},
  131. {5.0f, 3.3f}, {7.0f, 3.2f}, {6.4f, 3.2f}, {6.9f, 3.1f}, {5.5f, 2.3f}, {6.5f, 2.8f},
  132. {5.7f, 2.8f}, {6.3f, 3.3f}, {4.9f, 2.4f}, {6.6f, 2.9f}, {5.2f, 2.7f}, {5.0f, 2.0f},
  133. {5.9f, 3.0f}, {6.0f, 2.2f}, {6.1f, 2.9f}, {5.6f, 2.9f}, {6.7f, 3.1f}, {5.6f, 3.0f},
  134. {5.8f, 2.7f}, {6.2f, 2.2f}, {5.6f, 2.5f}, {5.9f, 3.0f}, {6.1f, 2.8f}, {6.3f, 2.5f},
  135. {6.1f, 2.8f}, {6.4f, 2.9f}, {6.6f, 3.0f}, {6.8f, 2.8f}, {6.7f, 3.0f}, {6.0f, 2.9f},
  136. {5.7f, 2.6f}, {5.5f, 2.4f}, {5.5f, 2.4f}, {5.8f, 2.7f}, {6.0f, 2.7f}, {5.4f, 3.0f},
  137. {6.0f, 3.4f}, {6.7f, 3.1f}, {6.3f, 2.3f}, {5.6f, 3.0f}, {5.5f, 2.5f}, {5.5f, 2.6f},
  138. {6.1f, 3.0f}, {5.8f, 2.6f}, {5.0f, 2.3f}, {5.6f, 2.7f}, {5.7f, 3.0f}, {5.7f, 2.9f},
  139. {6.2f, 2.9f}, {5.1f, 2.5f}, {5.7f, 2.8f}, {6.3f, 3.3f}, {5.8f, 2.7f}, {7.1f, 3.0f},
  140. {6.3f, 2.9f}, {6.5f, 3.0f}, {7.6f, 3.0f}, {4.9f, 2.5f}, {7.3f, 2.9f}, {6.7f, 2.5f},
  141. {7.2f, 3.6f}, {6.5f, 3.2f}, {6.4f, 2.7f}, {6.8f, 3.0f}, {5.7f, 2.5f}, {5.8f, 2.8f},
  142. {6.4f, 3.2f}, {6.5f, 3.0f}, {7.7f, 3.8f}, {7.7f, 2.6f}, {6.0f, 2.2f}, {6.9f, 3.2f},
  143. {5.6f, 2.8f}, {7.7f, 2.8f}, {6.3f, 2.7f}, {6.7f, 3.3f}, {7.2f, 3.2f}, {6.2f, 2.8f},
  144. {6.1f, 3.0f}, {6.4f, 2.8f}, {7.2f, 3.0f}, {7.4f, 2.8f}, {7.9f, 3.8f}, {6.4f, 2.8f},
  145. {6.3f, 2.8f}, {6.1f, 2.6f}, {7.7f, 3.0f}, {6.3f, 3.4f}, {6.4f, 3.1f}, {6.0f, 3.0f},
  146. {6.9f, 3.1f}, {6.7f, 3.1f}, {6.9f, 3.1f}, {5.8f, 2.7f}, {6.8f, 3.2f}, {6.7f, 3.3f},
  147. {6.7f, 3.0f}, {6.3f, 2.5f}, {6.5f, 3.0f}, {6.2f, 3.4f}, {5.9f, 3.0f}, {5.8f, 3.0f}});
  148. y = np.array(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  149. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  150. 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  151. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  152. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  153. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  154. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
  155. #endregion
  156. }
  157. }
  158. }