
AttentionTest.cs

using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.Collections.Generic;
using Tensorflow.NumPy;
using static Tensorflow.Binding;
using static Tensorflow.KerasApi;
using Tensorflow.Keras.Layers;
using Tensorflow;
using Tensorflow.Keras.ArgsDefinition;
using Tensorflow.Keras.Utils;

namespace TensorFlowNET.Keras.UnitTest
{
    [TestClass]
    public class AttentionTest : EagerModeTestBase
    {
        #region BaseDenseAttention
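
        // The tests in this region assume that BaseDenseAttention._apply_scores
        // softmaxes the scores (zeroing positions where scores_mask is false) and
        // then contracts the resulting distribution with `value`; the expected
        // values below are hand-computed under that assumption and float32 rounding.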
        [TestMethod]
        public void test_multi_dim_with_mask()
        {
            // Scores tensor of shape [1, 1, 3]
            var scores = np.array(new[, ,] { { { 1f, 0f, 1f } } }, dtype: np.float32);
            // Value tensor of shape [1, 3, 1]
            var v = np.array(new[, ,] { { { 1.6f }, { 0.7f }, { -0.8f } } }, dtype: np.float32);
            // Scores mask tensor of shape [1, 1, 3]
            var scores_mask = np.array(new[, ,] { { { true, true, false } } }, dtype: np.@bool);
            var _tup_1 = new BaseDenseAttention(new())._apply_scores(scores: scores, value: v, scores_mask: scores_mask);
            var actual = _tup_1.Item1;
            var actual_scores = _tup_1.Item2;
            // Expected softmax scores = softmax(scores) with zeros in positions where
            // v_mask == False.
            // => softmax_scores000 = exp(1)/(exp(1) + exp(0)) = 0.73105857863
            //    softmax_scores001 = exp(0)/(exp(1) + exp(0)) = 0.26894142137
            //    softmax_scores002 = 0
            var expected_scores = np.array(new[, ,] { { { 0.73105857863f, 0.26894142137f, 0f } } }, dtype: np.float32);
            Assert.AreEqual(expected_scores, actual_scores.numpy());
            // Expected tensor of shape [1, 1, 1].
            // expected000 = 0.73105857863 * 1.6 + 0.26894142137 * 0.7 - 0 * 0.8
            //             = 1.35795272077
            // Actually the output is 1.3579528
            var expected = np.array(new[, ,] { { { 1.3579528f } } }, dtype: np.float32);
            Assert.AreEqual(expected, actual.numpy());
        }

        [TestMethod]
        public void test_one_dim_batch_size_two()
        {
            // Scores tensor of shape [2, 1, 1]
            var scores = np.array(new[, ,] { { { 1.1f } }, { { 2.1f } } }, dtype: np.float32);
            // Value tensor of shape [2, 1, 1]
            var v = np.array(new[, ,] { { { 1.6f } }, { { 2.6f } } }, dtype: np.float32);
            // Scores mask tensor of shape [2, 1, 1]
            var scores_mask = np.array(new[, ,] { { { true } }, { { true } } }, dtype: np.@bool);
            var _tup_1 = new BaseDenseAttention(new())._apply_scores(scores: scores, value: v, scores_mask: scores_mask);
            var actual = _tup_1.Item1;
            var actual_scores = _tup_1.Item2;
            // Expected softmax_scores = [[[1]], [[1]]]
            var expected_scores = np.array(new[, ,] { { { 1f } }, { { 1f } } }, dtype: np.float32);
            Assert.AreEqual(expected_scores, actual_scores.numpy());
            // Expected tensor of shape [2, 1, 1].
            // expected000 = softmax_scores[0, 0] * 1.6 = 1.6
            // expected100 = softmax_scores[1, 0] * 2.6 = 2.6
            var expected = np.array(new[, ,] { { { 1.6f } }, { { 2.6f } } }, dtype: np.float32);
            Assert.AreEqual(expected, actual.numpy());
        }

        #endregion

        // ------------------------------------------------------------------

        #region Attention
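
        // The default score mode of the Attention layer is a dot product:
        // score[b, i, j] = sum_k query[b, i, k] * key[b, j, k].
        // The expected values in the next test are hand-computed from that formula.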
        [TestMethod]
        public void test_calculate_scores_multi_dim()
        {
            // Query tensor of shape [1, 2, 4]
            var q = np.array(new[, ,] { {
                { 1f, 1.1f, 1.2f, 1.3f },
                { 2f, 2.1f, 2.2f, 2.3f }
            } }, dtype: np.float32);
            // Key tensor of shape [1, 3, 4]
            var k = np.array(new[, ,] { {
                { 1.5f, 1.6f, 1.7f, 1.8f },
                { 2.5f, 2.6f, 2.7f, 2.8f },
                { 3.5f, 3.6f, 3.7f, 3.8f }
            } }, dtype: np.float32);
            var attention_layer = keras.layers.Attention();
            //attention_layer.build(((1, 2, 4), (1, 3, 4)));
            var actual = attention_layer._calculate_scores(query: q, key: k);
            // Expected tensor of shape [1, 2, 3].
            // expected000 = 1.*1.5 + 1.1*1.6 + 1.2*1.7 + 1.3*1.8 = 7.64
            // expected001 = 1.*2.5 + 1.1*2.6 + 1.2*2.7 + 1.3*2.8 = 12.24
            // expected002 = 1.*3.5 + 1.1*3.6 + 1.2*3.7 + 1.3*3.8 = 16.84
            // expected010 = 2.*1.5 + 2.1*1.6 + 2.2*1.7 + 2.3*1.8 = 14.24
            // expected011 = 2.*2.5 + 2.1*2.6 + 2.2*2.7 + 2.3*2.8 = 22.84
            // expected012 = 2.*3.5 + 2.1*3.6 + 2.2*3.7 + 2.3*3.8 = 31.44
            // Actually the output000 is 7.6400003, the output012 is 31.439999
            var expected = np.array(new[, ,] { {
                { 7.6400003f, 12.24f, 16.84f },
                { 14.24f, 22.84f, 31.439999f }
            } }, dtype: np.float32);
            Assert.AreEqual(expected, actual.numpy());
        }
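
        // With score_mode "concat" the score is assumed to be
        // concat_score_weight * sum_k tanh(query[b, i, k] + key[b, j, k]);
        // the test below sets concat_score_weight to a ones-initialized variable,
        // so the expected values reduce to the tanh sums in the comments.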
        [TestMethod]
        public void test_calculate_scores_multi_dim_concat()
        {
            // Query tensor of shape [1, 2, 4]
            var q = np.array(new[, ,] { {
                { 1f, 1.1f, 1.2f, 1.3f },
                { 2f, 2.1f, 2.2f, 2.3f }
            } }, dtype: np.float32);
            // Key tensor of shape [1, 3, 4]
            var k = np.array(new[, ,] { {
                { 1.5f, 1.6f, 1.7f, 1.8f },
                { 2.5f, 2.6f, 2.7f, 2.8f },
                { 3.5f, 3.6f, 3.7f, 3.8f }
            } }, dtype: np.float32);
            var attention_layer = keras.layers.Attention(score_mode: "concat");
            //attention_layer.concat_score_weight = 1;
            attention_layer.concat_score_weight = base_layer_utils.make_variable(new VariableArgs()
            {
                Name = "concat_score_weight",
                Shape = (1),
                DType = TF_DataType.TF_FLOAT,
                Getter = base_layer_utils.make_variable,
                Overwrite = true,
                Initializer = tf.ones_initializer,
                Synchronization = VariableSynchronization.Auto,
                Aggregation = VariableAggregation.None,
                Trainable = true
            });
            //attention_layer.build(((1, 2, 4), (1, 3, 4)));
            //var actual = keras.backend.get_value(attention_layer._calculate_scores(query: q, key: k));
            var actual = attention_layer._calculate_scores(query: q, key: k);
            // Expected tensor of shape [1, 2, 3].
            // expected000 = tanh(1.+1.5) + tanh(1.1+1.6) + tanh(1.2+1.7) + tanh(1.3+1.8) = 3.96753427840
            // expected001 = tanh(1.+2.5) + tanh(1.1+2.6) + tanh(1.2+2.7) + tanh(1.3+2.8) = 3.99558784825
            // expected002 = tanh(1.+3.5) + tanh(1.1+3.6) + tanh(1.2+3.7) + tanh(1.3+3.8) = 3.99940254147
            // expected010 = tanh(2.+1.5) + tanh(2.1+1.6) + tanh(2.2+1.7) + tanh(2.3+1.8) = 3.99558784825
            // expected011 = tanh(2.+2.5) + tanh(2.1+2.6) + tanh(2.2+2.7) + tanh(2.3+2.8) = 3.99940254147
            // expected012 = tanh(2.+3.5) + tanh(2.1+3.6) + tanh(2.2+3.7) + tanh(2.3+3.8) = 3.99991913657
            // Actually the output012 is 3.9999194
            var expected = np.array(new[, ,] { {
                { 3.96753427840f, 3.99558784825f, 3.99940254147f },
                { 3.99558784825f, 3.99940254147f, 3.9999194f }
            } }, dtype: np.float32);
            Assert.AreEqual(expected, actual.numpy());
        }

        #endregion

        // ------------------------------------------------------------------

        #region MultiHeadAttention
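
        // This test wires a MultiHeadAttention layer symbolically, then applies it
        // to random data once with a random 0/1 attention mask and once with an
        // all-ones mask; masking out positions should change the result, so the
        // two outputs are expected to differ.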
        [TestMethod]
        public void test_masked_attention()
        {
            var batch_size = 3;
            var query = keras.Input(shape: (4, 8));
            var value = keras.Input(shape: (2, 8));
            var mask_tensor = keras.Input(shape: (4, 2));
            var attention_layer = keras.layers.MultiHeadAttention(num_heads: 2, key_dim: 2);
            attention_layer.Apply(new[] { query, value, mask_tensor });
            var from_data = 10 * np.random.randn(batch_size, 4, 8);
            var to_data = 10 * np.random.randn(batch_size, 2, 8);
            var mask_data = np.random.randint(2, size: (batch_size, 4, 2));
            var masked_output_data = attention_layer.Apply(new[] { from_data, to_data, mask_data });
            var null_mask_data = np.ones((batch_size, 4, 2));
            var unmasked_output_data = attention_layer.Apply(new[] { from_data, to_data, null_mask_data });
            Assert.AreNotEqual(masked_output_data, unmasked_output_data);
        }

        #endregion
    }
}