You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

AttentionTest.cs — 16 kB
using System;
using System.Collections.Generic;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Tensorflow;
using Tensorflow.Keras.ArgsDefinition;
using Tensorflow.Keras.Layers;
using Tensorflow.Keras.Utils;
using Tensorflow.NumPy;
using static Tensorflow.Binding;
using static Tensorflow.KerasApi;
  11. namespace TensorFlowNET.Keras.UnitTest
  12. {
  13. [TestClass]
  14. public class AttentionTest : EagerModeTestBase
  15. {
  16. #region BaseDenseAttention
  17. [TestMethod]
  18. public void test_one_dim_with_mask()
  19. {
  20. // Scores tensor of shape [1, 1, 1]
  21. var scores = np.array(new[, ,] { { { 1.1f } } }, dtype: np.float32);
  22. // Value tensor of shape [1, 1, 1]
  23. var v = np.array(new[, ,] { { { 1.6f } } }, dtype: np.float32);
  24. // Scores mask tensor of shape [1, 1, 1]
  25. var scores_mask = np.array(new[, ,] { { { true } } }, dtype: np.@bool);
  26. var _tup_1 = new BaseDenseAttention(new())._apply_scores(scores: scores, value: v, scores_mask: scores_mask);
  27. var actual = _tup_1.Item1;
  28. var actual_scores = _tup_1.Item2;
  29. // Expected softmax_scores = [[[1]]]
  30. var expected_scores = np.array(new[, ,] { { { 1f } } }, dtype: np.float32);
  31. Assert.AreEqual(expected_scores, actual_scores.numpy());
  32. // Expected tensor of shape [1, 1, 1].
  33. // expected000 = softmax_scores[0, 0] * 1.6 = 1.6
  34. var expected = np.array(new[, ,] { { { 1.6f } } }, dtype: np.float32);
  35. Assert.AreEqual(expected, actual.numpy());
  36. }
  37. [TestMethod]
  38. public void test_one_dim_no_mask()
  39. {
  40. // Scores tensor of shape [1, 1, 1]
  41. var scores = np.array(new[, ,] { { { 1.1f } } }, dtype: np.float32);
  42. // Value tensor of shape [1, 1, 1]
  43. var v = np.array(new[, ,] { { { 1.6f } } }, dtype: np.float32);
  44. var _tup_1 = new BaseDenseAttention(new())._apply_scores(scores: scores, value: v);
  45. var actual = _tup_1.Item1;
  46. var actual_scores = _tup_1.Item2;
  47. // Expected softmax_scores = [[[1]]]
  48. var expected_scores = np.array(new[, ,] { { { 1f } } }, dtype: np.float32);
  49. Assert.AreEqual(expected_scores, actual_scores.numpy());
  50. // Expected tensor of shape [1, 1, 1].
  51. // expected000 = softmax_scores[0, 0] * 1.6 = 1.6
  52. var expected = np.array(new[, ,] { { { 1.6f } } }, dtype: np.float32);
  53. Assert.AreEqual(expected, actual.numpy());
  54. }
  55. [TestMethod]
  56. public void test_multi_dim_with_mask()
  57. {
  58. // Scores tensor of shape [1, 1, 3]
  59. var scores = np.array(new[, ,] { { { 1f, 0f, 1f } } }, dtype: np.float32);
  60. // Value tensor of shape [1, 3, 1]
  61. var v = np.array(new[, ,] { { { 1.6f }, { 0.7f }, { -0.8f } } }, dtype: np.float32);
  62. // Scores mask tensor of shape [1, 1, 3]
  63. var scores_mask = np.array(new[, ,] { { { true, true, false } } }, dtype: np.@bool);
  64. var _tup_1 = new BaseDenseAttention(new())._apply_scores(scores: scores, value: v, scores_mask: scores_mask);
  65. var actual = _tup_1.Item1;
  66. var actual_scores = _tup_1.Item2;
  67. // Expected softmax scores = softmax(scores) with zeros in positions where
  68. // v_mask == False.
  69. // => softmax_scores000 = exp(1)/(exp(1) + exp(0)) = 0.73105857863
  70. // softmax_scores001 = exp(0)/(exp(1) + exp(0)) = 0.26894142137
  71. // softmax_scores002 = 0
  72. var expected_scores = np.array(new[, ,] { { { 0.73105857863f, 0.26894142137f, 0f } } }, dtype: np.float32);
  73. Assert.AreEqual(expected_scores, actual_scores.numpy());
  74. // Expected tensor of shape [1, 1, 1].
  75. // expected000 = 0.73105857863 * 1.6 + 0.26894142137 * 0.7 - 0 * 0.8
  76. // = 1.35795272077
  77. //Actually the output is 1.3579528
  78. var expected = np.array(new[, ,] { { { 1.3579528f } } }, dtype: np.float32);
  79. Assert.AreEqual(expected, actual.numpy());
  80. }
  81. [TestMethod]
  82. public void test_multi_dim_no_mask()
  83. {
  84. // Scores tensor of shape [1, 1, 3]
  85. var scores = np.array(new[, ,] { { { 1f, 0f, 1f } } }, dtype: np.float32);
  86. // Value tensor of shape [1, 3, 1]
  87. var v = np.array(new[, ,] { { { 1.6f }, { 0.7f }, { -0.8f } } }, dtype: np.float32);
  88. var _tup_1 = new BaseDenseAttention(new())._apply_scores(scores: scores, value: v);
  89. var actual = _tup_1.Item1;
  90. var actual_scores = _tup_1.Item2;
  91. // Expected softmax_scores = softmax(scores).
  92. // => softmax_scores000 = exp(1)/(exp(1) + exp(0) + exp(1))
  93. // = 0.42231879825
  94. // softmax_scores001 = exp(0)/(exp(1) + exp(0) + exp(1))
  95. // = 0.15536240349
  96. // softmax_scores002 = exp(1)/(exp(1) + exp(0) + exp(1))
  97. // = 0.42231879825
  98. //Actually the output is 0.42231882, 0.15536241, 0.42231882
  99. var expected_scores = np.array(new[, ,] { { { 0.42231882f, 0.15536241f, 0.42231882f } } }, dtype: np.float32);
  100. Assert.AreEqual(expected_scores, actual_scores.numpy());
  101. // Expected tensor of shape [1, 1, 1].
  102. // expected000 = 0.42231879825 * 1.6 + 0.15536240349 * 0.7
  103. // - 0.42231879825 * 0.8
  104. // = 0.44660872104
  105. //Actually the output is 0.44660875
  106. var expected = np.array(new[, ,] { { { 0.44660875f } } }, dtype: np.float32);
  107. Assert.AreEqual(expected, actual.numpy());
  108. }
  109. [TestMethod]
  110. public void test_one_dim_batch_size_two()
  111. {
  112. // Scores tensor of shape [2, 1, 1]
  113. var scores = np.array(new[, ,] { { { 1.1f } }, { { 2.1f } } }, dtype: np.float32);
  114. // Value tensor of shape [2, 1, 1]
  115. var v = np.array(new[, ,] { { { 1.6f } }, { { 2.6f } } }, dtype: np.float32);
  116. // Scpres mask tensor of shape [2, 1, 1]
  117. var scores_mask = np.array(new[, ,] { { { true } }, { { true } } }, dtype: np.@bool);
  118. var _tup_1 = new BaseDenseAttention(new())._apply_scores(scores: scores, value: v, scores_mask: scores_mask);
  119. var actual = _tup_1.Item1;
  120. var actual_scores = _tup_1.Item2;
  121. // Expected softmax_scores = [[[1]], [[1]]]
  122. var expected_scores = np.array(new[, ,] { { { 1f } }, { { 1f } } }, dtype: np.float32);
  123. Assert.AreEqual(expected_scores, actual_scores.numpy());
  124. // Expected tensor of shape [2, 1, 1].
  125. // expected000 = softmax_scores[0, 0] * 1.6 = 1.6
  126. // expected100 = softmax_scores[1, 0] * 2.6 = 2.6
  127. var expected = np.array(new[, ,] { { { 1.6f } }, { { 2.6f } } }, dtype: np.float32);
  128. Assert.AreEqual(expected, actual.numpy());
  129. }
  130. [TestMethod]
  131. public void test_shape_with_dropout()
  132. {
  133. // scores: Scores float tensor of shape `[batch_size, tq, tv]`.
  134. // value: Value tensor of shape `[batch_size, tv, dim]`.
  135. var batch_size = 4;
  136. var tq = 5;
  137. var tv = 6;
  138. var dim = 7;
  139. var scores = np.ones((batch_size, tq, tv));
  140. var value = np.ones((batch_size, tv, dim));
  141. var _tup_1 = new BaseDenseAttention(new BaseDenseAttentionArgs { dropout = 0.1f })
  142. ._apply_scores(scores: scores, value: value, training: false);
  143. var actual = _tup_1.Item1;
  144. var actual_scores = _tup_1.Item2;
  145. // Expected Tensor of shape `[batch_size, tq, tv]`.
  146. var expected_scores_shape = new[] {
  147. batch_size,
  148. tq,
  149. tv
  150. };
  151. Assert.AreEqual(expected_scores_shape, tf.shape(actual_scores).numpy());
  152. // Expected Tensor of shape `[batch_size, tq, dim]`.
  153. var expected_shape = new[] {
  154. batch_size,
  155. tq,
  156. dim
  157. };
  158. Assert.AreEqual(expected_shape, tf.shape(actual).numpy());
  159. }
  160. #endregion
  161. // ------------------------------------------------------------------
  162. #region Attention
  163. [TestMethod]
  164. public void test_example()
  165. {
  166. //Variable-length int sequences.
  167. var query_input = keras.Input((1000), dtype: TF_DataType.TF_INT32);
  168. var value_input = keras.Input((1000), dtype: TF_DataType.TF_INT32);
  169. // Embedding lookup.
  170. var token_embedding = keras.layers.Embedding(input_dim: 1000, output_dim: 64);
  171. // Query embeddings of shape [batch_size, Tq, dimension].
  172. var query_embeddings = token_embedding.Apply(query_input);
  173. // Value embeddings of shape [batch_size, Tv, dimension].
  174. var value_embeddings = token_embedding.Apply(value_input);
  175. // CNN layer.
  176. var cnn_layer = keras.layers.Conv1D(
  177. filters: 100,
  178. kernel_size: 4,
  179. // Use 'same' padding so outputs have the same shape as inputs.
  180. padding: "same",
  181. activation: "relu");
  182. var cnn_layer2 = keras.layers.Conv1D(
  183. filters: 100,
  184. kernel_size: 4,
  185. // Use 'same' padding so outputs have the same shape as inputs.
  186. padding: "same",
  187. activation: "relu");
  188. // Query encoding of shape [batch_size, Tq, filters].
  189. var query_seq_encoding = cnn_layer.Apply(query_embeddings);
  190. // Value encoding of shape [batch_size, Tv, filters].
  191. var value_seq_encoding = cnn_layer2.Apply(value_embeddings);
  192. // Query-value attention of shape [batch_size, Tq, filters].
  193. var query_value_attention_seq = keras.layers.Attention().Apply(
  194. (query_seq_encoding, value_seq_encoding));
  195. // Reduce over the sequence axis to produce encodings of shape
  196. // [batch_size, filters].
  197. var query_encoding = keras.layers.GlobalAveragePooling1D().Apply(
  198. query_seq_encoding);
  199. var query_value_attention = keras.layers.GlobalAveragePooling1D().Apply(
  200. query_value_attention_seq);
  201. // Concatenate query and document encodings to produce a DNN input layer.
  202. var input_layer = keras.layers.Concatenate().Apply(
  203. (query_encoding, query_value_attention));
  204. // Add DNN layers, and create Model.
  205. // ...
  206. }
  207. [TestMethod]
  208. public void test_calculate_scores_one_dim()
  209. {
  210. // Query tensor of shape [1, 1, 1]
  211. var q = np.array(new[,,] { { { 1.1f } } }, dtype: np.float32);
  212. // Key tensor of shape [1, 1, 1]
  213. var k = np.array(new[,,] { { { 1.6f } } }, dtype: np.float32);
  214. var attention_layer = keras.layers.Attention();
  215. //attention_layer.build((1));
  216. var actual = attention_layer._calculate_scores(query: q, key: k);
  217. // Expected tensor of shape [1, 1, 1].
  218. // expected000 = 1.1*1.6 = 1.76
  219. // Actually the output is 1.7600001
  220. var expected = np.array(new[,,] { { { 1.7600001f } } }, dtype: np.float32);
  221. Assert.AreEqual(expected, actual.numpy());
  222. }
  223. [TestMethod]
  224. public void test_calculate_scores_multi_dim()
  225. {
  226. // Query tensor of shape [1, 2, 4]
  227. var q = np.array(new[, ,] { {
  228. { 1f, 1.1f, 1.2f, 1.3f },
  229. { 2f, 2.1f, 2.2f, 2.3f }
  230. } }, dtype: np.float32);
  231. // Key tensor of shape [1, 3, 4]
  232. var k = np.array(new[, ,] { {
  233. { 1.5f, 1.6f, 1.7f, 1.8f },
  234. { 2.5f, 2.6f, 2.7f, 2.8f },
  235. { 3.5f, 3.6f, 3.7f, 3.8f }
  236. } }, dtype: np.float32);
  237. var attention_layer = keras.layers.Attention();
  238. //attention_layer.build(((1, 2, 4), (1, 3, 4)));
  239. var actual = attention_layer._calculate_scores(query: q, key: k);
  240. // Expected tensor of shape [1, 2, 3].
  241. // expected000 = 1.*1.5+1.1*1.6+1.2*1.7+1.3*1.8 = 7.64
  242. // expected001 = 1.*2.5+1.1*2.6+1.2*2.7+1.3*2.8 = 12.24
  243. // expected002 = 1.*3.5+1.1*3.6+1.2*3.7+1.3*3.8 = 16.84
  244. // expected010 = 2.*1.5+2.1*1.6+2.2*1.7+2.3*1.8 = 14.24
  245. // expected011 = 2.*2.5+2.1*2.6+2.2*2.7+2.3*2.8 = 22.84
  246. // expected012 = 2.*3.5+2.1*3.6+2.2*3.7+2.3*3.8 = 31.44
  247. // Actually the output000 is 7.6400003, the output012 is 31.439999
  248. var expected = np.array(new[, ,] { {
  249. { 7.6400003f, 12.24f, 16.84f },
  250. { 14.24f, 22.84f, 31.439999f }
  251. } }, dtype: np.float32);
  252. Assert.AreEqual(expected, actual.numpy());
  253. }
  254. [TestMethod]
  255. public void test_calculate_scores_multi_dim_concat()
  256. {
  257. // Query tensor of shape [1, 2, 4]
  258. var q = np.array(new[, ,] { {
  259. { 1f, 1.1f, 1.2f, 1.3f },
  260. { 2f, 2.1f, 2.2f, 2.3f }
  261. } }, dtype: np.float32);
  262. // Key tensor of shape [1, 3, 4]
  263. var k = np.array(new[, ,] { {
  264. { 1.5f, 1.6f, 1.7f, 1.8f },
  265. { 2.5f, 2.6f, 2.7f, 2.8f },
  266. { 3.5f, 3.6f, 3.7f, 3.8f }
  267. } }, dtype: np.float32);
  268. var attention_layer = keras.layers.Attention(score_mode: "concat");
  269. //attention_layer.concat_score_weight = 1;
  270. attention_layer.concat_score_weight = base_layer_utils.make_variable(new VariableArgs() {
  271. Name = "concat_score_weight",
  272. Shape = (1),
  273. DType = TF_DataType.TF_FLOAT,
  274. Getter = base_layer_utils.make_variable,
  275. Overwrite = true,
  276. Initializer = tf.ones_initializer,
  277. Synchronization = VariableSynchronization.Auto,
  278. Aggregation = VariableAggregation.None,
  279. Trainable = true
  280. });
  281. //attention_layer.build(((1, 2, 4), (1, 3, 4)));
  282. //var actual = keras.backend.get_value(attention_layer._calculate_scores(query: q, key: k));
  283. var actual = attention_layer._calculate_scores(query: q, key: k);
  284. // pylint:disable=line-too-long
  285. // expected000 = tanh(1.+1.5) + tanh(1.1+1.6) + tanh(1.2+1.7) + tanh(1.3+1.8) = 3.96753427840
  286. // expected001 = tanh(1.+2.5) + tanh(1.1+2.6) + tanh(1.2+2.7) + tanh(1.3+2.8) = 3.99558784825
  287. // expected002 = tanh(1.+3.5) + tanh(1.1+3.6) + tanh(1.2+3.7) + tanh(1.3+3.8) = 3.99940254147
  288. // expected010 = tanh(2.+1.5) + tanh(2.1+1.6) + tanh(2.2+1.7) + tanh(2.3+1.8) = 3.99558784825
  289. // expected011 = tanh(2.+2.5) + tanh(2.1+2.6) + tanh(2.2+2.7) + tanh(2.3+2.8) = 3.99940254147
  290. // expected012 = tanh(2.+3.5) + tanh(2.1+3.6) + tanh(2.2+3.7) + tanh(2.3+3.8) = 3.99991913657
  291. //Actually the output012 is 3.9999194
  292. var expected = np.array(new[, ,] { {
  293. { 3.96753427840f, 3.99558784825f, 3.99940254147f },
  294. { 3.99558784825f, 3.99940254147f, 3.9999194f }
  295. } }, dtype: np.float32);
  296. Assert.AreEqual(expected, actual.numpy());
  297. }
  298. #endregion
  299. }
  300. }