
GradientsTest.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using NumSharp;
using Tensorflow;
using static Tensorflow.Python;

namespace TensorFlowNET.UnitTest.gradients_test
{
    [TestClass]
    public class GradientsTest : PythonTest
    {
        [Ignore("TODO")]
        [TestMethod]
        public void testGradients()
        {
            with(tf.Graph().as_default(), g =>
            {
                var inp = tf.constant(1.0, shape: new[] { 32, 100 }, name: "in");
                var w = tf.constant(1.0, shape: new[] { 100, 10 }, name: "w");
                var b = tf.constant(1.0, shape: new[] { 10 }, name: "b");
                var xw = math_ops.matmul(inp, w, name: "xw");
                var h = nn_ops.bias_add(xw, b, name: "h");
                var w_grad = gradients_impl.gradients(new[] { h }, new[] { w })[0];
                self.assertEquals("MatMul", w_grad.op.type);
                // TODO: Operation._original_op
                //self.assertEquals(w_grad.op._original_op, xw.op);
                self.assertTrue((bool)w_grad.op.get_attr("transpose_a"));
                self.assertFalse((bool)w_grad.op.get_attr("transpose_b"));
            });
        }
        [TestMethod]
        public void testBatchMatMulGradient()
        {
            // a is a 2x3x3 batch holding the matrices [1..9] and [10..18]; b = a / 2.
            var a = tf.constant(np.array(Enumerable.Range(1, 18).Select(elem => (float)elem).ToArray()), shape: new[] { 2, 3, 3 });
            var b = tf.divide(a, tf.constant(2.0f));
            var c = tf.batch_matmul(a, b);
            // With a and b both in stop_gradients, dc/da = ones @ b^T and dc/db = a^T @ ones.
            var g = tf.gradients(c, new[] { a, b }, stop_gradients: new[] { a, b });
            var checkG = new[]
            {
                // dc/da for both batches
                3.0f, 7.5f, 12.0f,
                3.0f, 7.5f, 12.0f,
                3.0f, 7.5f, 12.0f,
                16.5f, 21.0f, 25.5f,
                16.5f, 21.0f, 25.5f,
                16.5f, 21.0f, 25.5f,
                // dc/db for both batches
                12.0f, 12.0f, 12.0f,
                15.0f, 15.0f, 15.0f,
                18.0f, 18.0f, 18.0f,
                39.0f, 39.0f, 39.0f,
                42.0f, 42.0f, 42.0f,
                45.0f, 45.0f, 45.0f
            };
            using (var sess = tf.Session())
            {
                var result = sess.run(g);
                // Flatten both gradient tensors and compare against the expected values.
                var resultList = result[0].GetData<float>().ToList();
                resultList.AddRange(result[1].GetData<float>());
                Console.WriteLine(result.ToString());
                CollectionAssert.AreEqual(resultList.ToArray(), checkG);
            }
        }
        [Ignore("TODO")]
        [TestMethod]
        public void testUnusedOutput()
        {
            //def testUnusedOutput(self):
            //  with ops.Graph().as_default():
            //    w = constant(1.0, shape=[2, 2])
            //    x = constant(1.0, shape=[2, 2])
            //    wx = math_ops.matmul(w, x)
            //    split_wx = array_ops.split(value=wx, num_or_size_splits=2, axis=0)
            //    c = math_ops.reduce_sum(split_wx[1])
            //    gw = gradients.gradients(c, [w])[0]
            //    self.assertEquals("MatMul", gw.op.type)
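
            // A hedged C# sketch of the port above: tf.constant, math_ops.matmul,
            // and gradients_impl.gradients are used elsewhere in this file, while
            // array_ops.split and math_ops.reduce_sum are assumed to exist with
            // analogous signatures, so the sketch stays commented out.
            //var w = tf.constant(1.0, shape: new[] { 2, 2 });
            //var x = tf.constant(1.0, shape: new[] { 2, 2 });
            //var wx = math_ops.matmul(w, x);
            //var split_wx = array_ops.split(wx, num_split: 2, axis: 0);
            //var c = math_ops.reduce_sum(split_wx[1]);
            //var gw = gradients_impl.gradients(new[] { c }, new[] { w })[0];
            //// only the second split output feeds c, yet the MatMul gradient is built
            //self.assertEquals("MatMul", gw.op.type);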
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testColocateGradients()
        {
            //def testColocateGradients(self):
            //  with ops.Graph().as_default() as g:
            //    w = constant(1.0, shape=[1, 1])
            //    x = constant(1.0, shape=[1, 2])
            //    with g.device("/device:GPU:0"):
            //      wx = math_ops.matmul(w, x)
            //    gw = gradients.gradients(wx, [w], colocate_gradients_with_ops=True)[0]
            //    self.assertEqual(gw.op.colocation_groups(), wx.op.colocation_groups())
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testColocateGradientsWithAggregation()
        {
            //def testColocateGradientsWithAggregation(self):
            //  with ops.Graph().as_default() as g:
            //    with g.device("/device:GPU:1"):
            //      w = constant(1.0, shape=[1, 1])
            //      x = constant(1.0, shape=[1, 2])
            //      y = constant(1.0, shape=[1, 2])
            //      wx = math_ops.matmul(w, x)
            //      wy = math_ops.matmul(w, y)
            //    with g.device("/device:GPU:0"):
            //      z = wx + wy
            //    gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
            //    self.assertEqual(gw1.op.colocation_groups(), wx.op.colocation_groups())
            //    gw2 = gradients.gradients(z, [w], colocate_gradients_with_ops=False)[0]
            //    self.assertTrue(wx.op.colocation_groups() != gw2.op.colocation_groups())
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testColocateGradientsWithAggregationInMultipleDevices()
        {
            //def testColocateGradientsWithAggregationInMultipleDevices(self):
            //  with ops.Graph().as_default() as g:
            //    with g.device("/device:GPU:1"):
            //      w = constant(1.0, shape=[1, 1])
            //      x = constant(1.0, shape=[1, 2])
            //      y = constant(1.0, shape=[1, 2])
            //    with g.device("/task:1"):
            //      wx = math_ops.matmul(w, x)
            //    with g.device("/task:2"):
            //      wy = math_ops.matmul(w, y)
            //    with g.device("/device:GPU:0"):
            //      z = wx + wy
            //    gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
            //    self.assertEqual(gw1.op.colocation_groups(), w.op.colocation_groups())
            //    gw2 = gradients.gradients(z, [w], colocate_gradients_with_ops=False)[0]
            //    self.assertTrue(w.op.colocation_groups() != gw2.op.colocation_groups())
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testColocateGradientsWithGateGradients()
        {
            //def testColocateGradientsWithGateGradients(self):
            //  if not test_util.is_gpu_available():
            //    self.skipTest("No GPU available")
            //  with ops.Graph().as_default() as g:
            //    with g.device("/device:CPU:0"):
            //      x = constant(1.0, shape=[1, 1])
            //      y = constant(1.0, shape=[1, 1])
            //      s = x + y
            //    with g.device("/device:GPU:0"):
            //      z = math_ops.reduce_sum(s)
            //    gz_x = gradients.gradients(z, [x], colocate_gradients_with_ops=True,
            //                               gate_gradients=True)[0]
            //    with session.Session():
            //      # Make sure the placer doesn't complain.
            //      self.evaluate(gz_x)
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testBoundaryStop()
        {
            //def testBoundaryStop(self):
            //  # Test that we don't differentiate 'x'. The gradient function for 'x' is
            //  # set explicitly to None so we will get an exception if the gradient code
            //  # tries to differentiate 'x'.
            //  with ops.Graph().as_default():
            //    c = constant(1.0)
            //    x = array_ops.identity(c)
            //    y = x + 1.0
            //    z = y + 1
            //    grads = gradients.gradients(z, [x])
            //    self.assertTrue(all(x is not None for x in grads))
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testBoundaryContinue()
        {
            //@test_util.run_v1_only("b/120545219")
            //def testBoundaryContinue(self):
            //  # Test that we differentiate both 'x' and 'y' correctly when x is a
            //  # predecessor of y.
            //  with self.cached_session():
            //    x = constant(1.0)
            //    y = x * 2.0
            //    z = y * 3.0
            //    grads = gradients.gradients(z, [x, y])
            //    self.assertTrue(all(x is not None for x in grads))
            //    self.assertEqual(6.0, grads[0].eval())
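
            // A hedged C# sketch; tf.constant, tf.gradients, tf.Session, and
            // sess.run are all used in testBatchMatMulGradient above, while
            // tf.multiply is assumed, so the port stays commented out.
            //var x = tf.constant(1.0f);
            //var y = tf.multiply(x, tf.constant(2.0f));
            //var z = tf.multiply(y, tf.constant(3.0f));
            //var grads = tf.gradients(z, new[] { x, y });
            //using (var sess = tf.Session())
            //{
            //    // dz/dx = 6 via the chain rule through y; dz/dy = 3
            //    self.assertEquals(6.0f, sess.run(grads)[0].GetData<float>().First());
            //}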
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testAggregationMethodAccumulateN()
        {
            //@test_util.run_v1_only("b/120545219")
            //def testAggregationMethodAccumulateN(self):
            //  with self.cached_session():
            //    x = constant(1.0)
            //    y = x * 2.0
            //    z = y + y + y + y + y + y + y + y + y + y
            //    grads = gradients.gradients(
            //        z, [x, y],
            //        aggregation_method=gradients.AggregationMethod.
            //        EXPERIMENTAL_ACCUMULATE_N)
            //    self.assertTrue(all(x is not None for x in grads))
            //    self.assertEqual(20.0, grads[0].eval())
            //    self.assertEqual(10.0, grads[1].eval())
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testAggregationMethodAddN()
        {
            //@test_util.run_v1_only("b/120545219")
            //def testAggregationMethodAddN(self):
            //  with self.cached_session():
            //    x = constant(1.0)
            //    y = x * 2.0
            //    z = y + y + y + y + y + y + y + y + y + y
            //    grads = gradients.gradients(
            //        z, [x, y], aggregation_method=gradients.AggregationMethod.ADD_N)
            //    self.assertTrue(all(x is not None for x in grads))
            //    self.assertEqual(20.0, grads[0].eval())
            //    self.assertEqual(10.0, grads[1].eval())
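
            // A hedged C# sketch; tf.gradients appears above, but the
            // aggregation_method parameter, the AggregationMethod enum, and
            // tf.add_n are all assumed and unverified against TF.NET, so the
            // port stays commented out.
            //var x = tf.constant(1.0f);
            //var y = tf.multiply(x, tf.constant(2.0f));
            //var z = tf.add_n(Enumerable.Repeat(y, 10).ToArray());  // assumed helper
            //var grads = tf.gradients(z, new[] { x, y }, aggregation_method: AggregationMethod.ADD_N);
            //// dz/dx = 20 and dz/dy = 10, matching the Python expectations above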
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testAggregationMethodTree()
        {
            //@test_util.run_v1_only("b/120545219")
            //def testAggregationMethodTree(self):
            //  with self.cached_session():
            //    x = constant(1.0)
            //    y = x * 2.0
            //    z = y + y + y + y + y + y + y + y + y + y
            //    grads = gradients.gradients(
            //        z, [x, y],
            //        aggregation_method=gradients.AggregationMethod.EXPERIMENTAL_TREE)
            //    self.assertTrue(all(x is not None for x in grads))
            //    self.assertEqual(20.0, grads[0].eval())
            //    self.assertEqual(10.0, grads[1].eval())
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testNoGradientForStringOutputs()
        {
            //def testNoGradientForStringOutputs(self):
            //  with ops.Graph().as_default():
            //    def _TestOpGrad(_, float_grad, string_grad):
            //      """Gradient function for TestStringOutput."""
            //      self.assertEquals(float_grad.dtype, dtypes.float32)
            //      self.assertFalse(string_grad)
            //      return float_grad
            //    ops.RegisterGradient("TestStringOutput")(_TestOpGrad)
            //    c = constant(1.0)
            //    x, _ = test_ops.test_string_output(c)
            //    z = x * 2.0
            //    w = z * 3.0
            //    grads = gradients.gradients(z, [c])
            //    self.assertTrue(isinstance(grads[0], ops.Tensor))
            //    grads = gradients.gradients(w, [c])
            //    self.assertTrue(isinstance(grads[0], ops.Tensor))
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testSingletonIndexedSlices()
        {
            //def testSingletonIndexedSlices(self):
            //  with ops.Graph().as_default():
            //    x = array_ops.placeholder(dtypes.float32)
            //    y = array_ops.identity(x)
            //    dy = ops.IndexedSlices(
            //        array_ops.placeholder(dtypes.float32),
            //        array_ops.placeholder(dtypes.int32))
            //    dx, = gradients.gradients(y, x, grad_ys=dy)
            //    # The IndexedSlices gradient of tf.identity is the identity map.
            //    with self.cached_session() as sess:
            //      vdx, vdy = sess.run(
            //          [dx, dy], feed_dict={x: [1.0], dy.indices: [0], dy.values: [2.0]})
            //    self.assertEqual(vdx, vdy)
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testNonDifferentiableSwitchInWhileLoop()
        {
            //@test_util.run_v1_only("b/120545219")
            //def testNonDifferentiableSwitchInWhileLoop(self):
            //  with ops.Graph().as_default():
            //    v = array_ops.placeholder(dtypes.float32, [])
            //    def _Step(i, a, ta):
            //      a += math_ops.cast(v, dtypes.int32)
            //      return (i + 1, a, ta.write(i, a))
            //    n = 4
            //    i, _, ta = control_flow_ops.while_loop(
            //        lambda i, *_: i < n,
            //        _Step, [0, 0, tensor_array_ops.TensorArray(
            //            dtypes.int32, size=n)])
            //    target = ta.read(i - 1)
            //    grad, = gradients.gradients(target, v)
            //    self.assertIsNone(grad)
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testVariableReadValueGradient()
        {
            //def testVariableReadValueGradient(self):
            //  with ops.Graph().as_default():
            //    init = constant_op.constant(100.0)
            //    var = variables.Variable(init)
            //    gradient = gradients.gradients(var.read_value(), var)
            //    self.assertIsNotNone(gradient)
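
            // A hedged C# sketch; tf.constant and tf.gradients appear above,
            // while tf.Variable and read_value() are assumed TF.NET members,
            // so the port stays commented out.
            //var init = tf.constant(100.0f);
            //var variable = tf.Variable(init);
            //var gradient = tf.gradients(variable.read_value(), variable);
            //self.assertIsNotNone(gradient);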
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testVariableAsGraphElementGradient()
        {
            //def testVariableAsGraphElementGradient(self):
            //  with ops.Graph().as_default() as graph:
            //    init = constant_op.constant(100.0)
            //    var = variables.Variable(init)
            //    gradient = gradients.gradients(graph.as_graph_element(var), var)
            //    self.assertIsNotNone(gradient)
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testVariableRefGradient()
        {
            //@test_util.run_v1_only("b/120545219")
            //def testVariableRefGradient(self):
            //  with ops.Graph().as_default():
            //    init = constant_op.constant(100.0)
            //    var = variables.VariableV1(init)
            //    gradient = gradients.gradients(var._ref(), var)
            //    self.assertIsNotNone(gradient)
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testDependentYs()
        {
            //@test_util.run_v1_only("b/120545219")
            //def testDependentYs(self):
            //  with self.cached_session():
            //    x = constant_op.constant(3.0)
            //    y = math_ops.square(x)
            //    y1 = math_ops.square(y)
            //    y2 = math_ops.square(y1)
            //    g = gradients.gradients([y, y2], x)
            //    self.assertAllClose(17502.0, g[0].eval())
            //    g = gradients.gradients(y + y2, x)
            //    self.assertAllClose(17502.0, g[0].eval())
            //    z = array_ops.identity(y)
            //    z2 = array_ops.identity(y2)
            //    g = gradients.gradients([z, z2], x)
            //    self.assertAllClose(17502.0, g[0].eval())
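
            // Why 17502: y = x^2 and y2 = ((x^2)^2)^2 = x^8, so
            // d(y + y2)/dx = 2x + 8x^7 = 2*3 + 8*2187 = 17502 at x = 3.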
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testPartialDerivatives()
        {
            //@test_util.run_v1_only("b/120545219")
            //def testPartialDerivatives(self):
            //  with self.cached_session():
            //    x = constant_op.constant(1.)
            //    y = 2 * x
            //    z = x + y
            //    totalg = gradients.gradients(z, [x, y])
            //    self.assertEqual([3.0, 1.0], [g.eval() for g in totalg])
            //    partialg = gradients.gradients(z, [x, y], stop_gradients=[x, y])
            //    self.assertEqual([1.0, 1.0], [g.eval() for g in partialg])
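
            // A hedged C# sketch; tf.constant, tf.gradients, and the
            // stop_gradients parameter are all exercised in testBatchMatMulGradient
            // above, while tf.multiply and tf.add are assumed, so it stays commented.
            //var x = tf.constant(1.0f);
            //var y = tf.multiply(tf.constant(2.0f), x);
            //var z = tf.add(x, y);
            //// total derivative: dz/dx = 3 (x also feeds z through y), dz/dy = 1
            //var totalG = tf.gradients(z, new[] { x, y });
            //// partial derivatives: with x and y held constant, both are 1
            //var partialG = tf.gradients(z, new[] { x, y }, stop_gradients: new[] { x, y });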
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testStopGradients()
        {
            //@test_util.run_v1_only("b/120545219")
            //def testStopGradients(self):
            //  def _MakeGraph(rng, stop_gradients=()):
            //    def _FunctionOf(xs, k=3):
            //      return ops.convert_to_tensor(
            //          sum(math_ops.matmul(rng.rand(k, k), x) for x in xs)
            //          + rng.rand(k, k))
            //    a = _FunctionOf([])
            //    if "a" in stop_gradients: a = array_ops.stop_gradient(a)
            //    b = _FunctionOf([a])
            //    if "b" in stop_gradients: b = array_ops.stop_gradient(b)
            //    c = _FunctionOf([a, b])
            //    if "c" in stop_gradients: c = array_ops.stop_gradient(c)
            //    d = _FunctionOf([b, c])
            //    if "d" in stop_gradients: d = array_ops.stop_gradient(d)
            //    return dict(a=a, b=b, c=c, d=d)
            //  def _Gradients(ys, xs, **kwargs):
            //    dydxs = gradients.gradients(ys, xs, **kwargs)
            //    dydxs = [0. * x if dydx is None else dydx
            //             for x, dydx in zip(xs, dydxs)]
            //    return dydxs
            //  seed = np.random.randint(1000)
            //  cases = []
            //  subsets = [""] + "a b c d ab ac ad bc bd cd abc abd acd bcd abcd".split()
            //  graph = _MakeGraph(np.random.RandomState(seed))
            //  for constants in subsets:
            //    graph_with_stops = _MakeGraph(np.random.RandomState(seed), constants)
            //    for variables_ in subsets:
            //      # compute the gradient when stopped using tf.stop_gradients
            //      grad1 = _Gradients([graph_with_stops["d"]],
            //                         [graph_with_stops[v] for v in variables_])
            //      # compute the gradient when stopped using the stop_gradients kwarg
            //      grad2 = _Gradients([graph["d"]],
            //                         [graph[v] for v in variables_],
            //                         stop_gradients=[graph[v] for v in constants])
            //      cases.append(dict(grad1=grad1, grad2=grad2,
            //                        constants=constants, variables=variables_))
            //  # evaluate all tensors in one call to session.run for speed
            //  with self.cached_session() as sess:
            //    results = sess.run([(case["grad1"], case["grad2"]) for case in cases])
            //  for (npgrad1, npgrad2), case in zip(results, cases):
            //    for a, b in zip(npgrad1, npgrad2):
            //      np.testing.assert_allclose(a, b)
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testUnconnectedGradientsNoneUnconnectedGradients()
        {
            //def testUnconnectedGradientsNoneUnconnectedGradients(self):
            //  with ops.Graph().as_default():
            //    x = constant(1.0, shape=[2, 2])
            //    y = constant(3.0, shape=[3, 1])
            //    grad = gradients.gradients(
            //        [y], [x], unconnected_gradients="none")
            //    self.assertIsNone(grad[0])
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testUnconnectedGradientsZerosUnconnectedGradients()
        {
            //def testUnconnectedGradientsZerosUnconnectedGradients(self):
            //  with ops.Graph().as_default():
            //    x = constant(1.0, shape=[2, 2])
            //    y = constant(3.0, shape=[3, 1])
            //    grads = gradients.gradients(
            //        [y], [x], unconnected_gradients="zero")
            //    with self.cached_session() as sess:
            //      self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(grads)[0])
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testUnconnectedGradientsZeroConnectedGradients()
        {
            //def testUnconnectedGradientsZeroConnectedGradients(self):
            //  with ops.Graph().as_default():
            //    x = constant(1.0)
            //    y = x * 3.0
            //    grad = gradients.gradients(
            //        [y], [x], unconnected_gradients="zero")
            //    with self.cached_session() as sess:
            //      self.assertEquals(3.0, self.evaluate(grad)[0])
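
            // A hedged C# sketch; an unconnected_gradients parameter on
            // tf.gradients is assumed here and has not been verified against
            // the TF.NET API, so the port stays commented out.
            //var x = tf.constant(1.0f);
            //var y = tf.multiply(x, tf.constant(3.0f));
            //var grad = tf.gradients(new[] { y }, new[] { x }, unconnected_gradients: "zero");
            //// y is connected to x, so the "zero" policy never applies: dy/dx = 3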
        }

        [Ignore("TODO")]
        [TestMethod]
        public void testUnknownUnconnectedGradientsValueGiven()
        {
            //def testUnknownUnconnectedGradientsValueGiven(self):
            //  with ops.Graph().as_default():
            //    x = constant(1.0)
            //    y = constant(1.0)
            //    with self.assertRaisesRegexp(
            //        ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
            //      gradients.gradients([y], [x], unconnected_gradients="nonsense")
        }
    }
}