
recurrent.py 28 kB

#! /usr/bin/python
# -*- coding: utf-8 -*-

import numpy as np
import tensorlayer as tl
from tensorlayer import logging
from tensorlayer.backend.ops.load_backend import BACKEND
from tensorlayer.layers.core import Module

__all__ = [
    'RNN',
    'RNNCell',
    'GRU',
    'LSTM',
    'GRUCell',
    'LSTMCell',
]


class RNNCell(Module):
    """An Elman RNN cell with tanh or ReLU non-linearity.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`.
    hidden_size : int
        The number of features in the hidden state `h`.
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``.
    act : activation function
        The non-linearity to use. Can be either 'tanh' or 'relu'. Default: 'tanh'.
    name : None or str
        A unique layer name.

    Inputs
    ----------
    inputs : tensor
        A tensor with shape `[batch_size, input_size]`.
    states : tensor or None
        A tensor with shape `[batch_size, hidden_size]`. When states is None, a zero state is used. Default: None.

    Returns
    ----------
    outputs : tensor
        A tensor with shape `[batch_size, hidden_size]`.
    states : tensor
        A tensor with shape `[batch_size, hidden_size]`, containing the next hidden state for each element in the batch.

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([4, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32])
    >>> cell = tl.layers.RNNCell(input_size=16, hidden_size=32, bias=True, act='tanh', name='rnncell_1')
    >>> y, h = cell(input, prev_h)
    >>> print(y.shape)

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        bias=True,
        act='tanh',
        name=None,
    ):
        super(RNNCell, self).__init__(name)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        if act not in ('relu', 'tanh'):
            raise ValueError("Activation should be 'tanh' or 'relu'.")
        self.act = act
        self.build(None)
        logging.info("RNNCell %s: input_size: %d hidden_size: %d act: %s" % (self.name, input_size, hidden_size, act))

    def __repr__(self):
        actstr = self.act
        s = ('{classname}(input_size={input_size}, hidden_size={hidden_size}')
        s += ', bias=True' if self.bias else ', bias=False'
        s += ', act=' + actstr
        if self.name is not None:
            s += ', name=\'{name}\''
        s += ')'
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def check_input(self, input_shape):
        if input_shape[1] != self.input_size:
            raise ValueError(
                'input should have consistent input_size. But got {}, expected {}'.format(
                    input_shape[1], self.input_size
                )
            )

    def check_hidden(self, input_shape, h_shape, hidden_label):
        if input_shape[0] != h_shape[0]:
            raise ValueError(
                'input batch size {} should match hidden{} batch size {}.'.format(
                    input_shape[0], hidden_label, h_shape[0]
                )
            )
        if h_shape[1] != self.hidden_size:
            raise ValueError(
                'hidden{} should have consistent hidden_size. But got {}, expected {}.'.format(
                    hidden_label, h_shape[1], self.hidden_size
                )
            )

    def build(self, inputs_shape):
        # Weights are initialized uniformly in [-stdv, stdv] with stdv = 1 / sqrt(hidden_size).
        stdv = 1.0 / np.sqrt(self.hidden_size)
        _init = tl.initializers.RandomUniform(minval=-stdv, maxval=stdv)
        self.weight_ih_shape = (self.hidden_size, self.input_size)
        self.weight_hh_shape = (self.hidden_size, self.hidden_size)
        self.weight_ih = self._get_weights("weight_ih", shape=self.weight_ih_shape, init=_init)
        self.weight_hh = self._get_weights("weight_hh", shape=self.weight_hh_shape, init=_init)
        if self.bias:
            self.bias_ih_shape = (self.hidden_size, )
            self.bias_hh_shape = (self.hidden_size, )
            self.bias_ih = self._get_weights('bias_ih', shape=self.bias_ih_shape, init=_init)
            self.bias_hh = self._get_weights('bias_hh', shape=self.bias_hh_shape, init=_init)
        else:
            self.bias_ih = None
            self.bias_hh = None
        self.rnncell = tl.ops.rnncell(
            weight_ih=self.weight_ih, weight_hh=self.weight_hh, bias_ih=self.bias_ih, bias_hh=self.bias_hh, act=self.act
        )

    def forward(self, inputs, states=None):
        input_shape = tl.get_tensor_shape(inputs)
        self.check_input(input_shape)
        if states is None:
            states = tl.zeros(shape=(input_shape[0], self.hidden_size), dtype=inputs.dtype)
        states_shape = tl.get_tensor_shape(states)
        self.check_hidden(input_shape, states_shape, hidden_label='h')
        output, states = self.rnncell(inputs, states)
        return output, states
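
# A minimal sketch (not part of the original file) of unrolling RNNCell over a sequence:
# the state returned at each step is fed back in as the next step's `states`. It reuses
# only the classes and helpers shown above; shapes follow the docstring conventions.
# >>> seq = tl.layers.Input([10, 4, 16], name='seq')   # [seq_len, batch_size, input_size]
# >>> cell = tl.layers.RNNCell(input_size=16, hidden_size=32, act='tanh')
# >>> h = None                                          # None -> zero initial state
# >>> outputs = []
# >>> for t in range(10):
# ...     y, h = cell(seq[t], h)                        # y and h: [batch_size, hidden_size]
# ...     outputs.append(y)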

class LSTMCell(Module):
    """A long short-term memory (LSTM) cell.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`.
    hidden_size : int
        The number of features in the hidden state `h`.
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``.
    name : None or str
        A unique layer name.

    Inputs
    ----------
    inputs : tensor
        A tensor with shape `[batch_size, input_size]`.
    states : tuple or None
        A tuple of two tensors `(h, c)`, each of shape `[batch_size, hidden_size]`. When states is None, zero states are used. Default: None.

    Returns
    ----------
    outputs : tensor
        A tensor with shape `[batch_size, hidden_size]`.
    states : tuple
        A tuple of two tensors `(h, c)`, each of shape `[batch_size, hidden_size]`, containing the next hidden state and next cell state for each element in the batch.

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([4, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32])
    >>> prev_c = tl.layers.Input([4, 32])
    >>> cell = tl.layers.LSTMCell(input_size=16, hidden_size=32, bias=True, name='lstmcell_1')
    >>> y, (h, c) = cell(input, (prev_h, prev_c))
    >>> print(y.shape)

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        bias=True,
        name=None,
    ):
        super(LSTMCell, self).__init__(name)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.build(None)
        logging.info("LSTMCell %s: input_size: %d hidden_size: %d " % (self.name, input_size, hidden_size))

    def __repr__(self):
        s = ('{classname}(input_size={input_size}, hidden_size={hidden_size}')
        s += ', bias=True' if self.bias else ', bias=False'
        if self.name is not None:
            s += ', name=\'{name}\''
        s += ')'
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def check_input(self, input_shape):
        if input_shape[1] != self.input_size:
            raise ValueError(
                'input should have consistent input_size. But got {}, expected {}'.format(
                    input_shape[1], self.input_size
                )
            )

    def check_hidden(self, input_shape, h_shape, hidden_label):
        if input_shape[0] != h_shape[0]:
            raise ValueError(
                'input batch size {} should match hidden{} batch size {}.'.format(
                    input_shape[0], hidden_label, h_shape[0]
                )
            )
        if h_shape[1] != self.hidden_size:
            raise ValueError(
                'hidden{} should have consistent hidden_size. But got {}, expected {}.'.format(
                    hidden_label, h_shape[1], self.hidden_size
                )
            )

    def build(self, inputs_shape):
        # The four LSTM gates are packed along the first dimension,
        # so the weights hold 4 * hidden_size rows.
        stdv = 1.0 / np.sqrt(self.hidden_size)
        _init = tl.initializers.RandomUniform(minval=-stdv, maxval=stdv)
        self.weight_ih_shape = (4 * self.hidden_size, self.input_size)
        self.weight_hh_shape = (4 * self.hidden_size, self.hidden_size)
        self.weight_ih = self._get_weights("weight_ih", shape=self.weight_ih_shape, init=_init)
        self.weight_hh = self._get_weights("weight_hh", shape=self.weight_hh_shape, init=_init)
        if self.bias:
            self.bias_ih_shape = (4 * self.hidden_size, )
            self.bias_hh_shape = (4 * self.hidden_size, )
            self.bias_ih = self._get_weights('bias_ih', shape=self.bias_ih_shape, init=_init)
            self.bias_hh = self._get_weights('bias_hh', shape=self.bias_hh_shape, init=_init)
        else:
            self.bias_ih = None
            self.bias_hh = None
        self.lstmcell = tl.ops.lstmcell(
            weight_ih=self.weight_ih, weight_hh=self.weight_hh, bias_ih=self.bias_ih, bias_hh=self.bias_hh
        )

    def forward(self, inputs, states=None):
        input_shape = tl.get_tensor_shape(inputs)
        self.check_input(input_shape)
        if states is not None:
            h, c = states
        else:
            h = tl.zeros(shape=(input_shape[0], self.hidden_size), dtype=inputs.dtype)
            c = tl.zeros(shape=(input_shape[0], self.hidden_size), dtype=inputs.dtype)
        h_shape = tl.get_tensor_shape(h)
        c_shape = tl.get_tensor_shape(c)
        self.check_hidden(input_shape, h_shape, hidden_label='h')
        self.check_hidden(input_shape, c_shape, hidden_label='c')
        output, new_h, new_c = self.lstmcell(inputs, h, c)
        return output, (new_h, new_c)
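
# A minimal sketch (not in the original file) of stepping LSTMCell through a sequence:
# unlike RNNCell, the state threaded through the loop is the tuple (h, c).
# >>> seq = tl.layers.Input([10, 4, 16], name='seq')   # [seq_len, batch_size, input_size]
# >>> cell = tl.layers.LSTMCell(input_size=16, hidden_size=32)
# >>> states = None                                     # None -> zero (h, c)
# >>> for t in range(10):
# ...     y, states = cell(seq[t], states)              # states == (h, c), each [batch_size, hidden_size]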

class GRUCell(Module):
    """A gated recurrent unit (GRU) cell.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`.
    hidden_size : int
        The number of features in the hidden state `h`.
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``.
    name : None or str
        A unique layer name.

    Inputs
    ----------
    inputs : tensor
        A tensor with shape `[batch_size, input_size]`.
    states : tensor or None
        A tensor with shape `[batch_size, hidden_size]`. When states is None, a zero state is used. Default: None.

    Returns
    ----------
    outputs : tensor
        A tensor with shape `[batch_size, hidden_size]`.
    states : tensor
        A tensor with shape `[batch_size, hidden_size]`, containing the next hidden state for each element in the batch.

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([4, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32])
    >>> cell = tl.layers.GRUCell(input_size=16, hidden_size=32, bias=True, name='grucell_1')
    >>> y, h = cell(input, prev_h)
    >>> print(y.shape)

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        bias=True,
        name=None,
    ):
        super(GRUCell, self).__init__(name)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.build(None)
        logging.info("GRUCell %s: input_size: %d hidden_size: %d " % (self.name, input_size, hidden_size))

    def __repr__(self):
        s = ('{classname}(input_size={input_size}, hidden_size={hidden_size}')
        s += ', bias=True' if self.bias else ', bias=False'
        if self.name is not None:
            s += ', name=\'{name}\''
        s += ')'
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def check_input(self, input_shape):
        if input_shape[1] != self.input_size:
            raise ValueError(
                'input should have consistent input_size. But got {}, expected {}'.format(
                    input_shape[1], self.input_size
                )
            )

    def check_hidden(self, input_shape, h_shape, hidden_label):
        if input_shape[0] != h_shape[0]:
            raise ValueError(
                'input batch size {} should match hidden{} batch size {}.'.format(
                    input_shape[0], hidden_label, h_shape[0]
                )
            )
        if h_shape[1] != self.hidden_size:
            raise ValueError(
                'hidden{} should have consistent hidden_size. But got {}, expected {}.'.format(
                    hidden_label, h_shape[1], self.hidden_size
                )
            )

    def build(self, inputs_shape):
        # The three GRU gates are packed along the first dimension,
        # so the weights hold 3 * hidden_size rows.
        stdv = 1.0 / np.sqrt(self.hidden_size)
        _init = tl.initializers.RandomUniform(minval=-stdv, maxval=stdv)
        self.weight_ih_shape = (3 * self.hidden_size, self.input_size)
        self.weight_hh_shape = (3 * self.hidden_size, self.hidden_size)
        self.weight_ih = self._get_weights("weight_ih", shape=self.weight_ih_shape, init=_init)
        self.weight_hh = self._get_weights("weight_hh", shape=self.weight_hh_shape, init=_init)
        if self.bias:
            self.bias_ih_shape = (3 * self.hidden_size, )
            self.bias_hh_shape = (3 * self.hidden_size, )
            self.bias_ih = self._get_weights('bias_ih', shape=self.bias_ih_shape, init=_init)
            self.bias_hh = self._get_weights('bias_hh', shape=self.bias_hh_shape, init=_init)
        else:
            self.bias_ih = None
            self.bias_hh = None
        self.grucell = tl.ops.grucell(
            weight_ih=self.weight_ih, weight_hh=self.weight_hh, bias_ih=self.bias_ih, bias_hh=self.bias_hh
        )

    def forward(self, inputs, states=None):
        input_shape = tl.get_tensor_shape(inputs)
        self.check_input(input_shape)
        if states is None:
            states = tl.zeros(shape=(input_shape[0], self.hidden_size), dtype=inputs.dtype)
        states_shape = tl.get_tensor_shape(states)
        self.check_hidden(input_shape, states_shape, hidden_label='h')
        output, states = self.grucell(inputs, states)
        return output, states
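
# Gate packing differs only in the leading factor: RNNCell builds hidden_size rows,
# LSTMCell 4 * hidden_size, GRUCell 3 * hidden_size. A quick shape check using the
# attributes set in build() above (a sketch, not part of the original file):
# >>> cell = tl.layers.GRUCell(input_size=16, hidden_size=32)
# >>> cell.weight_ih_shape   # (96, 16)  == (3 * 32, 16)
# >>> cell.weight_hh_shape   # (96, 32)  == (3 * 32, 32)
# >>> cell.bias_ih_shape     # (96,)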

class RNNBase(Module):
    """
    RNNBase class for RNN networks. It provides `forward` and other common methods for RNN, LSTM and GRU.
    """

    def __init__(
        self,
        mode,
        input_size,
        hidden_size,
        num_layers=1,
        bias=True,
        batch_first=False,
        dropout=0.0,
        bidirectional=False,
        name=None,
    ):
        super(RNNBase, self).__init__(name)
        self.mode = mode
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = dropout
        self.bidirectional = bidirectional
        self.build(None)
        logging.info(
            "%s: %s: input_size: %d hidden_size: %d num_layers: %d " %
            (self.mode, self.name, input_size, hidden_size, num_layers)
        )

    def __repr__(self):
        s = (
            '{classname}(input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}'
            ', dropout={dropout}'
        )
        s += ', bias=True' if self.bias else ', bias=False'
        s += ', bidirectional=True' if self.bidirectional else ', bidirectional=False'
        if self.name is not None:
            s += ', name=\'{name}\''
        s += ')'
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def build(self, inputs_shape):
        if BACKEND == 'tensorflow':
            # For the TensorFlow backend the per-layer, per-direction weights are created
            # explicitly and handed to tl.ops.rnnbase; other backends create them internally.
            bidirect = 2 if self.bidirectional else 1
            self.weights_fw = []
            self.bias_fw = []
            self.weights_bw = []
            self.bias_bw = []
            stdv = 1.0 / np.sqrt(self.hidden_size)
            _init = tl.initializers.RandomUniform(minval=-stdv, maxval=stdv)
            if self.mode == 'LSTM':
                gate_size = 4 * self.hidden_size
            elif self.mode == 'GRU':
                gate_size = 3 * self.hidden_size
            else:
                gate_size = self.hidden_size
            for layer in range(self.num_layers):
                for direction in range(bidirect):
                    # Layers after the first consume the (possibly concatenated) hidden states.
                    layer_input_size = self.input_size if layer == 0 else self.hidden_size * bidirect
                    if direction == 0:
                        self.w_ih = self._get_weights(
                            'weight_ih_l' + str(layer), shape=(gate_size, layer_input_size), init=_init
                        )
                        self.w_hh = self._get_weights(
                            'weight_hh_l' + str(layer), shape=(gate_size, self.hidden_size), init=_init
                        )
                        self.weights_fw.append(self.w_ih)
                        self.weights_fw.append(self.w_hh)
                        if self.bias:
                            self.b_ih = self._get_weights('bias_ih_l' + str(layer), shape=(gate_size, ), init=_init)
                            self.b_hh = self._get_weights('bias_hh_l' + str(layer), shape=(gate_size, ), init=_init)
                            self.bias_fw.append(self.b_ih)
                            self.bias_fw.append(self.b_hh)
                    else:
                        self.w_ih = self._get_weights(
                            'weight_ih_l' + str(layer) + '_reverse', shape=(gate_size, layer_input_size), init=_init
                        )
                        self.w_hh = self._get_weights(
                            'weight_hh_l' + str(layer) + '_reverse', shape=(gate_size, self.hidden_size), init=_init
                        )
                        self.weights_bw.append(self.w_ih)
                        self.weights_bw.append(self.w_hh)
                        if self.bias:
                            self.b_ih = self._get_weights(
                                'bias_ih_l' + str(layer) + '_reverse', shape=(gate_size, ), init=_init
                            )
                            self.b_hh = self._get_weights(
                                'bias_hh_l' + str(layer) + '_reverse', shape=(gate_size, ), init=_init
                            )
                            self.bias_bw.append(self.b_ih)
                            self.bias_bw.append(self.b_hh)
            self.rnn = tl.ops.rnnbase(
                mode=self.mode, input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers,
                bias=self.bias, batch_first=self.batch_first, dropout=self.dropout, bidirectional=self.bidirectional,
                is_train=self.is_train, weights_fw=self.weights_fw, weights_bw=self.weights_bw, bias_fw=self.bias_fw,
                bias_bw=self.bias_bw
            )
        else:
            self.rnn = tl.ops.rnnbase(
                mode=self.mode,
                input_size=self.input_size,
                hidden_size=self.hidden_size,
                num_layers=self.num_layers,
                bias=self.bias,
                batch_first=self.batch_first,
                dropout=self.dropout,
                bidirectional=self.bidirectional,
                is_train=self.is_train,
            )

    def forward(self, input, states=None):
        output, new_states = self.rnn(input, states)
        return output, new_states
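
# A small sketch (not in the original file) of the state shape that RNNBase-derived
# layers expect: states carry one slice per (layer, direction) pair, so the leading
# axis is num_layers * num_directions, as described in the docstrings below.
# >>> num_layers, bidirectional, batch_size, hidden_size = 2, True, 32, 32
# >>> num_directions = 2 if bidirectional else 1
# >>> (num_layers * num_directions, batch_size, hidden_size)
# (4, 32, 32)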

class RNN(RNNBase):
    """Multilayer Elman network (RNN). It takes input sequences and initial
    states as inputs, and returns the output sequences and the final states.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`.
    hidden_size : int
        The number of features in the hidden state `h`.
    num_layers : int
        Number of recurrent layers. Default: 1.
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``.
    batch_first : bool
        If ``True``, then the input and output tensors are provided as `[batch_size, seq, input_size]`. Default: ``False``.
    dropout : float
        If non-zero, introduces a `Dropout` layer on the outputs of each RNN layer except the last layer,
        with dropout probability equal to `dropout`. Default: 0.
    bidirectional : bool
        If ``True``, becomes a bidirectional RNN. Default: ``False``.
    act : activation function
        The non-linearity to use. Can be either 'tanh' or 'relu'. Default: 'tanh'.
    name : None or str
        A unique layer name.

    Inputs
    ----------
    inputs : tensor
        The input sequence. If `batch_first` is True, the shape is `[batch_size, seq, input_size]`; otherwise, the shape is `[seq, batch_size, input_size]`.
    initial_states : tensor or None
        The initial states. The shape is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_states is not given, zero initial states are used.
        If the RNN is bidirectional, num_directions is 2; otherwise it is 1. Default: None.

    Returns
    ----------
    outputs : tensor
        The output sequence. If `batch_first` is True, the shape is `[batch_size, seq, num_directions * hidden_size]`;
        otherwise, the shape is `[seq, batch_size, num_directions * hidden_size]`.
    final_states : tensor
        The final states. The shape is `[num_layers * num_directions, batch_size, hidden_size]`. Note that if the RNN is bidirectional, the forward states are at indices (0, 2, 4, 6, ...) and
        the backward states are at indices (1, 3, 5, 7, ...).

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([23, 32, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32, 32])
    >>> cell = tl.layers.RNN(input_size=16, hidden_size=32, bias=True, num_layers=2, bidirectional=True, act='tanh', batch_first=False, dropout=0, name='rnn_1')
    >>> y, h = cell(input, prev_h)
    >>> print(y.shape)

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers=1,
        bias=True,
        batch_first=False,
        dropout=0.0,
        bidirectional=False,
        act='tanh',
        name=None,
    ):
        if act == 'tanh':
            mode = 'RNN_TANH'
        elif act == 'relu':
            mode = 'RNN_RELU'
        else:
            raise ValueError("act should be in ['tanh', 'relu'], but got {}.".format(act))
        super(RNN, self).__init__(mode, input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional, name)

class LSTM(RNNBase):
    """Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`.
    hidden_size : int
        The number of features in the hidden state `h`.
    num_layers : int
        Number of recurrent layers. Default: 1.
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``.
    batch_first : bool
        If ``True``, then the input and output tensors are provided as `[batch_size, seq, input_size]`. Default: ``False``.
    dropout : float
        If non-zero, introduces a `Dropout` layer on the outputs of each LSTM layer except the last layer,
        with dropout probability equal to `dropout`. Default: 0.
    bidirectional : bool
        If ``True``, becomes a bidirectional LSTM. Default: ``False``.
    name : None or str
        A unique layer name.

    Inputs
    ----------
    inputs : tensor
        The input sequence. If `batch_first` is True, the shape is `[batch_size, seq, input_size]`; otherwise, the shape is `[seq, batch_size, input_size]`.
    initial_states : tuple or None
        The initial states, a tuple of two tensors `(h, c)`, each of shape `[num_layers * num_directions, batch_size, hidden_size]`. If initial_states is not given, zero initial states are used.
        If the LSTM is bidirectional, num_directions is 2; otherwise it is 1. Default: None.

    Returns
    ----------
    outputs : tensor
        The output sequence. If `batch_first` is True, the shape is `[batch_size, seq, num_directions * hidden_size]`;
        otherwise, the shape is `[seq, batch_size, num_directions * hidden_size]`.
    final_states : tuple
        The final states, a tuple of two tensors, each of shape `[num_layers * num_directions, batch_size, hidden_size]`. Note that if the LSTM is bidirectional, the forward states are at indices (0, 2, 4, 6, ...) and
        the backward states are at indices (1, 3, 5, 7, ...).

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([23, 32, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32, 32])
    >>> prev_c = tl.layers.Input([4, 32, 32])
    >>> cell = tl.layers.LSTM(input_size=16, hidden_size=32, bias=True, num_layers=2, bidirectional=True, batch_first=False, dropout=0, name='lstm_1')
    >>> y, (h, c) = cell(input, (prev_h, prev_c))
    >>> print(y.shape)

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers=1,
        bias=True,
        batch_first=False,
        dropout=0.0,
        bidirectional=False,
        name=None,
    ):
        super(LSTM, self).__init__('LSTM', input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional, name)

class GRU(RNNBase):
    """Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`.
    hidden_size : int
        The number of features in the hidden state `h`.
    num_layers : int
        Number of recurrent layers. Default: 1.
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``.
    batch_first : bool
        If ``True``, then the input and output tensors are provided as `[batch_size, seq, input_size]`. Default: ``False``.
    dropout : float
        If non-zero, introduces a `Dropout` layer on the outputs of each GRU layer except the last layer,
        with dropout probability equal to `dropout`. Default: 0.
    bidirectional : bool
        If ``True``, becomes a bidirectional GRU. Default: ``False``.
    name : None or str
        A unique layer name.

    Inputs
    ----------
    inputs : tensor
        The input sequence. If `batch_first` is True, the shape is `[batch_size, seq, input_size]`; otherwise, the shape is `[seq, batch_size, input_size]`.
    initial_states : tensor or None
        The initial states. The shape is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_states is not given, zero initial states are used.
        If the GRU is bidirectional, num_directions is 2; otherwise it is 1. Default: None.

    Returns
    ----------
    outputs : tensor
        The output sequence. If `batch_first` is True, the shape is `[batch_size, seq, num_directions * hidden_size]`;
        otherwise, the shape is `[seq, batch_size, num_directions * hidden_size]`.
    final_states : tensor
        The final states. The shape is `[num_layers * num_directions, batch_size, hidden_size]`. Note that if the GRU is bidirectional, the forward states are at indices (0, 2, 4, 6, ...) and
        the backward states are at indices (1, 3, 5, 7, ...).

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([23, 32, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32, 32])
    >>> cell = tl.layers.GRU(input_size=16, hidden_size=32, bias=True, num_layers=2, bidirectional=True, batch_first=False, dropout=0, name='GRU_1')
    >>> y, h = cell(input, prev_h)
    >>> print(y.shape)

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers=1,
        bias=True,
        batch_first=False,
        dropout=0.0,
        bidirectional=False,
        name=None,
    ):
        super(GRU, self).__init__('GRU', input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional, name)

TensorLayer 3.0 is a deep learning library that supports multiple deep learning frameworks as computation backends. It is planned to be compatible with TensorFlow, PyTorch, MindSpore, and Paddle.
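
As a rough illustration of how the layers above compose into a model, here is a minimal sketch, not taken from the repository: it assumes the wider TensorLayer 3.0 API, in particular the `TL_BACKEND` environment variable for backend selection and the `tl.layers.Dense` layer, which may differ by version.

import os
os.environ['TL_BACKEND'] = 'tensorflow'   # assumed backend switch; set before importing tensorlayer

import tensorlayer as tl
from tensorlayer.layers import Module

class BiLSTMClassifier(Module):
    def __init__(self):
        super(BiLSTMClassifier, self).__init__()
        # 2-layer bidirectional LSTM over sequences of 16-dimensional features
        self.lstm = tl.layers.LSTM(input_size=16, hidden_size=32, num_layers=2, bidirectional=True, batch_first=True)
        # bidirectional output has num_directions * hidden_size = 64 features per step (Dense is assumed from the TL3 API)
        self.out = tl.layers.Dense(n_units=10, in_channels=64)

    def forward(self, x):
        y, (h, c) = self.lstm(x)       # y: [batch_size, seq, 64]
        return self.out(y[:, -1, :])   # classify from the last time step

x = tl.layers.Input([8, 20, 16], name='x')   # [batch_size, seq, input_size]
net = BiLSTMClassifier()
print(net(x).shape)                          # expected: (8, 10)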