
paddle_optimizers.py

#! /usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function

import paddle
from paddle.optimizer import Optimizer

__all__ = ['Adadelta', 'Adagrad', 'Adam', 'Adamax', 'Ftrl', 'Nadam', 'RMSprop', 'SGD', 'Momentum', 'Lamb', 'LARS']

class Adadelta(Optimizer):
    """Adadelta optimizer, wrapping paddle.optimizer.Adadelta."""

    def __init__(self, learning_rate=0.001, epsilon=1.0e-6, rho=0.95):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        if rho is None:
            raise ValueError('rho is not set.')
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.rho = rho

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        # Build the backend optimizer, run the backward pass and collect (parameter, gradient) pairs.
        self.adadelta = paddle.optimizer.Adadelta(
            learning_rate=self.learning_rate, epsilon=self.epsilon, rho=self.rho, parameters=weights
        )
        loss.backward()
        weights_and_grads = self.adadelta.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        # Apply the collected gradients, then clear them for the next step.
        self.adadelta._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adadelta.clear_grad()

class Adagrad(Optimizer):
    """Adagrad optimizer, wrapping paddle.optimizer.Adagrad."""

    def __init__(self, learning_rate, initial_accumulator_value=0.0, epsilon=1.0e-6):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if initial_accumulator_value is None:
            raise ValueError('initial_accumulator_value is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        self.learning_rate = learning_rate
        self.initial_accumulator_value = initial_accumulator_value
        self.epsilon = epsilon

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        self.adagrad = paddle.optimizer.Adagrad(
            learning_rate=self.learning_rate, epsilon=self.epsilon,
            initial_accumulator_value=self.initial_accumulator_value, parameters=weights
        )
        loss.backward()
        weights_and_grads = self.adagrad.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adagrad._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adagrad.clear_grad()

class Adam(Optimizer):
    """Adam optimizer, wrapping paddle.optimizer.Adam."""

    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1.0e-8):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if beta_1 is None:
            raise ValueError('beta_1 is not set.')
        if beta_2 is None:
            raise ValueError('beta_2 is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        if not 0 <= beta_1 < 1:
            raise ValueError("Invalid value of beta_1, expect beta_1 in [0, 1).")
        if not 0 <= beta_2 < 1:
            raise ValueError("Invalid value of beta_2, expect beta_2 in [0, 1).")
        self.learning_rate = learning_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        self.adam = paddle.optimizer.Adam(
            learning_rate=self.learning_rate, beta1=self.beta_1, beta2=self.beta_2, epsilon=self.epsilon,
            parameters=weights
        )
        loss.backward()
        weights_and_grads = self.adam.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adam._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adam.clear_grad()

class Adamax(Optimizer):
    """Adamax optimizer, wrapping paddle.optimizer.Adamax."""

    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1.0e-8):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if beta_1 is None:
            raise ValueError('beta_1 is not set.')
        if beta_2 is None:
            raise ValueError('beta_2 is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        if not 0 <= beta_1 < 1:
            raise ValueError("Invalid value of beta_1, expect beta_1 in [0, 1).")
        if not 0 <= beta_2 < 1:
            raise ValueError("Invalid value of beta_2, expect beta_2 in [0, 1).")
        self.learning_rate = learning_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        self.adamax = paddle.optimizer.Adamax(
            learning_rate=self.learning_rate, beta1=self.beta_1, beta2=self.beta_2, epsilon=self.epsilon,
            parameters=weights
        )
        loss.backward()
        weights_and_grads = self.adamax.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adamax._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adamax.clear_grad()

class Ftrl(Optimizer):

    def __init__(self):
        raise Exception('Ftrl optimizer function not implemented')


class Nadam(Optimizer):

    def __init__(self):
        raise Exception('Nadam optimizer function not implemented')

class RMSprop(Optimizer):
    """RMSprop optimizer, wrapping paddle.optimizer.RMSProp."""

    def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1.0e-6, momentum=0.0, centered=False):
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")
        if rho is None:
            raise ValueError("rho is not set.")
        if epsilon is None:
            raise ValueError("epsilon is not set.")
        if momentum is None:
            raise ValueError("momentum is not set.")
        if not 0.0 <= epsilon:
            raise ValueError("Invalid value of epsilon, expect epsilon >= 0.")
        if not 0.0 <= momentum:
            raise ValueError("Invalid value of momentum, expect momentum >= 0.")
        if not 0.0 <= rho:
            raise ValueError("Invalid value of rho, expect rho >= 0.")
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.rho = rho
        self.momentum = momentum
        self.centered = centered

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        # centered=True normalizes gradients by their estimated variance instead of the uncentered second moment.
        self.rmsprop = paddle.optimizer.RMSProp(
            learning_rate=self.learning_rate, epsilon=self.epsilon, rho=self.rho, momentum=self.momentum,
            centered=self.centered, parameters=weights
        )
        loss.backward()
        weights_and_grads = self.rmsprop.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.rmsprop._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.rmsprop.clear_grad()

class SGD(Optimizer):
    """Stochastic gradient descent optimizer, wrapping paddle.optimizer.SGD."""

    def __init__(self, learning_rate=0.001):
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")
        self.learning_rate = learning_rate

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        self.sgd = paddle.optimizer.SGD(learning_rate=self.learning_rate, parameters=weights)
        loss.backward()
        weights_and_grads = self.sgd.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.sgd._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.sgd.clear_grad()

class Momentum(Optimizer):
    """SGD with momentum, wrapping paddle.optimizer.Momentum."""

    def __init__(self, learning_rate=0.001, momentum=0.9, nesterov=False):
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")
        if momentum is None:
            raise ValueError("momentum is not set.")
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.nesterov = nesterov

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        self.moment = paddle.optimizer.Momentum(
            learning_rate=self.learning_rate, momentum=self.momentum, parameters=weights, use_nesterov=self.nesterov
        )
        loss.backward()
        weights_and_grads = self.moment.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.moment._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.moment.clear_grad()

class Lamb(Optimizer):
    """LAMB optimizer, wrapping paddle.optimizer.Lamb."""

    def __init__(self, learning_rate=0.001, lamb_weight_decay=0.01, beta_1=0.9, beta_2=0.999, epsilon=1.0e-6):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if lamb_weight_decay is None:
            raise ValueError('lamb_weight_decay is not set.')
        if beta_1 is None:
            raise ValueError('beta_1 is not set.')
        if beta_2 is None:
            raise ValueError('beta_2 is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        if not 0 <= beta_1 < 1:
            raise ValueError("Invalid value of beta_1, expect beta_1 in [0, 1).")
        if not 0 <= beta_2 < 1:
            raise ValueError("Invalid value of beta_2, expect beta_2 in [0, 1).")
        self.learning_rate = learning_rate
        self.lamb_weight_decay = lamb_weight_decay
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        self.lamb = paddle.optimizer.Lamb(
            learning_rate=self.learning_rate, lamb_weight_decay=self.lamb_weight_decay, beta1=self.beta_1,
            beta2=self.beta_2, epsilon=self.epsilon, parameters=weights
        )
        loss.backward()
        weights_and_grads = self.lamb.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.lamb._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.lamb.clear_grad()

class LARS(Optimizer):

    def __init__(self):
        pass

    def gradient(self):
        pass

    def apply_gradients(self, weights_and_grads):
        raise Exception('LARS optimizer function not implemented')
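
For reference, a minimal usage sketch of the wrapper API above, assuming Paddle's dynamic-graph mode. The paddle.nn.Linear model and the random data are placeholders introduced for illustration; only the gradient/apply_gradients calls come from this file:

# Hypothetical training step using the Adam wrapper defined above.
import paddle

model = paddle.nn.Linear(10, 1)        # stand-in network, not part of this module
optimizer = Adam(learning_rate=0.001)

x = paddle.randn([4, 10])
y = paddle.randn([4, 1])

loss = paddle.nn.functional.mse_loss(model(x), y)                 # forward pass and loss
weights_and_grads = optimizer.gradient(loss, model.parameters())  # backward pass, returns (param, grad) pairs
optimizer.apply_gradients(weights_and_grads)                      # apply one update step and clear gradients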

TensorLayer3.0 is a deep learning library that can use multiple deep learning frameworks as its computational backend. Planned backends include TensorFlow, PyTorch, MindSpore, and Paddle.