nn_training_ops.h

/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef GE_OP_TRAINING_OPS_H
#define GE_OP_TRAINING_OPS_H

#include "../graph/operator_reg.h"

namespace ge {
/**
 *@brief Updates "var" according to the AdaMax algorithm.\n
 * "t-1" means the previous period.
 * m_t <- beta1 * m{t-1} + (1 - beta1) * grad\n
 * v_t <- max(beta2 * v{t-1}, abs(grad))\n
 * var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon)
 *
 *@attention Constraints:\n
 * The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Must be one of the following types: TensorType::NumberType().
 * Should be from a Variable().
 *@li m: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li v: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li beta1_power: A scalar. Has the same type as "var".
 *@li lr: learning_rate. A scalar. Has the same type as "var".
 *@li beta1: A scalar. Has the same type as "var".
 *@li beta2: A scalar. Has the same type as "var".
 *@li epsilon: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *
 *@par Attributes:\n
 * use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var", "m", and "v" tensors is protected
 * by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *
 *@par Outputs:
 * var: A mutable tensor. Has the same type as input "var".
 *
 */
REG_OP(ApplyAdaMax)
    .INPUT(var, TensorType::NumberType())
    .INPUT(m, TensorType::NumberType())
    .INPUT(v, TensorType::NumberType())
    .INPUT(beta1_power, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(beta1, TensorType::NumberType())
    .INPUT(beta2, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdaMax)
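
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// a scalar version of the AdaMax step exactly as stated in the comment above, with
// "beta1_power" playing the role of beta1^t. Assumes <cmath> has been included by
// the translation unit.
inline void ApplyAdaMaxScalarSketch(float &var, float &m, float &v, float beta1_power,
                                    float lr, float beta1, float beta2, float epsilon,
                                    float grad) {
  m = beta1 * m + (1.0f - beta1) * grad;                  // m_t <- beta1 * m_{t-1} + (1 - beta1) * grad
  v = std::fmax(beta2 * v, std::fabs(grad));              // v_t <- max(beta2 * v_{t-1}, |grad|)
  var -= lr / (1.0f - beta1_power) * m / (v + epsilon);   // var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon)
}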

/**
 *@brief Updates "var" according to the AdaMax algorithm.\n
 * "t-1" means the previous period.
 * m_t <- beta1 * m{t-1} + (1 - beta1) * grad\n
 * v_t <- max(beta2 * v{t-1}, abs(grad))\n
 * var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon)
 *
 *@attention Constraints:\n
 * The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Must be one of the following types: TensorType::NumberType().
 * Should be from a Variable().
 *@li m: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li v: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li beta1_power: A scalar. Has the same type as "var".
 *@li lr: learning_rate. A scalar. Has the same type as "var".
 *@li beta1: A scalar. Has the same type as "var".
 *@li beta2: A scalar. Has the same type as "var".
 *@li epsilon: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *
 *@par Attributes:\n
 * use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var", "m", and "v" tensors is protected
 * by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *
 *@par Outputs:
 *@li var: A mutable tensor. Has the same type as input "var".
 *@li m: A mutable tensor. Has the same type as input "m".
 *@li v: A mutable tensor. Has the same type as input "v".
 *
 */
REG_OP(ApplyAdaMaxD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(m, TensorType::NumberType())
    .INPUT(v, TensorType::NumberType())
    .INPUT(beta1_power, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(beta1, TensorType::NumberType())
    .INPUT(beta2, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(m, TensorType::NumberType())
    .OUTPUT(v, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdaMaxD)

/**
 *@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme.
 *@par Inputs:
 * Five inputs, including:
 *@li var: An NCHW, NHWC, or ND Tensor of type float32.
 *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
 *@li lr: An NCHW, NHWC, or ND Tensor of type float32.
 *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
 *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
 *@par Attributes:
 *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
 *@li update_slots: An optional bool. Defaults to "True". If "True", "accum" is updated; otherwise it is left unchanged.
 *@par Outputs:
 *var: A Tensor. Has the same type and format as input "var".
 */
REG_OP(SparseApplyAdagrad)
    .INPUT(var, TensorType({DT_FLOAT}))
    .INPUT(accum, TensorType({DT_FLOAT}))
    .INPUT(lr, TensorType({DT_FLOAT}))
    .INPUT(grad, TensorType({DT_FLOAT}))
    .INPUT(indices, TensorType({DT_INT32}))
    .OUTPUT(var, TensorType({DT_FLOAT}))
    .ATTR(use_locking, Bool, false)
    .ATTR(update_slots, Bool, true)
    .OP_END_FACTORY_REG(SparseApplyAdagrad)
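
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// a row-wise view of the sparse Adagrad update, applying the Adagrad scheme stated
// for ApplyAdagrad further below to the rows selected by "indices". Assumes <cmath>
// is available and that "var"/"accum" are laid out with "row_len" elements per row.
inline void SparseApplyAdagradRowSketch(float *var_row, float *accum_row,
                                        const float *grad_row, float lr,
                                        int row_len, bool update_slots) {
  for (int j = 0; j < row_len; ++j) {
    if (update_slots) {
      accum_row[j] += grad_row[j] * grad_row[j];                // accum += grad * grad
    }
    var_row[j] -= lr * grad_row[j] / std::sqrt(accum_row[j]);   // var -= lr * grad / sqrt(accum)
  }
}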

/**
 *@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme.
 *@par Inputs:
 * Four inputs, including:
 *@li var: An NCHW, NHWC, or ND Tensor of type float32.
 *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
 *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
 *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
 *@par Attributes:
 *@li lr: A required float, specifying the learning rate (scaling factor).
 *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
 *@li update_slots: An optional bool. Defaults to "True". If "True", "accum" is updated; otherwise it is left unchanged.
 *@par Outputs:
 *@li var: A Tensor. Has the same type and format as input "var".
 *@li accum: A Tensor. Has the same type and format as input "var".
 */
REG_OP(SparseApplyAdagradD)
    .INPUT(var, TensorType({DT_FLOAT}))
    .INPUT(accum, TensorType({DT_FLOAT}))
    .INPUT(grad, TensorType({DT_FLOAT}))
    .INPUT(indices, TensorType({DT_INT32}))
    .OUTPUT(var, TensorType({DT_FLOAT}))
    .OUTPUT(accum, TensorType({DT_FLOAT}))
    .REQUIRED_ATTR(lr, Float)
    .ATTR(use_locking, Bool, false)
    .ATTR(update_slots, Bool, true)
    .OP_END_FACTORY_REG(SparseApplyAdagradD)

/**
 *@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme.
 *@par Inputs:
 *Six inputs, including:
 *@li var: An NCHW, NHWC, or ND Tensor of type float32.
 *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
 *@li lr: An NCHW, NHWC, or ND Tensor of type float32.
 *@li epsilon: An NCHW, NHWC, or ND Tensor of type float32.
 *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
 *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
 *@par Attributes:
 *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
 *@li update_slots: An optional bool. Defaults to "True". If "True", "accum" is updated; otherwise it is left unchanged.
 *@par Outputs:
 *var: A Tensor. Has the same type and format as input "var".
 */
REG_OP(SparseApplyAdagradV2)
    .INPUT(var, TensorType({DT_FLOAT}))
    .INPUT(accum, TensorType({DT_FLOAT}))
    .INPUT(lr, TensorType({DT_FLOAT}))
    .INPUT(epsilon, TensorType({DT_FLOAT}))
    .INPUT(grad, TensorType({DT_FLOAT}))
    .INPUT(indices, TensorType({DT_INT32}))
    .OUTPUT(var, TensorType({DT_FLOAT}))
    .ATTR(use_locking, Bool, false)
    .ATTR(update_slots, Bool, true)
    .OP_END_FACTORY_REG(SparseApplyAdagradV2)

/**
 *@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme.
 *@par Inputs:
 *Four inputs, including:
 *@li var: An NCHW, NHWC, or ND Tensor of type float32.
 *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
 *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
 *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
 *@par Attributes:
 *@li lr: A required float, specifying the learning rate (scaling factor).
 *@li epsilon: A required float, a small constant added for numerical stability.
 *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
 *@li update_slots: An optional bool. Defaults to "True". If "True", "accum" is updated; otherwise it is left unchanged.
 *@par Outputs:
 *@li var: A Tensor. Has the same type and format as input "var".
 *@li accum: A Tensor. Has the same type and format as input "accum".
 */
REG_OP(SparseApplyAdagradV2D)
    .INPUT(var, TensorType({DT_FLOAT}))
    .INPUT(accum, TensorType({DT_FLOAT}))
    .INPUT(grad, TensorType({DT_FLOAT}))
    .INPUT(indices, TensorType({DT_INT32}))
    .OUTPUT(var, TensorType({DT_FLOAT}))
    .OUTPUT(accum, TensorType({DT_FLOAT}))
    .REQUIRED_ATTR(lr, Float)
    .REQUIRED_ATTR(epsilon, Float)
    .ATTR(use_locking, Bool, false)
    .ATTR(update_slots, Bool, true)
    .OP_END_FACTORY_REG(SparseApplyAdagradV2D)

/**
 *@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you
 * want to use Nesterov momentum.\n
 * Computing process: \n
 * accum = accum * momentum + grad\n
 * var -= lr * accum
 *
 *@attention Constraints:\n
 * The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Should be from a Variable().
 *@li accum: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li lr: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *@li momentum: A scalar. Has the same type as "var".
 *
 *@par Attributes:
 *@li use_nesterov: An optional bool. Defaults to "False".
 * If "True", the tensor passed to compute grad will be
 * var - lr * momentum * accum, so in the end, the var you get is actually
 * var - lr * momentum * accum.
 *
 *@li use_locking: An optional bool. Defaults to "False".\n
 * If "True", updating of the "var" and "accum" tensors is protected by a lock;
 * otherwise the behavior is undefined, but may exhibit less contention.
 *
 *@par Outputs:
 * var: A mutable tensor. Has the same type as input "var".
 *
 */
REG_OP(ApplyMomentum)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(momentum, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_nesterov, Bool, false)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyMomentum)
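
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// a scalar form of the momentum update stated above. The Nesterov branch follows the
// commonly used formulation for this kind of op and should be read as an assumption,
// since the comment above only gives the plain-momentum equations.
inline void ApplyMomentumScalarSketch(float &var, float &accum, float lr, float grad,
                                      float momentum, bool use_nesterov) {
  accum = accum * momentum + grad;             // accum = accum * momentum + grad
  if (use_nesterov) {
    var -= grad * lr + accum * momentum * lr;  // assumed Nesterov look-ahead step
  } else {
    var -= lr * accum;                         // var -= lr * accum
  }
}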

REG_OP(ApplyMomentumCCE)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(momentum, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_nesterov, Bool, false)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyMomentumCCE)

/**
 *@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you
 * want to use Nesterov momentum.\n
 * Computing process: \n
 * accum = accum * momentum + grad\n
 * var -= lr * accum
 *
 *@attention Constraints:\n
 * The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Should be from a Variable().
 *@li accum: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li lr: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *@li momentum: A scalar. Has the same type as "var".
 *
 *@par Attributes:
 *@li use_nesterov: An optional bool. Defaults to "False".
 * If "True", the tensor passed to compute grad will be
 * var - lr * momentum * accum, so in the end, the var you get is actually
 * var - lr * momentum * accum.
 *
 *@li use_locking: An optional bool. Defaults to "False".\n
 * If "True", updating of the "var" and "accum" tensors is protected by a lock;
 * otherwise the behavior is undefined, but may exhibit less contention.
 *
 *@par Outputs:
 *@li var: A mutable tensor. Has the same type as input "var".
 *@li accum: A mutable tensor. Has the same type as input "accum".
 *
 */
REG_OP(ApplyMomentumD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(momentum, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(accum, TensorType::NumberType())
    .ATTR(use_nesterov, Bool, false)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyMomentumD)

/**
 *@brief Updates "var" according to the PowerSign update.\n
 * "t-1" means the previous period.
 * m_t <- beta1 * m_{t-1} + (1 - beta1) * grad\n
 * update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad\n
 * var <- var - lr * update
 *
 *@attention Constraints:\n
 * The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Should be from a Variable().
 *@li m: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li lr: A scalar. Has the same type as "var".
 *@li logbase: A scalar. Has the same type as "var".
 *@li sign_decay: A scalar. Has the same type as "var".
 *@li beta: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *
 *@par Attributes:
 * use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" and "m" tensors is protected
 * by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *
 *@par Outputs:
 * var: A mutable tensor. Has the same type as input "var".
 *
 */
REG_OP(ApplyPowerSign)
    .INPUT(var, TensorType::NumberType())
    .INPUT(m, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(logbase, TensorType::NumberType())
    .INPUT(sign_decay, TensorType::NumberType())
    .INPUT(beta, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyPowerSign)
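
// Illustrative sketch only (hypothetical helpers, not part of the GE operator API):
// a scalar form of the PowerSign update stated above, with the "beta" input taking
// the role of beta1 in the formula. Assumes <cmath> is available.
inline float SignSketch(float x) {
  return (x > 0.0f) ? 1.0f : ((x < 0.0f) ? -1.0f : 0.0f);
}
inline void ApplyPowerSignScalarSketch(float &var, float &m, float lr, float logbase,
                                       float sign_decay, float beta, float grad) {
  m = beta * m + (1.0f - beta) * grad;  // m_t <- beta1 * m_{t-1} + (1 - beta1) * grad
  float update = std::exp(logbase * sign_decay * SignSketch(grad) * SignSketch(m)) * grad;
  var -= lr * update;                   // var <- var - lr * update
}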

/**
 *@brief Updates "var" according to the PowerSign update.\n
 * "t-1" means the previous period.
 * m_t <- beta1 * m_{t-1} + (1 - beta1) * grad\n
 * update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad\n
 * var <- var - lr * update
 *
 *@attention Constraints:\n
 * The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Should be from a Variable().
 *@li m: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li lr: A scalar. Has the same type as "var".
 *@li logbase: A scalar. Has the same type as "var".
 *@li sign_decay: A scalar. Has the same type as "var".
 *@li beta: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *
 *@par Attributes:
 * use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" and "m" tensors is protected
 * by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *
 *@par Outputs:
 *@li var: A mutable tensor. Has the same type as input "var".
 *@li m: A mutable tensor. Has the same type as input "var".
 *
 */
REG_OP(ApplyPowerSignD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(m, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(logbase, TensorType::NumberType())
    .INPUT(sign_decay, TensorType::NumberType())
    .INPUT(beta, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(m, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyPowerSignD)

/**
 *@brief Updates "var" according to the FOBOS algorithm with a fixed learning rate.\n
 * prox_v = var - alpha * delta\n
 * var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
 *
 *@attention Constraints:\n
 * The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Should be from a Variable().
 *@li alpha: A scalar. Has the same type as "var".
 *@li l1: A scalar. Has the same type as "var".
 *@li l2: A scalar. Has the same type as "var".
 *@li delta: A tensor. Has the same type as "var". The change.
 *
 *@par Attributes:
 * use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" tensor is protected
 * by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *
 *@par Outputs:
 * var: A mutable tensor. Has the same type as input "var".
 *
 */
REG_OP(ApplyProximalGradientDescent)
    .INPUT(var, TensorType::NumberType())
    .INPUT(alpha, TensorType::NumberType())
    .INPUT(l1, TensorType::NumberType())
    .INPUT(l2, TensorType::NumberType())
    .INPUT(delta, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyProximalGradientDescent)
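
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// a scalar form of the fixed-rate FOBOS step exactly as stated in the comment above.
// Assumes <cmath> is available.
inline void ApplyProximalGradientDescentScalarSketch(float &var, float alpha, float l1,
                                                     float l2, float delta) {
  float prox_v = var - alpha * delta;                              // prox_v = var - alpha * delta
  float sign = (prox_v > 0.0f) ? 1.0f : ((prox_v < 0.0f) ? -1.0f : 0.0f);
  float shrink = std::fmax(std::fabs(prox_v) - alpha * l1, 0.0f);  // max{|prox_v| - alpha * l1, 0}
  var = sign / (1.0f + alpha * l2) * shrink;                       // soft-thresholded, L2-scaled result
}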

/**
 *@brief Updates "var" according to the AddSign update.
 *@par Inputs:
 *Seven inputs, including:
 * @li var: A mutable Tensor of type TensorType::NumberType().
 * Should be a Variable Tensor.
 * @li m: A mutable Tensor of the same type as "var".
 * Should be a Variable Tensor.
 * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
 * @li alpha: A Tensor of the same type as "var". Must be a scalar.
 * @li sign_decay: A Tensor of the same type as "var". Must be a scalar.
 * @li beta: A Tensor of the same type as "var". Must be a scalar.
 * @li grad: A Tensor of the same type as "var", for the gradient.
 *@par Attributes:
 *use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" and "m" tensors will be
 * protected by a lock; otherwise the behavior is undefined,
 * but may exhibit less contention.
 *@par Outputs:
 *var: A mutable Tensor. Has the same type as "var".
 */
REG_OP(ApplyAddSign)
    .INPUT(var, TensorType::NumberType())
    .INPUT(m, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(alpha, TensorType::NumberType())
    .INPUT(sign_decay, TensorType::NumberType())
    .INPUT(beta, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAddSign)
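
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// the comment above does not spell out the AddSign equations, so the formula below is
// the commonly used AddSign rule (m_t <- beta * m_{t-1} + (1 - beta) * g;
// var <- var - lr * (alpha + sign_decay * sign(g) * sign(m_t)) * g) and should be
// treated as an assumption about this operator's semantics.
inline void ApplyAddSignScalarSketch(float &var, float &m, float lr, float alpha,
                                     float sign_decay, float beta, float grad) {
  m = beta * m + (1.0f - beta) * grad;
  float sg = (grad > 0.0f) ? 1.0f : ((grad < 0.0f) ? -1.0f : 0.0f);
  float sm = (m > 0.0f) ? 1.0f : ((m < 0.0f) ? -1.0f : 0.0f);
  var -= lr * (alpha + sign_decay * sg * sm) * grad;
}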

/**
 *@brief Updates "var" according to the AddSign update.
 *@par Inputs:
 *Seven inputs, including:
 * @li var: A mutable Tensor of type TensorType::NumberType().
 * Should be a Variable Tensor.
 * @li m: A mutable Tensor of the same type as "var".
 * Should be a Variable Tensor.
 * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
 * @li alpha: A Tensor of the same type as "var". Must be a scalar.
 * @li sign_decay: A Tensor of the same type as "var". Must be a scalar.
 * @li beta: A Tensor of the same type as "var". Must be a scalar.
 * @li grad: A Tensor of the same type as "var", for the gradient.
 *@par Attributes:
 *use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" and "m" tensors will be
 * protected by a lock; otherwise the behavior is undefined,
 * but may exhibit less contention.
 *@par Outputs:
 *@li var: A mutable Tensor. Has the same type as "var".
 *@li m: A mutable Tensor. Has the same type as "m".
 */
REG_OP(ApplyAddSignD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(m, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(alpha, TensorType::NumberType())
    .INPUT(sign_decay, TensorType::NumberType())
    .INPUT(beta, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(m, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAddSignD)

/**
 *@brief Updates "var" according to the centered RMSProp algorithm.\n
 * The centered RMSProp algorithm uses an estimate of the centered second moment
 * (i.e., the variance) for normalization, as opposed to regular RMSProp, which
 * uses the (uncentered) second moment. This often helps with training, but is
 * slightly more expensive in terms of computation and memory.
 *
 * "t-1" means the previous period.
 * mg <- rho * mg{t-1} + (1-rho) * grad\n
 * ms <- rho * ms{t-1} + (1-rho) * grad * grad\n
 * mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\n
 * var <- var - mom\n
 *
 *@attention Constraints:\n
 *@li In the dense implementation of this algorithm, "mg", "ms", and "mom" will
 * update even if the grad is zero, but in this sparse implementation, "mg", "ms",
 * and "mom" will not update in iterations during which the grad is zero.
 *@li The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Should be from a Variable().
 *@li mg: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li ms: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li mom: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li lr: A scalar. Has the same type as "var".
 *@li rho: A scalar. Has the same type as "var".
 *@li momentum: A tensor. Has the same type as "var".
 *@li epsilon: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *
 *@par Attributes:
 * use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var", "ms", and "mom" tensors is protected
 * by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *
 *@par Outputs:
 * var: A mutable tensor. Has the same type as input "var".
 *
 */
REG_OP(ApplyCenteredRMSProp)
    .INPUT(var, TensorType::NumberType())
    .INPUT(mg, TensorType::NumberType())
    .INPUT(ms, TensorType::NumberType())
    .INPUT(mom, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(rho, TensorType::NumberType())
    .INPUT(momentum, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyCenteredRMSProp)
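
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// a scalar form of the centered RMSProp step exactly as stated in the comment above.
// Assumes <cmath> is available.
inline void ApplyCenteredRMSPropScalarSketch(float &var, float &mg, float &ms, float &mom,
                                             float lr, float rho, float momentum,
                                             float epsilon, float grad) {
  mg = rho * mg + (1.0f - rho) * grad;          // mg <- rho * mg_{t-1} + (1 - rho) * grad
  ms = rho * ms + (1.0f - rho) * grad * grad;   // ms <- rho * ms_{t-1} + (1 - rho) * grad^2
  mom = momentum * mom + lr * grad / std::sqrt(ms - mg * mg + epsilon);
  var -= mom;                                   // var <- var - mom
}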

/**
 *@brief Updates "var" according to the centered RMSProp algorithm.\n
 * The centered RMSProp algorithm uses an estimate of the centered second moment
 * (i.e., the variance) for normalization, as opposed to regular RMSProp, which
 * uses the (uncentered) second moment. This often helps with training, but is
 * slightly more expensive in terms of computation and memory.
 *
 * "t-1" means the previous period.
 * mg <- rho * mg{t-1} + (1-rho) * grad\n
 * ms <- rho * ms{t-1} + (1-rho) * grad * grad\n
 * mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\n
 * var <- var - mom\n
 *
 *@attention Constraints:\n
 *@li In the dense implementation of this algorithm, "mg", "ms", and "mom" will
 * update even if the grad is zero, but in this sparse implementation, "mg", "ms",
 * and "mom" will not update in iterations during which the grad is zero.
 *@li The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Should be from a Variable().
 *@li mg: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li ms: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li mom: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li lr: A scalar. Has the same type as "var".
 *@li rho: A scalar. Has the same type as "var".
 *@li momentum: A tensor. Has the same type as "var".
 *@li epsilon: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *
 *@par Attributes:
 * use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var", "ms", and "mom" tensors is protected
 * by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *
 *@par Outputs:
 *@li var: A mutable Tensor. Has the same type as "var".
 *@li mg: A mutable Tensor. Has the same type as "mg".
 *@li ms: A mutable Tensor. Has the same type as "ms".
 *@li mom: A mutable Tensor. Has the same type as "mom".
 *
 */
REG_OP(ApplyCenteredRMSPropD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(mg, TensorType::NumberType())
    .INPUT(ms, TensorType::NumberType())
    .INPUT(mom, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(rho, TensorType::NumberType())
    .INPUT(momentum, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(mg, TensorType::NumberType())
    .OUTPUT(ms, TensorType::NumberType())
    .OUTPUT(mom, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyCenteredRMSPropD)

/**
 *@brief Updates "var" by subtracting 'alpha' * 'delta' from it.\n
 * var -= delta * alpha
 *
 *@attention Constraints:\n
 * The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Should be from a Variable().
 *@li alpha: A scalar. Has the same type as "var".
 *@li delta: A tensor for the change. Has the same type as "var".
 *
 *@par Attributes:
 * use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" tensor is protected
 * by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *
 *@par Outputs:
 * var: A mutable tensor. Has the same type as input "var".
 *
 */
REG_OP(ApplyGradientDescent)
    .INPUT(var, TensorType::NumberType())
    .INPUT(alpha, TensorType::NumberType())
    .INPUT(delta, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyGradientDescent)
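
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// the gradient-descent step above is a single multiply-subtract per element.
// For example, var = 1.0f, alpha = 0.1f, delta = 0.5f leaves var at 0.95f.
inline void ApplyGradientDescentScalarSketch(float &var, float alpha, float delta) {
  var -= alpha * delta;  // var -= delta * alpha
}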

/**
 *@brief Updates "var" according to the adagrad scheme.\n
 * accum += grad * grad\n
 * var -= lr * grad * (1 / sqrt(accum))
 *
 *@attention Constraints:\n
 * The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Should be from a Variable().
 *@li accum: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li lr: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *
 *@par Attributes:
 *@li update_slots: An optional bool. Defaults to "True". If "True", "accum" is updated.
 *@li use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" and "accum" tensors is protected
 * by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *
 *@par Outputs:
 * var: A mutable tensor. Has the same type as input "var".
 *
 */
REG_OP(ApplyAdagrad)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(update_slots, Bool, true)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdagrad)
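
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// a scalar form of the Adagrad step exactly as stated in the comment above, with
// "update_slots" gating the accumulator update. Assumes <cmath> is available.
inline void ApplyAdagradScalarSketch(float &var, float &accum, float lr, float grad,
                                     bool update_slots) {
  if (update_slots) {
    accum += grad * grad;               // accum += grad * grad
  }
  var -= lr * grad / std::sqrt(accum);  // var -= lr * grad * (1 / sqrt(accum))
}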

/**
 *@brief Updates "var" according to the adagrad scheme.\n
 * accum += grad * grad\n
 * var -= lr * grad * (1 / sqrt(accum))
 *
 *@attention Constraints:\n
 * The input tensors must have the same shape.
 *
 *@par Inputs:
 *@li var: A mutable tensor. Should be from a Variable().
 *@li accum: A mutable tensor. Has the same type as "var".
 * Should be from a Variable().
 *@li lr: A scalar. Has the same type as "var".
 *@li grad: A tensor for the gradient. Has the same type as "var".
 *
 *@par Attributes:
 *@li update_slots: An optional bool. Defaults to "True". If "True", "accum" is updated.
 *@li use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" and "accum" tensors is protected
 * by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *
 *@par Outputs:
 *@li var: A mutable tensor. Has the same type as input "var".
 *@li accum: A mutable tensor. Has the same type as input "var".
 *
 */
REG_OP(ApplyAdagradD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(accum, TensorType::NumberType())
    .ATTR(update_slots, Bool, true)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdagradD)

/**
 * @brief Updates "var" according to the adagradv2 scheme.\n
 * accum += grad * grad \n
 * var -= lr * grad * (1 / sqrt(accum) + epsilon)
 *
 * @par Inputs:
 * @li var: A mutable tensor. Must be one of the data types defined in
 * TensorType::NumberType(). Should be from a Variable().
 * @li accum: A mutable tensor. Has the same type as "var". Should be from a
 * Variable().
 * @li lr: A tensor for the learning rate. Has the same type as "var". Should be
 * from a Variable().
 * @li grad: A tensor for the gradient. Has the same type as "var". Should be
 * from a Variable().
 * @li epsilon: A scalar. Has the same type as "var".
 *
 * @par Attributes:
 * @li update_slots: An optional bool. Defaults to "True".
 * If "True", "accum" will be updated.
 * @li use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" tensor is protected by a lock;
 * otherwise the behavior is undefined, but may exhibit less contention.
 *
 * @par Outputs:
 * var: A mutable tensor. Has the same type as input "var".
 *
 * @attention Constraints:
 * The input tensors must have the same shape.
 *
 */
REG_OP(ApplyAdagradV2)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(update_slots, Bool, true)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdagradV2)
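
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// a scalar form that mirrors the formula written in the comment above,
// var -= lr * grad * (1 / sqrt(accum) + epsilon). Assumes <cmath> is available.
inline void ApplyAdagradV2ScalarSketch(float &var, float &accum, float lr, float epsilon,
                                       float grad, bool update_slots) {
  if (update_slots) {
    accum += grad * grad;                                   // accum += grad * grad
  }
  var -= lr * grad * (1.0f / std::sqrt(accum) + epsilon);   // as stated in the comment above
}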

/**
 * @brief Updates "var" according to the adagradv2 scheme.\n
 * accum += grad * grad \n
 * var -= lr * grad * (1 / sqrt(accum) + epsilon)
 *
 * @par Inputs:
 * @li var: A mutable tensor. Must be one of the data types defined in
 * TensorType::NumberType(). Should be from a Variable().
 * @li accum: A mutable tensor. Has the same type as "var". Should be from a
 * Variable().
 * @li lr: A tensor for the learning rate. Has the same type as "var". Should be
 * from a Variable().
 * @li grad: A tensor for the gradient. Has the same type as "var". Should be
 * from a Variable().
 *
 * @par Attributes:
 * @li epsilon: A scalar. Has the same type as "var".
 * @li update_slots: An optional bool. Defaults to "True".
 * If "True", "accum" will be updated.
 * @li use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" tensor is protected by a lock;
 * otherwise the behavior is undefined, but may exhibit less contention.
 *
 * @par Outputs:
 * var: A mutable tensor. Has the same type as input "var".
 *
 * @attention Constraints:
 * The input tensors must have the same shape.
 *
 */
REG_OP(ApplyAdagradV2D)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(accum, TensorType::NumberType())
    .REQUIRED_ATTR(epsilon, Float)
    .ATTR(update_slots, Bool, true)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdagradV2D)

/**
 *@brief Updates "var" according to the proximal adagrad scheme.
 *@par Inputs:
 *Eight inputs, including:
 * @li var: A mutable Tensor. Must be one of the following types:
 * TensorType::NumberType(). Should be a Variable Tensor.
 * @li gradient_accumulator: A mutable Tensor. Must have the same
 * type as "var". Should be a Variable Tensor.
 * @li gradient_squared_accumulator: A mutable Tensor of the same type as "var".
 * Should be a Variable Tensor.
 * @li grad: A Tensor of the same type as "var", for the gradient.
 * @li lr: A Tensor of the same type as "var".
 * Scaling factor. Must be a scalar.
 * @li l1: A Tensor of the same type as "var".
 * L1 regularization. Must be a scalar.
 * @li l2: A Tensor of the same type as "var".
 * L2 regularization. Must be a scalar.
 * @li global_step: A Tensor of type int32 or int64.
 * Training step number. Must be a scalar.
 *@par Attributes:
 *use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the var and accum tensors will be
 * protected by a lock; otherwise the behavior is undefined,
 * but may exhibit less contention.
 *@par Outputs:
 *var: A mutable Tensor. Has the same type as "var".
 */
REG_OP(ApplyAdagradDA)
    .INPUT(var, TensorType::NumberType())
    .INPUT(gradient_accumulator, TensorType::NumberType())
    .INPUT(gradient_squared_accumulator, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(l1, TensorType::NumberType())
    .INPUT(l2, TensorType::NumberType())
    .INPUT(global_step, TensorType({DT_INT32, DT_INT64}))
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdagradDA)

/**
 *@brief Updates "var" according to the proximal adagrad scheme.
 *@par Inputs:
 *Eight inputs, including:
 * @li var: A mutable Tensor. Must be one of the following types:
 * TensorType::NumberType(). Should be a Variable Tensor.
 * @li gradient_accumulator: A mutable Tensor. Must have the same
 * type as "var". Should be a Variable Tensor.
 * @li gradient_squared_accumulator: A mutable Tensor of the same type as "var".
 * Should be a Variable Tensor.
 * @li grad: A Tensor of the same type as "var", for the gradient.
 * @li lr: A Tensor of the same type as "var".
 * Scaling factor. Must be a scalar.
 * @li l1: A Tensor of the same type as "var".
 * L1 regularization. Must be a scalar.
 * @li l2: A Tensor of the same type as "var".
 * L2 regularization. Must be a scalar.
 * @li global_step: A Tensor of type int32 or int64.
 * Training step number. Must be a scalar.
 *@par Attributes:
 *use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the var and accum tensors will be
 * protected by a lock; otherwise the behavior is undefined,
 * but may exhibit less contention.
 *@par Outputs:
 *@li var: A mutable Tensor. Has the same type as "var".
 *@li gradient_accumulator: A mutable Tensor. Has the same type as "var".
 *@li gradient_squared_accumulator: A mutable Tensor. Has the same type as "var".
 */
REG_OP(ApplyAdagradDAD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(gradient_accumulator, TensorType::NumberType())
    .INPUT(gradient_squared_accumulator, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(l1, TensorType::NumberType())
    .INPUT(l2, TensorType::NumberType())
    .INPUT(global_step, TensorType({DT_INT32, DT_INT64}))
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(gradient_accumulator, TensorType::NumberType())
    .OUTPUT(gradient_squared_accumulator, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdagradDAD)

/**
 *@brief Returns the dimension index in the destination data format given the one in
 * the source data format.
 *
 *@par Inputs:
 * x: A tensor of type int32 or int64.
 * A Tensor with each element as a dimension index in the source data format.
 * Must be in the range [-4, 4).
 *
 *@par Attributes:
 *@li src_format: An optional string. Defaults to "NHWC".
 * The source data format.
 *@li dst_format: An optional string. Defaults to "NCHW".
 * The destination data format.
 *
 *@par Outputs:
 * y: A tensor. Has the same type as "x".
 *
 */
REG_OP(DataFormatDimMap)
    .INPUT(x, TensorType::IndexNumberType())
    .ATTR(src_format, String, "NHWC")
    .ATTR(dst_format, String, "NCHW")
    .OUTPUT(y, TensorType::IndexNumberType())
    .OP_END_FACTORY_REG(DataFormatDimMap)
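
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// the per-element mapping this operator is described as performing, written for
// 4-character formats such as the default "NHWC" -> "NCHW". An index in [-4, 4) is
// wrapped into [0, 4), its dimension letter is taken from the source format, and the
// result is that letter's position in the destination format; treat the exact wrapping
// rule as an assumption.
inline int DataFormatDimMapSketch(int x, const char *src_format = "NHWC",
                                  const char *dst_format = "NCHW") {
  int idx = ((x % 4) + 4) % 4;   // wrap negative indices, e.g. -1 -> 3
  char dim = src_format[idx];    // dimension letter in the source format
  for (int j = 0; j < 4; ++j) {
    if (dst_format[j] == dim) {
      return j;                  // position of that dimension in the destination format
    }
  }
  return -1;                     // not reached for well-formed 4-character formats
}
// Example: DataFormatDimMapSketch(1) == 2, since "H" is dimension 2 in "NCHW".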

/**
 * @brief Implements stochastic gradient descent (optionally with momentum).\n
 * Nesterov momentum is based on the formula from
 * On the importance of initialization and momentum in deep learning.\n
 * @par Inputs:
 * @li parameters: A mutable tensor of type float16 or float32.\n
 * Specifies the iterable of parameters to optimize or dicts defining parameter
 * groups.
 * @li gradient: A tensor of type float16 or float32.\n
 * Specifies the gradient of the training step.
 * @li learning_rate: A tensor of type float16 or float32.\n
 * Specifies the learning rate of the training step.
 * @li accum: A tensor of type float16 or float32.
 * Specifies the velocity of the training step.
 * @li momentum: A tensor of type float16 or float32.
 * Specifies the momentum factor.
 * @li stat: A tensor of type float16 or float32.
 * Specifies the status flag indicating whether this is the first step.
 * @par Attributes:
 * @li dampening: An optional float, specifying the dampening for momentum.
 * Defaults to "0.0".
 * @li weight_decay: An optional float, specifying the L2 penalty. Defaults to
 * "0.0".
 * @li nesterov: An optional bool, specifying whether to enable Nesterov
 * momentum. Defaults to "False".
 * @par Outputs:
 * parameters: A mutable tensor same as input "parameters".
 * @see ApplyMomentum()
 */
REG_OP(SGD)
    .INPUT(parameters, TensorType(DT_FLOAT, DT_FLOAT16))
    .INPUT(gradient, TensorType(DT_FLOAT, DT_FLOAT16))
    .INPUT(learning_rate, TensorType(DT_FLOAT, DT_FLOAT16))
    .INPUT(accum, TensorType(DT_FLOAT, DT_FLOAT16))
    .INPUT(momentum, TensorType(DT_FLOAT, DT_FLOAT16))
    .INPUT(stat, TensorType(DT_FLOAT, DT_FLOAT16))
    .OUTPUT(parameters, TensorType(DT_FLOAT, DT_FLOAT16))
    .ATTR(dampening, Float, 0.0)
    .ATTR(weight_decay, Float, 0.0)
    .ATTR(nesterov, Bool, false)
    .OP_END_FACTORY_REG(SGD)
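
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// one scalar SGD-with-momentum step in the PyTorch-style formulation the attributes
// above suggest (weight_decay as L2 penalty, dampening, nesterov). The handling of
// "stat" as a first-step flag that seeds the velocity is an assumption.
inline void SgdScalarSketch(float &parameter, float &accum, float gradient,
                            float learning_rate, float momentum, float &stat,
                            float dampening, float weight_decay, bool nesterov) {
  float g = gradient + weight_decay * parameter;        // fold the L2 penalty into the gradient
  if (stat != 0.0f) {                                   // assumed: first step seeds the velocity
    accum = g;
    stat = 0.0f;
  } else {
    accum = momentum * accum + (1.0f - dampening) * g;  // velocity update
  }
  float step = nesterov ? (g + momentum * accum) : accum;
  parameter -= learning_rate * step;
}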

/**
 * @brief Updates "var" according to the RMSProp algorithm.\n
 * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
 * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
 * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
 * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
 * var <- var - mom\n
 *
 * @par Inputs:
 * @li var: A mutable tensor. Must be one of the data types defined in\n
 * TensorType::NumberType(). Should be from a Variable().
 * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
 * Variable().
 * @li mom: A mutable tensor. Must have the same type as "var". Should be from a
 * Variable().
 * @li lr: A scalar. Must have the same type as "var".
 * @li rho: A scalar. Must have the same type as "var".
 * @li momentum: A scalar. Must have the same type as "var".
 * @li epsilon: A scalar. Must have the same type as "var".
 * @li grad: A tensor, specifying the gradient. Must have the same type as "var".
 *
 * @par Attributes:
 * use_locking: An optional "bool". Defaults to "False". If "True", updating of\n
 * the "var", "ms", and "mom" tensors will be protected by a lock; otherwise the\n
 * behavior is undefined, but may exhibit less contention.
 *
 * @par Outputs:
 * var: A mutable tensor. Has the same type as input "var".
 *
 * @attention Constraints:
 * @li Note that in dense implementation of this algorithm, "ms" and "mom" will \n
 * update even if "grad" is 0, but in this sparse implementation, "ms" and "mom" \n
 * will not update in iterations during which "grad" is 0.
 * @li The input tensors "var", "ms", "mom" and "grad" must have the same shape.
 */
REG_OP(ApplyRMSProp)
    .INPUT(var, TensorType::NumberType())
    .INPUT(ms, TensorType::NumberType())
    .INPUT(mom, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(rho, TensorType::NumberType())
    .INPUT(momentum, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyRMSProp)
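
// Illustrative sketch only (hypothetical helper, not part of the GE operator API):
// a scalar form of the RMSProp step exactly as stated in the comment above.
// Assumes <cmath> is available.
inline void ApplyRMSPropScalarSketch(float &var, float &ms, float &mom, float lr,
                                     float rho, float momentum, float epsilon,
                                     float grad) {
  ms = rho * ms + (1.0f - rho) * grad * grad;                  // ms <- rho * ms_{t-1} + (1 - rho) * grad^2
  mom = momentum * mom + lr * grad / std::sqrt(ms + epsilon);  // mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
  var -= mom;                                                  // var <- var - mom
}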

/**
 * @brief Updates "var" according to the RMSProp algorithm, a const input will be
 * considered as an attribute.\n
 * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
 * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
 * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
 * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
 * var <- var - mom
 *
 * @par Inputs:
 * @li var: A mutable tensor. Must be one of the data types defined in\n
 * TensorType::NumberType(). Should be from a Variable().
 * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
 * Variable().
 * @li mom: A mutable tensor. Must have the same type as "var". Should be from a
 * Variable().
 * @li lr: A scalar. Must have the same type as "var".
 * @li grad: A tensor, specifying the gradient. Must have the same type as "var".
 *
 * @par Attributes:
 * @li use_locking: An optional "bool". Defaults to "False". If "True", updating\n
 * of the "var", "ms", and "mom" tensors will be protected by a lock; \n
 * otherwise the behavior is undefined, but may exhibit less contention.
 * @li rho: A required scalar. Must have the same type as "var".
 * @li momentum: A required scalar. Must have the same type as "var".
 * @li epsilon: A required scalar. Must have the same type as "var".
 *
 * @par Outputs:
 * var: A mutable tensor. Must have the same type as input "var".
 *
 * @attention Constraints:
 * @li Note that in dense implementation of this algorithm, "ms" and "mom" will\n
 * update even if "grad" is 0, but in this sparse implementation, "ms" and "mom"\n
 * will not update in iterations during which "grad" is 0.
 * @li The input tensors "var", "ms", "mom" and "grad" must have the same shape.
 */
REG_OP(ApplyRMSPropD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(ms, TensorType::NumberType())
    .INPUT(mom, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(ms, TensorType::NumberType())
    .OUTPUT(mom, TensorType::NumberType())
    .REQUIRED_ATTR(rho, Float)
    .REQUIRED_ATTR(momentum, Float)
    .REQUIRED_ATTR(epsilon, Float)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyRMSPropD)

/**
 *@brief Updates "var" and "accum" according to FOBOS with the Adagrad learning rate.
 *@par Inputs:
 *Six inputs, including:
 * @li var: A mutable Tensor of type TensorType::NumberType().
 * Should be from a Variable().
 * @li accum: A mutable Tensor of the same type as "var". Should be from a Variable().
 * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
 * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
 * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
 * @li grad: A Tensor of the same type as "var", for the gradient.
 *@par Attributes:
 *use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum"
 * tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less
 * contention.
 *@par Outputs:
 *var: A mutable Tensor. Has the same type as "var".
 */
REG_OP(ApplyProximalAdagrad)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(l1, TensorType::NumberType())
    .INPUT(l2, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyProximalAdagrad)
  1078. /**
  1079. *@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate.
  1080. *@par Inputs:
  1081. *Six inputs, including:
  1082. * @li var: A mutable Tensor of type TensorType::NumberType().
  1083. * Should be from a Variable().
  1084. * @li accum: A mutable Tensor of the same type as "var". Should be from a Variable().
  1085. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1086. * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar.
  1087. * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar.
  1088. * @li grad: A Tensor of the same type as "var", for the gradient.
  1089. *@par Attributes:
  1090. *use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" *tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less *contention.
  1091. *@par Outputs:
  1092. * @li var: A mutable Tensor. Has the same type as "var".
  1093. * @li accum: A mutable Tensor. Has the same type as "var".
  1094. */
  1095. REG_OP(ApplyProximalAdagradD)
  1096. .INPUT(var, TensorType::NumberType())
  1097. .INPUT(accum, TensorType::NumberType())
  1098. .INPUT(lr, TensorType::NumberType())
  1099. .INPUT(l1, TensorType::NumberType())
  1100. .INPUT(l2, TensorType::NumberType())
  1101. .INPUT(grad, TensorType::NumberType())
  1102. .OUTPUT(var, TensorType::NumberType())
  1103. .OUTPUT(accum, TensorType::NumberType())
  1104. .ATTR(use_locking, Bool, false)
  1105. .OP_END_FACTORY_REG(ApplyProximalAdagradD)
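
// Illustrative only: a scalar sketch of the FOBOS-with-Adagrad-learning-rate update,
// assuming the usual TensorFlow ApplyProximalAdagrad semantics (this header does not
// spell the formula out). The helper is hypothetical; <cmath> and <algorithm> are
// assumed to be available.
inline void ApplyProximalAdagradScalarSketch(float &var, float &accum, float lr,
                                             float l1, float l2, float grad) {
  accum += grad * grad;                            // accumulate squared gradients
  const float adagrad_lr = lr / std::sqrt(accum);  // per-element Adagrad learning rate
  const float prox = var - adagrad_lr * grad;      // plain gradient step
  const float shrink = std::max(std::fabs(prox) - adagrad_lr * l1, 0.0f);  // L1 soft-threshold
  var = (prox < 0.0f ? -shrink : shrink) / (1.0f + adagrad_lr * l2);       // L2 shrinkage
}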

/**
*@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm.\n
* Compared with op ApplyProximalAdagrad, an additional index tensor is input.
* Only the indices into the first dimension of "var" and "accum" are updated.
*@par Inputs:
* Seven inputs, including:\n
* @li var: A mutable Tensor.\n
* TensorType::NumberType(). Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".\n
* Should be a Variable Tensor.
* @li lr: A Tensor of the same type as "var".\n
* Scaling factor. Must be a scalar.
* @li l1: A Tensor of the same type as "var".\n
* L1 regularization. Must be a scalar.
* @li l2: A Tensor of the same type as "var".\n
* L2 regularization. Must be a scalar.
* @li grad: A Tensor. Has the same type as "var". \n
* The gradient.
* @li indices: A vector of indices into the first dimension of "var" and "accum".\n
* TensorType::IndexNumberType().
*@par Attributes:
*use_locking: An optional bool. Defaults to "False".\n
* If "True", updating of the var and accum tensors will be protected by a lock; \n
* If "False", the behavior is undefined, but may exhibit less contention.
*@par Outputs:
*var: A mutable Tensor. Has the same type as "var".
*/
REG_OP(SparseApplyProximalAdagrad)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(l1, TensorType::NumberType())
    .INPUT(l2, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(indices, TensorType::IndexNumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(SparseApplyProximalAdagrad)

/**
*@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm.\n
* Compared with op ApplyProximalAdagrad, an additional index tensor is input.
* Only the indices into the first dimension of "var" and "accum" are updated.
*@par Inputs:
* Seven inputs, including:\n
* @li var: A mutable Tensor.\n
* TensorType::NumberType(). Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".\n
* Should be a Variable Tensor.
* @li lr: A Tensor of the same type as "var".\n
* Scaling factor. Must be a scalar.
* @li l1: A Tensor of the same type as "var".\n
* L1 regularization. Must be a scalar.
* @li l2: A Tensor of the same type as "var".\n
* L2 regularization. Must be a scalar.
* @li grad: A Tensor. Has the same type as "var". \n
* The gradient.
* @li indices: A vector of indices into the first dimension of "var" and "accum".\n
* TensorType::IndexNumberType().
*@par Attributes:
*use_locking: An optional bool. Defaults to "False".\n
* If "True", updating of the var and accum tensors will be protected by a lock; \n
* If "False", the behavior is undefined, but may exhibit less contention.
*@par Outputs:
*@li var: A mutable Tensor. Has the same type as "var".
*@li accum: A mutable Tensor. Has the same type as "var".
*/
REG_OP(SparseApplyProximalAdagradD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(l1, TensorType::NumberType())
    .INPUT(l2, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(indices, TensorType::IndexNumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(accum, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(SparseApplyProximalAdagradD)
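
// Illustrative only: the sparse variants touch just the rows selected by "indices".
// A hypothetical row loop driving the scalar sketch above, assuming row-major storage
// in which row r of a [rows, cols] tensor starts at offset r * cols, and that row i
// of "grad" corresponds to indices[i].
inline void SparseApplyProximalAdagradSketch(float *var, float *accum, float lr,
                                             float l1, float l2, const float *grad,
                                             const int *indices, int num_indices,
                                             int cols) {
  for (int i = 0; i < num_indices; ++i) {
    const int row = indices[i];  // row of "var"/"accum" to update
    for (int c = 0; c < cols; ++c) {
      ApplyProximalAdagradScalarSketch(var[row * cols + c], accum[row * cols + c],
                                       lr, l1, l2, grad[i * cols + c]);
    }
  }
}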

/**
*@brief Updates "var" according to the Ftrl-proximal scheme.
*@par Inputs:
*Eight inputs, including:
* @li var: A mutable Tensor. Must be of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li linear: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li grad: A Tensor of the same type as "var", for the gradient.
* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
* @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
* @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
*@par Attributes:
*use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" and "accum" tensors will be
* protected by a lock; otherwise the behavior is undefined,
* but may exhibit less contention.
*@par Outputs:
*var: A mutable Tensor. Has the same type as "var".
*/
REG_OP(ApplyFtrl)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(linear, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(l1, TensorType::NumberType())
    .INPUT(l2, TensorType::NumberType())
    .INPUT(lr_power, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyFtrl)

/**
*@brief Updates "var" according to the Ftrl-proximal scheme.
*@par Inputs:
*Eight inputs, including:
* @li var: A mutable Tensor. Must be of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li linear: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li grad: A Tensor of the same type as "var", for the gradient.
* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
* @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
* @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
*@par Attributes:
*use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" and "accum" tensors will be
* protected by a lock; otherwise the behavior is undefined,
* but may exhibit less contention.
*@par Outputs:
*@li var: A mutable Tensor. Has the same type as "var".
*@li accum: A mutable Tensor. Has the same type as "accum".
*@li linear: A mutable Tensor. Has the same type as "linear".
*/
REG_OP(ApplyFtrlD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(linear, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(l1, TensorType::NumberType())
    .INPUT(l2, TensorType::NumberType())
    .INPUT(lr_power, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(accum, TensorType::NumberType())
    .OUTPUT(linear, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyFtrlD)
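
// Illustrative only: a scalar sketch of the Ftrl-proximal step, assuming the usual
// TensorFlow ApplyFtrl semantics (this header does not spell the formula out). The
// helper is hypothetical; <cmath> and <algorithm> are assumed to be available.
inline void ApplyFtrlScalarSketch(float &var, float &accum, float &linear, float grad,
                                  float lr, float l1, float l2, float lr_power) {
  const float accum_new = accum + grad * grad;
  // linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
  linear += grad -
            (std::pow(accum_new, -lr_power) - std::pow(accum, -lr_power)) / lr * var;
  const float quadratic = std::pow(accum_new, -lr_power) / lr + 2.0f * l2;
  const float l1_shrink = std::max(std::fabs(linear) - l1, 0.0f);
  // var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1, else 0
  var = (std::fabs(linear) > l1)
            ? (linear < 0.0f ? l1_shrink : -l1_shrink) / quadratic
            : 0.0f;
  accum = accum_new;
}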

/**
*@brief Updates "var" according to the Ftrl-proximal scheme.
*@par Inputs:
*Nine inputs, including:
* @li var: A mutable Tensor. Must be of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li linear: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li grad: A Tensor of the same type as "var", for the gradient.
* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
* @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
* @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
* @li l2_shrinkage: A Tensor of the same type as "var", for L2 shrinkage regularization. Must be a scalar.
* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
*@par Attributes:
*use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" and "accum" tensors will be
* protected by a lock; otherwise the behavior is undefined,
* but may exhibit less contention.
*@par Outputs:
*var: A mutable Tensor. Has the same type as "var".
*/
REG_OP(ApplyFtrlV2)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(linear, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(l1, TensorType::NumberType())
    .INPUT(l2, TensorType::NumberType())
    .INPUT(l2_shrinkage, TensorType::NumberType())
    .INPUT(lr_power, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyFtrlV2)

/**
*@brief Updates "var" according to the Ftrl-proximal scheme.
*@par Inputs:
*Nine inputs, including:
* @li var: A mutable Tensor. Must be of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li linear: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li grad: A Tensor of the same type as "var", for the gradient.
* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
* @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
* @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
* @li l2_shrinkage: A Tensor of the same type as "var", for L2 shrinkage regularization. Must be a scalar.
* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
*@par Attributes:
*use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" and "accum" tensors will be
* protected by a lock; otherwise the behavior is undefined,
* but may exhibit less contention.
*@par Outputs:
*@li var: A mutable Tensor. Has the same type as "var".
*@li accum: A mutable Tensor. Has the same type as "accum".
*@li linear: A mutable Tensor. Has the same type as "linear".
*/
REG_OP(ApplyFtrlV2D)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(linear, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(l1, TensorType::NumberType())
    .INPUT(l2, TensorType::NumberType())
    .INPUT(l2_shrinkage, TensorType::NumberType())
    .INPUT(lr_power, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(accum, TensorType::NumberType())
    .OUTPUT(linear, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyFtrlV2D)
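
// Illustrative only: the V2 variant differs from ApplyFtrl in that an L2-shrinkage
// term enters the gradient before it is accumulated into "linear" (assuming the
// TensorFlow ApplyFtrlV2 semantics, where grad_with_shrinkage = grad +
// 2 * l2_shrinkage * var while "accum" still accumulates grad^2). Hypothetical helper.
inline void ApplyFtrlV2ScalarSketch(float &var, float &accum, float &linear, float grad,
                                    float lr, float l1, float l2, float l2_shrinkage,
                                    float lr_power) {
  const float accum_new = accum + grad * grad;
  const float grad_shr = grad + 2.0f * l2_shrinkage * var;  // shrinkage-adjusted gradient
  linear += grad_shr -
            (std::pow(accum_new, -lr_power) - std::pow(accum, -lr_power)) / lr * var;
  const float quadratic = std::pow(accum_new, -lr_power) / lr + 2.0f * l2;
  const float l1_shrink = std::max(std::fabs(linear) - l1, 0.0f);
  var = (std::fabs(linear) > l1)
            ? (linear < 0.0f ? l1_shrink : -l1_shrink) / quadratic
            : 0.0f;
  accum = accum_new;
}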

/**
*@brief Updates "var" according to the Adam algorithm.\n
* lr_t <- learning_rate * sqrt(1 - beta_2^t) / (1 - beta_1^t)\n
* m_t <- beta_1 * m_{t-1} + (1 - beta_1) * g\n
* v_t <- beta_2 * v_{t-1} + (1 - beta_2) * g * g\n
* variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
*
*@attention Constraints:\n
* *The input tensors must have the same shape.*
*
*@par Inputs:
*@li var: A mutable Tensor of the type TensorType::NumberType().
* Should be from a Variable().
*@li m: A mutable Tensor of the same type as "var".
* Should be from a Variable().
*@li v: A mutable Tensor of the same type as "var".
* Should be from a Variable().
*@li beta1_power: A scalar of the same type as "var".
*@li beta2_power: A scalar of the same type as "var".
*@li lr: learning_rate. A scalar of the same type as "var".
*@li beta1: A scalar of the same type as "var".
*@li beta2: A scalar of the same type as "var".
*@li epsilon: A scalar of the same type as "var".
*@li grad: A Tensor of the same type as "var", for the gradient.
*
*@par Attributes:\n
*@li use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var", "m", and "v" tensors will be protected
* by a lock; otherwise the behavior is undefined, but may exhibit less
* contention.
*@li use_nesterov: An optional bool. Defaults to "False".
* If "True", uses the nesterov update.
*
*@par Outputs:
* var: A mutable Tensor. Has the same type as input "var".
*/
REG_OP(ApplyAdam)
    .INPUT(var, TensorType::NumberType())
    .INPUT(m, TensorType::NumberType())
    .INPUT(v, TensorType::NumberType())
    .INPUT(beta1_power, TensorType::NumberType())
    .INPUT(beta2_power, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(beta1, TensorType::NumberType())
    .INPUT(beta2, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .ATTR(use_nesterov, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdam)

/**
*@brief Updates "var" according to the Adam algorithm.\n
* lr_t <- learning_rate * sqrt(1 - beta_2^t) / (1 - beta_1^t)\n
* m_t <- beta_1 * m_{t-1} + (1 - beta_1) * g\n
* v_t <- beta_2 * v_{t-1} + (1 - beta_2) * g * g\n
* variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
*
*@attention Constraints:\n
* *The input tensors must have the same shape.*
*
*@par Inputs:
*@li var: A mutable Tensor of the type TensorType::NumberType().
* Should be from a Variable().
*@li m: A mutable Tensor of the same type as "var".
* Should be from a Variable().
*@li v: A mutable Tensor of the same type as "var".
* Should be from a Variable().
*@li beta1_power: A scalar of the same type as "var".
*@li beta2_power: A scalar of the same type as "var".
*@li lr: learning_rate. A scalar of the same type as "var".
*@li beta1: A scalar of the same type as "var".
*@li beta2: A scalar of the same type as "var".
*@li epsilon: A scalar of the same type as "var".
*@li grad: A Tensor of the same type as "var", for the gradient.
*
*@par Attributes:\n
*@li use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var", "m", and "v" tensors will be protected
* by a lock; otherwise the behavior is undefined, but may exhibit less
* contention.
*@li use_nesterov: An optional bool. Defaults to "False".
* If "True", uses the nesterov update.
*
*@par Outputs:
*@li var: A mutable tensor. Has the same type as input "var".
*@li m: A mutable tensor. Has the same type as input "m".
*@li v: A mutable tensor. Has the same type as input "v".
*/
REG_OP(ApplyAdamD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(m, TensorType::NumberType())
    .INPUT(v, TensorType::NumberType())
    .INPUT(beta1_power, TensorType::NumberType())
    .INPUT(beta2_power, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(beta1, TensorType::NumberType())
    .INPUT(beta2, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(m, TensorType::NumberType())
    .OUTPUT(v, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .ATTR(use_nesterov, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdamD)
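
// Illustrative only: a scalar sketch of the Adam step documented above, taking
// beta1_power/beta2_power (i.e. beta1^t and beta2^t) as precomputed inputs, as the op
// does. Hypothetical helper; <cmath> is assumed; the nesterov variant is omitted.
inline void ApplyAdamScalarSketch(float &var, float &m, float &v, float beta1_power,
                                  float beta2_power, float lr, float beta1, float beta2,
                                  float epsilon, float grad) {
  const float lr_t = lr * std::sqrt(1.0f - beta2_power) / (1.0f - beta1_power);
  m = beta1 * m + (1.0f - beta1) * grad;         // m_t <- beta_1*m_{t-1} + (1-beta_1)*g
  v = beta2 * v + (1.0f - beta2) * grad * grad;  // v_t <- beta_2*v_{t-1} + (1-beta_2)*g*g
  var -= lr_t * m / (std::sqrt(v) + epsilon);    // var <- var - lr_t*m_t/(sqrt(v_t)+epsilon)
}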

/**
*@brief Updates "var" according to the proximal adadelta scheme.
*@par Inputs:
*Seven inputs, including:
* @li var: A mutable Tensor of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li accum_update: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li lr: A scalar of the same type as "var", for the scaling factor.
* @li rho: A scalar of the same type as "var", for the decay factor.
* @li epsilon: A scalar of the same type as "var", for the constant factor.
* @li grad: A Tensor of the same type as "var", for the gradient.
*@par Attributes:
*use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var", "accum" and "accum_update" tensors will be
* protected by a lock; otherwise the behavior is undefined,
* but may exhibit less contention.
*@par Outputs:
*var: A mutable Tensor. Has the same type as "var".
*/
REG_OP(ApplyAdadelta)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(accum_update, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(rho, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdadelta)

/**
*@brief Updates "var" according to the proximal adadelta scheme.
*@par Inputs:
*Seven inputs, including:
* @li var: A mutable Tensor of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li accum_update: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li lr: A scalar of the same type as "var", for the scaling factor.
* @li rho: A scalar of the same type as "var", for the decay factor.
* @li epsilon: A scalar of the same type as "var", for the constant factor.
* @li grad: A Tensor of the same type as "var", for the gradient.
*@par Attributes:
*use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var", "accum" and "accum_update" tensors will be
* protected by a lock; otherwise the behavior is undefined,
* but may exhibit less contention.
*@par Outputs:
*@li var: A mutable Tensor. Has the same type as "var".
*@li accum: A mutable Tensor. Has the same type as "var".
*@li accum_update: A mutable Tensor. Has the same type as "var".
*/
REG_OP(ApplyAdadeltaD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(accum_update, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(rho, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(accum, TensorType::NumberType())
    .OUTPUT(accum_update, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(ApplyAdadeltaD)
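
// Illustrative only: a scalar sketch of the adadelta step, assuming the usual
// TensorFlow ApplyAdadelta semantics (running averages of squared gradients and
// squared updates with decay "rho"). Hypothetical helper; <cmath> is assumed.
inline void ApplyAdadeltaScalarSketch(float &var, float &accum, float &accum_update,
                                      float lr, float rho, float epsilon, float grad) {
  accum = rho * accum + (1.0f - rho) * grad * grad;                    // running avg of grad^2
  const float update = std::sqrt(accum_update + epsilon) /
                       std::sqrt(accum + epsilon) * grad;              // rescaled step
  accum_update = rho * accum_update + (1.0f - rho) * update * update;  // running avg of update^2
  var -= lr * update;
}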

/**
* @brief Updates "var" according to the ApplyMomentum algorithm. \n
* accum = accum * momentum + x1 * x2 \n
* if use_nesterov is True: \n
*     var -= x1 * x2 * lr + accum * momentum * lr \n
* else: \n
*     var -= accum * lr
*
* @par Inputs:
* Six inputs, including:
* @li var: A mutable Tensor of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li lr: A scalar of the same type as "var", for the scaling factor.
* @li x1: A Tensor of type TensorType::NumberType().
* @li momentum: A scalar of the same type as "var".
* @li x2: A scalar of the same type as "var".
*
* @par Attributes:
* Two attributes, including:
* @li use_nesterov: An optional bool. Defaults to "False". \n
* If True, the tensor passed to compute grad will be var - lr * momentum * accum, \n
* so in the end, the var you get is actually var - lr * momentum * accum.
* @li use_locking: An optional bool. Defaults to "False". \n
* If "True", updating of the "var" and "accum" tensors will be protected \n
* by a lock; otherwise the behavior is undefined, but may exhibit less contention.
*
* @par Outputs:
* Two outputs, including:
* @li var: A mutable Tensor. Has the same type as "var".
* @li accum: A mutable Tensor. Has the same type as "var".
*/
REG_OP(FusedMulApplyMomentum)
    .INPUT(var, TensorType::NumberType())
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(x1, TensorType::NumberType())
    .INPUT(momentum, TensorType::NumberType())
    .INPUT(x2, TensorType::NumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(accum, TensorType::NumberType())
    .ATTR(use_nesterov, Bool, false)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(FusedMulApplyMomentum)
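
// Illustrative only: a scalar sketch of the fused multiply + momentum update spelled
// out above, where the effective gradient is x1 * x2. The helper is hypothetical.
inline void FusedMulApplyMomentumScalarSketch(float &var, float &accum, float lr,
                                              float x1, float momentum, float x2,
                                              bool use_nesterov) {
  const float grad = x1 * x2;                  // fused multiplication of the two inputs
  accum = accum * momentum + grad;             // accum <- accum*momentum + x1*x2
  if (use_nesterov) {
    var -= grad * lr + accum * momentum * lr;  // nesterov-style lookahead step
  } else {
    var -= accum * lr;
  }
}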

/**
* @brief Updates "var" according to the ApplyMomentum algorithm. \n
* accum = accum * momentum + x1 * x2 \n
* if use_nesterov is True: \n
*     var -= x1 * x2 * lr + accum * momentum * lr \n
* else: \n
*     var -= accum * lr
*
* @par Inputs:
* Seven inputs, including:
* @li var: A mutable Tensor of type float32.
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li lr: A scalar of the same type as "accum", for the scaling factor.
* @li x1: A Tensor of the same type as "accum".
* @li momentum: A scalar of the same type as "accum".
* @li x2: A scalar of the same type as "accum".
* @li var_copy: A Tensor of type float16.
*
* @par Attributes:
* Two attributes, including:
* @li use_nesterov: An optional bool. Defaults to "False". \n
* If True, the tensor passed to compute grad will be var - lr * momentum * accum, \n
* so in the end, the var you get is actually var - lr * momentum * accum.
* @li use_locking: An optional bool. Defaults to "False". \n
* If "True", updating of the "var" and "accum" tensors will be protected \n
* by a lock; otherwise the behavior is undefined, but may exhibit less contention.
*
* @par Outputs:
* Three outputs, including:
* @li var: A Tensor of type float32.
* @li var_copy: A Tensor of type float16.
* @li accum: A Tensor. Has the same type as input "accum".
*/
REG_OP(FusedMulApplyMomentumExtern)
    .INPUT(var, TensorType(DT_FLOAT))
    .INPUT(accum, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(x1, TensorType::NumberType())
    .INPUT(momentum, TensorType::NumberType())
    .INPUT(x2, TensorType::NumberType())
    .INPUT(var_copy, TensorType(DT_FLOAT16))
    .OUTPUT(var, TensorType(DT_FLOAT))
    .OUTPUT(var_copy, TensorType(DT_FLOAT16))
    .OUTPUT(accum, TensorType::NumberType())
    .ATTR(use_nesterov, Bool, false)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern)

/**
*@brief Updates "g" according to the LARS algorithm.
*@par Inputs:
*Four inputs, including:
* @li w: A Tensor. Must be of type TensorType::DT_FLOAT.
* @li g: A Tensor of the same type and shape as "w".
* @li weight_decay: A Tensor of the same type as "w". Must be a scalar.
* @li learning_rate: A Tensor of the same type as "w". Must be a scalar.
*@par Attributes:
*Three attributes, including:
* @li hyperpara: An optional float. Defaults to 0.001.
* @li epsilon: An optional float. Defaults to 1e-5, to avoid a zero denominator.
* @li use_clip: An optional bool. Defaults to "False".\n
* If "True", the learning rate is updated with clipping.
*@par Outputs:
*g_new: A Tensor of the same type as "w".
*/
REG_OP(LarsV2)
    .INPUT(w, TensorType(DT_FLOAT))
    .INPUT(g, TensorType(DT_FLOAT))
    .INPUT(weight_decay, TensorType(DT_FLOAT))
    .INPUT(learning_rate, TensorType(DT_FLOAT))
    .OUTPUT(g_new, TensorType(DT_FLOAT))
    .ATTR(hyperpara, Float, 0.001)
    .ATTR(epsilon, Float, 0.00001)
    .ATTR(use_clip, Bool, false)
    .OP_END_FACTORY_REG(LarsV2)

/**
*@brief Updates "g" according to the LARS algorithm.
*@par Inputs:
*Six inputs, including:
* @li w: A Tensor. Must be of type TensorType::DT_FLOAT.
* @li g: A Tensor of the same type and shape as "w".
* @li w_square_sum: A Tensor holding square_sum(w), of the same type as "w". Must be a scalar.
* @li g_square_sum: A Tensor holding square_sum(g), of the same type as "w". Must be a scalar.
* @li weight_decay: A Tensor of the same type as "w". Must be a scalar.
* @li learning_rate: A Tensor of the same type as "w". Must be a scalar.
*@par Attributes:
*Three attributes, including:
* @li hyperpara: An optional float. Defaults to 0.001.
* @li epsilon: An optional float. Defaults to 1e-5, to avoid a zero denominator.
* @li use_clip: An optional bool. Defaults to "False".\n
* If "True", the learning rate is updated with clipping.
*@par Outputs:
*g_new: A Tensor of the same type as "w".
*/
REG_OP(LarsV2Update)
    .INPUT(w, TensorType(DT_FLOAT))
    .INPUT(g, TensorType(DT_FLOAT))
    .INPUT(w_square_sum, TensorType(DT_FLOAT))
    .INPUT(g_square_sum, TensorType(DT_FLOAT))
    .INPUT(weight_decay, TensorType(DT_FLOAT))
    .INPUT(learning_rate, TensorType(DT_FLOAT))
    .OUTPUT(g_new, TensorType(DT_FLOAT))
    .ATTR(hyperpara, Float, 0.001)
    .ATTR(epsilon, Float, 0.00001)
    .ATTR(use_clip, Bool, false)
    .OP_END_FACTORY_REG(LarsV2Update)
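
// Illustrative only: one common LARS formulation of how "g_new" could be derived from
// the documented inputs. The exact coefficient handling in this op (notably "use_clip"
// and how "learning_rate" enters) is not spelled out here, so treat this purely as a
// hypothetical sketch; <cmath> is assumed.
inline float LarsV2UpdateScalarSketch(float w, float g, float w_square_sum,
                                      float g_square_sum, float weight_decay,
                                      float hyperpara, float epsilon) {
  const float w_norm = std::sqrt(w_square_sum);
  const float g_norm = std::sqrt(g_square_sum);
  // Layer-wise trust ratio: scales the regularized gradient by ||w|| / (||g|| + wd*||w||).
  const float coeff = hyperpara * w_norm / (g_norm + weight_decay * w_norm + epsilon);
  return coeff * (g + weight_decay * w);  // g_new for this element
}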

/**
* @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme.
* @par Inputs:
* Nine inputs, including:
* @li var: A mutable Tensor. Must be of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li linear: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li grad: A Tensor of the same type as "var", for the gradient.
* @li indices: A vector of indices into the first dimension of "var" and "accum".
* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
* @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
* @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
* @par Attributes:
* use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" and "accum" tensors will be
* protected by a lock; otherwise the behavior is undefined,
* but may exhibit less contention.
* @par Outputs:
* var: A Tensor. Has the same type and format as input "var".
*/
REG_OP(SparseApplyFtrl)
    .INPUT(var, TensorType({DT_FLOAT}))
    .INPUT(accum, TensorType({DT_FLOAT}))
    .INPUT(linear, TensorType({DT_FLOAT}))
    .INPUT(grad, TensorType({DT_FLOAT}))
    .INPUT(indices, TensorType({DT_INT32}))
    .INPUT(lr, TensorType({DT_FLOAT}))
    .INPUT(l1, TensorType({DT_FLOAT}))
    .INPUT(l2, TensorType({DT_FLOAT}))
    .INPUT(lr_power, TensorType({DT_FLOAT}))
    .OUTPUT(var, TensorType({DT_FLOAT}))
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(SparseApplyFtrl)

/**
* @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme.
* @par Inputs:
* Five inputs, including:
* @li var: A mutable Tensor. Must be of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li linear: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li grad: A Tensor of the same type as "var", for the gradient.
* @li indices: A vector of indices into the first dimension of "var" and "accum".
* @par Attributes:
* @li lr: A required float, for the scaling factor.
* @li l1: A required float, for L1 regularization.
* @li l2: A required float, for L2 regularization.
* @li lr_power: A required float, for the scaling factor.
* @li use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" and "accum" tensors will be
* protected by a lock; otherwise the behavior is undefined,
* but may exhibit less contention.
* @par Outputs:
* @li var: A Tensor. Has the same type and format as input "var".
* @li accum: A Tensor. Has the same type and format as input "accum".
* @li linear: A Tensor. Has the same type and format as input "linear".
*/
REG_OP(SparseApplyFtrlD)
    .INPUT(var, TensorType({DT_FLOAT}))
    .INPUT(accum, TensorType({DT_FLOAT}))
    .INPUT(linear, TensorType({DT_FLOAT}))
    .INPUT(grad, TensorType({DT_FLOAT}))
    .INPUT(indices, TensorType({DT_INT32}))
    .OUTPUT(var, TensorType({DT_FLOAT}))
    .OUTPUT(accum, TensorType({DT_FLOAT}))
    .OUTPUT(linear, TensorType({DT_FLOAT}))
    .REQUIRED_ATTR(lr, Float)
    .REQUIRED_ATTR(l1, Float)
    .REQUIRED_ATTR(l2, Float)
    .REQUIRED_ATTR(lr_power, Float)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(SparseApplyFtrlD)
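
// Illustrative only: as with the other sparse variants, only the rows named in
// "indices" are touched. A hypothetical row loop reusing the dense FTRL sketch above,
// with the same row-major layout assumptions as before.
inline void SparseApplyFtrlSketch(float *var, float *accum, float *linear,
                                  const float *grad, const int *indices,
                                  int num_indices, int cols, float lr, float l1,
                                  float l2, float lr_power) {
  for (int i = 0; i < num_indices; ++i) {
    const int row = indices[i];  // row of "var"/"accum"/"linear" to update
    for (int c = 0; c < cols; ++c) {
      ApplyFtrlScalarSketch(var[row * cols + c], accum[row * cols + c],
                            linear[row * cols + c], grad[i * cols + c],
                            lr, l1, l2, lr_power);
    }
  }
}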

/**
* @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme.
* That is, for the rows for which "grad" is available, "var", "accum" and "linear" are updated.
* @par Inputs:
* Ten inputs, including:
* @li var: A mutable Tensor. Must be of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li linear: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li grad: A Tensor of the same type as "var", for the gradient.
* @li indices: A vector of indices into the first dimension of "var" and "accum".
* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
* @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
* @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
* @li l2_shrinkage: A Tensor of the same type as "var", for L2 shrinkage regularization. Must be a scalar.
* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
* @par Attributes:
* use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" and "accum" tensors will be
* protected by a lock; otherwise the behavior is undefined,
* but may exhibit less contention.
* @par Outputs:
* var: A Tensor. Has the same type and format as input "var".
*/
REG_OP(SparseApplyFtrlV2)
    .INPUT(var, TensorType({DT_FLOAT}))
    .INPUT(accum, TensorType({DT_FLOAT}))
    .INPUT(linear, TensorType({DT_FLOAT}))
    .INPUT(grad, TensorType({DT_FLOAT}))
    .INPUT(indices, TensorType({DT_INT32}))
    .INPUT(lr, TensorType({DT_FLOAT}))
    .INPUT(l1, TensorType({DT_FLOAT}))
    .INPUT(l2, TensorType({DT_FLOAT}))
    .INPUT(l2_shrinkage, TensorType({DT_FLOAT}))
    .INPUT(lr_power, TensorType({DT_FLOAT}))
    .OUTPUT(var, TensorType({DT_FLOAT}))
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(SparseApplyFtrlV2)

/**
* @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme.
* That is, for the rows for which "grad" is available, "var", "accum" and "linear" are updated.
* @par Inputs:
* Five inputs, including:
* @li var: A mutable Tensor. Must be of type TensorType::NumberType().
* Should be a Variable Tensor.
* @li accum: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li linear: A mutable Tensor of the same type as "var".
* Should be a Variable Tensor.
* @li grad: A Tensor of the same type as "var", for the gradient.
* @li indices: A vector of indices into the first dimension of "var" and "accum".
* @par Attributes:
* @li lr: A required float, for the scaling factor.
* @li l1: A required float, for L1 regularization.
* @li l2: A required float, for L2 regularization.
* @li l2_shrinkage: A required float, for L2 shrinkage regularization.
* @li lr_power: A required float, for the scaling factor.
* @li use_locking: An optional bool. Defaults to "False".
* If "True", updating of the "var" and "accum" tensors will be
* protected by a lock; otherwise the behavior is undefined,
* but may exhibit less contention.
* @par Outputs:
* @li var: A Tensor. Has the same type and format as input "var".
* @li accum: A Tensor. Has the same type and format as input "accum".
* @li linear: A Tensor. Has the same type and format as input "linear".
*/
REG_OP(SparseApplyFtrlV2D)
    .INPUT(var, TensorType({DT_FLOAT}))
    .INPUT(accum, TensorType({DT_FLOAT}))
    .INPUT(linear, TensorType({DT_FLOAT}))
    .INPUT(grad, TensorType({DT_FLOAT}))
    .INPUT(indices, TensorType({DT_INT32}))
    .OUTPUT(var, TensorType({DT_FLOAT}))
    .OUTPUT(accum, TensorType({DT_FLOAT}))
    .OUTPUT(linear, TensorType({DT_FLOAT}))
    .REQUIRED_ATTR(lr, Float)
    .REQUIRED_ATTR(l1, Float)
    .REQUIRED_ATTR(l2, Float)
    .REQUIRED_ATTR(l2_shrinkage, Float)
    .REQUIRED_ATTR(lr_power, Float)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(SparseApplyFtrlV2D)

/**
* @brief Updates "var" at the specified indices according to the RMSProp algorithm.
* mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
* Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
* ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
* var <- var - mom\n
*
* @par Inputs:
* @li var: A mutable tensor. Must be one of the data types defined in\n
* TensorType::NumberType(). Should be from a Variable().
* @li ms: A mutable tensor. Must have the same type as "var". Should be from a
* Variable().
* @li mom: A mutable tensor. Must have the same type as "var". Should be from a
* Variable().
* @li lr: A scalar. Must have the same type as "var".
* @li rho: A scalar. Must have the same type as "var".
* @li momentum: A scalar. Must have the same type as "var".
* @li epsilon: A scalar. Must have the same type as "var".
* @li grad: A tensor, specifying the gradient.
* @li indices: A vector of indices into the first dimension of "var", "ms" and "mom".
*
* @par Attributes:
* use_locking: An optional "bool". Defaults to "False". If "True", updating of
* the "var", "ms", and "mom" tensors will be protected by a lock; otherwise the
* behavior is undefined, but may exhibit less contention.
*
* @par Outputs:
* var: A mutable tensor. Has the same type as input "var".
*
* @attention Constraints:
* @li Note that in this sparse implementation, "ms" and "mom" will not update
* in iterations during which "grad" is 0.
* @li The input tensors "var", "ms" and "mom" must have the same shape.
*
*/
REG_OP(SparseApplyRMSProp)
    .INPUT(var, TensorType::NumberType())
    .INPUT(ms, TensorType::NumberType())
    .INPUT(mom, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(rho, TensorType::NumberType())
    .INPUT(momentum, TensorType::NumberType())
    .INPUT(epsilon, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(indices, TensorType::IndexNumberType())
    .OUTPUT(var, TensorType::NumberType())
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(SparseApplyRMSProp)

/**
* @brief Updates "var" at the specified indices according to the RMSProp algorithm.
* A const input will be considered as an attribute.\n
* mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
* Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
* ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
* mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
* var <- var - mom
*
* @par Inputs:
* @li var: A mutable tensor. Must be one of the data types defined in
* TensorType::NumberType(). Should be from a Variable().
* @li ms: A mutable tensor. Must have the same type as "var". Should be from a
* Variable().
* @li mom: A mutable tensor. Must have the same type as "var". Should be from a
* Variable().
* @li lr: A scalar. Must have the same type as "var".
* @li grad: A tensor, specifying the gradient.
* @li indices: A vector of indices into the first dimension of "var", "ms" and "mom".
*
* @par Attributes:
* @li use_locking: An optional "bool". Defaults to "False". If "True",
* updating of the "var", "ms", and "mom" tensors will be protected by a lock;
* otherwise the behavior is undefined, but may exhibit less contention.
* @li rho: A required scalar. Must have the same type as "var".
* @li momentum: A required scalar. Must have the same type as "var".
* @li epsilon: A required scalar. Must have the same type as "var".
*
* @par Outputs:
* @li var: A mutable tensor. Must have the same type as input "var".
* @li ms: A mutable tensor. Must have the same type as input "ms".
* @li mom: A mutable tensor. Must have the same type as input "mom".
*
* @attention Constraints:
* @li Note that in this sparse implementation, "ms" and "mom" will not update
* in iterations during which "grad" is 0.
* @li The input tensors "var", "ms" and "mom" must have the same shape.
*/
REG_OP(SparseApplyRMSPropD)
    .INPUT(var, TensorType::NumberType())
    .INPUT(ms, TensorType::NumberType())
    .INPUT(mom, TensorType::NumberType())
    .INPUT(lr, TensorType::NumberType())
    .INPUT(grad, TensorType::NumberType())
    .INPUT(indices, TensorType::IndexNumberType())
    .OUTPUT(var, TensorType::NumberType())
    .OUTPUT(ms, TensorType::NumberType())
    .OUTPUT(mom, TensorType::NumberType())
    .REQUIRED_ATTR(rho, Float)
    .REQUIRED_ATTR(momentum, Float)
    .REQUIRED_ATTR(epsilon, Float)
    .ATTR(use_locking, Bool, false)
    .OP_END_FACTORY_REG(SparseApplyRMSPropD)
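
// Illustrative only: the sparse RMSProp variants apply the same per-element update as
// the dense form, but only to the rows selected by "indices" (so "ms" and "mom" stay
// untouched for rows whose gradient is absent). Hypothetical row loop reusing the
// RMSProp scalar sketch defined earlier in this file.
inline void SparseApplyRmsPropSketch(float *var, float *ms, float *mom,
                                     const float *grad, const int *indices,
                                     int num_indices, int cols, float lr, float rho,
                                     float momentum, float epsilon) {
  for (int i = 0; i < num_indices; ++i) {
    const int row = indices[i];  // row of "var"/"ms"/"mom" to update
    for (int c = 0; c < cols; ++c) {
      ApplyRmsPropScalarSketch(var[row * cols + c], ms[row * cols + c],
                               mom[row * cols + c], lr, rho, momentum, epsilon,
                               grad[i * cols + c]);
    }
  }
}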

/**
*@brief Cleans the memory of the workspace list.
*@par Attributes:
* @li automic_add_mem_size: A list of sizes of the workspaces to be cleaned.
*/
REG_OP(AtomicAddrClean)
    .ATTR(automic_add_mem_size, ListInt, {})
    .OP_END_FACTORY_REG(AtomicAddrClean)
}  // namespace ge

#endif  // GE_OP_TRAINING_OPS_H

The Graph Engine (GE) module is a submodule of MindSpore. Implemented in C++, it sits between the front-end module (ME) and the underlying hardware and acts as the bridge between them. GE takes the graph delivered by ME as input, performs a series of deep graph optimizations, and outputs a graph that runs efficiently on the underlying hardware. GE applies optimizations tailored to the hardware architecture of the Ascend AI processor to fully exploit its computing power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts, GE API and GE Core; the detailed architecture diagram is shown below.