You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

nn_training_ops.h 95 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_OP_TRAINING_OPS_H
  17. #define GE_OP_TRAINING_OPS_H
  18. #include "graph/operator_reg.h"
  19. namespace ge {
  20. /**
  21. *@brief Updates "var" according to the AdaMax algorithm.\n
  22. * t-1 means the previous period.
  23. * m_t <- beta1 * m_{t-1} + (1 - beta1) * grad\n
  24. * v_t <- max(beta2 * v_{t-1}, abs(grad))\n
  25. * var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon)
  26. *
  27. *@attention Constraints:\n
  28. * The input tensors must have the same shape.
  29. *
  30. *@par Inputs:
  31. *@li var: A mutable tensor. Must be one of the following types: TensorType::NumberType().
  32. * Should be from a Variable().
  33. *@li m: A mutable tensor. Has the same type as "var".
  34. * Should be from a Variable().
  35. *@li v: A mutable tensor. Has the same type as "var".
  36. * Should be from a Variable().
  37. *@li beta1_power: A scalar. Has the same type as "var".
  38. *@li lr: The learning rate. A scalar. Has the same type as "var".
  39. *@li beta1: A scalar. Has the same type as "var".
  40. *@li beta2: A scalar. Has the same type as "var".
  41. *@li epsilon: A scalar. Has the same type as "var".
  42. *@li grad: A tensor for the gradient. Has the same type as "var".
  43. *
  44. *@par Attributes:\n
  45. * use_locking: An optional bool. Defaults to "False".
  46. * If "True", updating of the "var", "m", and "v" tensors is protected
  47. * by a lock; otherwise the behavior is undefined, but may exhibit less
  48. * contention.
  49. *
  50. *@par Outputs:
  51. * var: A mutable tensor. Has the same type as input "var".
  52. *
  53. */
  54. REG_OP(ApplyAdaMax)
  55. .INPUT(var, TensorType::NumberType())
  56. .INPUT(m, TensorType::NumberType())
  57. .INPUT(v, TensorType::NumberType())
  58. .INPUT(beta1_power, TensorType::NumberType())
  59. .INPUT(lr, TensorType::NumberType())
  60. .INPUT(beta1, TensorType::NumberType())
  61. .INPUT(beta2, TensorType::NumberType())
  62. .INPUT(epsilon, TensorType::NumberType())
  63. .INPUT(grad, TensorType::NumberType())
  64. .OUTPUT(var, TensorType::NumberType())
  65. .ATTR(use_locking, Bool, false)
  66. .OP_END_FACTORY_REG(ApplyAdaMax)
  67. /**
  68. *@brief Updates "var" according to the AdaMax algorithm.\n
  69. * t-1 means the previous period.
  70. * m_t <- beta1 * m_{t-1} + (1 - beta1) * grad\n
  71. * v_t <- max(beta2 * v_{t-1}, abs(grad))\n
  72. * var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon)
  73. *
  74. *@attention Constraints:\n
  75. * The input tensors must have the same shape.
  76. *
  77. *@par Inputs:
  78. *@li var: A mutable tensor. Must be one of the following types: TensorType::NumberType().
  79. * Should be from a Variable().
  80. *@li m: A mutable tensor. Has the same type as "var".
  81. * Should be from a Variable().
  82. *@li v: A mutable tensor. Has the same type as "var".
  83. * Should be from a Variable().
  84. *@li beta1_power: A scalar. Has the same type as "var".
  85. *@li lr: The learning rate. A scalar. Has the same type as "var".
  86. *@li beta1: A scalar. Has the same type as "var".
  87. *@li beta2: A scalar. Has the same type as "var".
  88. *@li epsilon: A scalar. Has the same type as "var".
  89. *@li grad: A tensor for the gradient. Has the same type as "var".
  90. *
  91. *@par Attributes:\n
  92. * use_locking: An optional bool. Defaults to "False".
  93. * If "True", updating of the "var", "m", and "v" tensors is protected
  94. * by a lock; otherwise the behavior is undefined, but may exhibit less
  95. * contention.
  96. *
  97. *@par Outputs:
  98. *@li var: A mutable tensor. Has the same type as input "var".
  99. *@li m: A mutable tensor. Has the same type as input "var".
  100. *@li v: A mutable tensor. Has the same type as input "var".
  101. */
  102. REG_OP(ApplyAdaMaxD)
  103. .INPUT(var, TensorType::NumberType())
  104. .INPUT(m, TensorType::NumberType())
  105. .INPUT(v, TensorType::NumberType())
  106. .INPUT(beta1_power, TensorType::NumberType())
  107. .INPUT(lr, TensorType::NumberType())
  108. .INPUT(beta1, TensorType::NumberType())
  109. .INPUT(beta2, TensorType::NumberType())
  110. .INPUT(epsilon, TensorType::NumberType())
  111. .INPUT(grad, TensorType::NumberType())
  112. .OUTPUT(var, TensorType::NumberType())
  113. .OUTPUT(m, TensorType::NumberType())
  114. .OUTPUT(v, TensorType::NumberType())
  115. .ATTR(use_locking, Bool, false)
  116. .OP_END_FACTORY_REG(ApplyAdaMaxD)
  117. /**
  118. *@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme.
  119. *@par Inputs:
  120. * Five inputs, including:
  121. *@li var: An NCHW, NHWC, or ND Tensor of type float32.
  122. *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
  123. *@li lr: An NCHW, NHWC, or ND Tensor of type float32.
  124. *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
  125. *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
  126. *@par Attributes:
  127. *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
  128. *@li update_slots: An optional bool. Defaults to "True". The computation differs depending on whether this is "True" or "False".
  129. *@par Outputs:
  130. *var: A Tensor. Has the same type and format as input "var".
  131. */
  132. REG_OP(SparseApplyAdagrad)
  133. .INPUT(var, TensorType({DT_FLOAT}))
  134. .INPUT(accum, TensorType({DT_FLOAT}))
  135. .INPUT(lr, TensorType({DT_FLOAT}))
  136. .INPUT(grad, TensorType({DT_FLOAT}))
  137. .INPUT(indices, TensorType({DT_INT32}))
  138. .OUTPUT(var, TensorType({DT_FLOAT}))
  139. .ATTR(use_locking, Bool, false)
  140. .ATTR(update_slots, Bool, true)
  141. .OP_END_FACTORY_REG(SparseApplyAdagrad)
  142. /**
  143. *@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme.
  144. *@par Inputs:
  145. * Four inputs, including:
  146. *@li var: An NCHW, NHWC, or ND Tensor of type float32.
  147. *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
  148. *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
  149. *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
  150. *@par Attributes:
  151. *@li lr: A required float. Used for computation.
  152. *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
  153. *@li update_slots: An optional bool. Defaults to "True". The computation differs depending on whether this is "True" or "False".
  154. *@par Outputs:
  155. *@li var: A Tensor. Has the same type and format as input "var".
  156. *@li accum: A Tensor. Has the same type and format as input "accum".
  157. */
  158. REG_OP(SparseApplyAdagradD)
  159. .INPUT(var, TensorType({DT_FLOAT}))
  160. .INPUT(accum, TensorType({DT_FLOAT}))
  161. .INPUT(grad, TensorType({DT_FLOAT}))
  162. .INPUT(indices, TensorType({DT_INT32}))
  163. .OUTPUT(var, TensorType({DT_FLOAT}))
  164. .OUTPUT(accum, TensorType({DT_FLOAT}))
  165. .REQUIRED_ATTR(lr, Float)
  166. .ATTR(use_locking, Bool, false)
  167. .ATTR(update_slots, Bool, true)
  168. .OP_END_FACTORY_REG(SparseApplyAdagradD)
  169. /**
  170. *@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme.
  171. *@par Inputs:
  172. * Six inputs, including:
  173. *@li var: An NCHW, NHWC, or ND Tensor of type float32.
  174. *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
  175. *@li lr: An NCHW, NHWC, or ND Tensor of type float32.
  176. *@li epsilon: An NCHW, NHWC, or ND Tensor of type float32.
  177. *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
  178. *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
  179. *@par Attributes:
  180. *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
  181. *@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different.
  182. *@par Outputs:
  183. *var: A Tensor. Has the same type and format as input "var".
  184. */
  185. REG_OP(SparseApplyAdagradV2)
  186. .INPUT(var, TensorType({DT_FLOAT}))
  187. .INPUT(accum, TensorType({DT_FLOAT}))
  188. .INPUT(lr, TensorType({DT_FLOAT}))
  189. .INPUT(epsilon, TensorType({DT_FLOAT}))
  190. .INPUT(grad, TensorType({DT_FLOAT}))
  191. .INPUT(indices, TensorType({DT_INT32}))
  192. .OUTPUT(var, TensorType({DT_FLOAT}))
  193. .ATTR(use_locking, Bool, false)
  194. .ATTR(update_slots, Bool, true)
  195. .OP_END_FACTORY_REG(SparseApplyAdagradV2)
  196. /**
  197. *@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme.
  198. *@par Inputs:
  199. * Four inputs, including:
  200. *@li var: An NCHW, NHWC, or ND Tensor of type float32.
  201. *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
  202. *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
  203. *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
  204. *@par Attributes:
  205. *@li lr: A required float. Used for computation.
  206. *@li epsilon: A required float. Used for computation.
  207. *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
  208. *@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different.
  209. *@par Outputs:
  210. *@li var: A Tensor. Has the same type and format as input "var".
  211. *@li accum: A Tensor. Has the same type and format as input "accum".
  212. */
  213. REG_OP(SparseApplyAdagradV2D)
  214. .INPUT(var, TensorType({DT_FLOAT}))
  215. .INPUT(accum, TensorType({DT_FLOAT}))
  216. .INPUT(grad, TensorType({DT_FLOAT}))
  217. .INPUT(indices, TensorType({DT_INT32}))
  218. .OUTPUT(var, TensorType({DT_FLOAT}))
  219. .OUTPUT(accum, TensorType({DT_FLOAT}))
  220. .REQUIRED_ATTR(lr, Float)
  221. .REQUIRED_ATTR(epsilon, Float)
  222. .ATTR(use_locking, Bool, false)
  223. .ATTR(update_slots, Bool, true)
  224. .OP_END_FACTORY_REG(SparseApplyAdagradV2D)
  225. /**
  226. *@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you
  227. * want to use Nesterov momentum.\n
  228. * computing process: \n
  229. * accum = accum * momentum + grad\n
  230. * var -= lr * accum
  231. *
  232. *@attention Constraints:\n
  233. * The input tensors must have the same shape.
  234. *
  235. *@par Inputs:
  236. *@li var: A mutable tensor. Should be from a Variable().
  237. *@li accum: A mutable tensor. Has the same type as "var".
  238. * Should be from a Variable().
  239. *@li lr: A scalar. Has the same type as "var".
  240. *@li grad: A tensor for the gradient. Has the same type as "var".
  241. *@li momentum: The momentum; presumably a scalar with the same type as "var" (TODO confirm).
  242. *@par Attributes:
  243. *@li use_nesterov: An optional bool. Defaults to "False".
  244. * If "True", the tensor passed to compute grad will be
  245. * var - lr * momentum * accum, so in the end, the var you get is actually
  246. * var - lr * momentum * accum.
  247. *
  248. *@li use_locking: An optional bool. Defaults to "False".\n
  249. * If "True", updating of the "var" and "accum" tensors is protected by a lock;
  250. * otherwise the behavior is undefined, but may exhibit less contention.
  251. *
  252. *@par Outputs:
  253. * var: A mutable tensor. Has the same type as input "var".
  254. *
  255. */
  256. REG_OP(ApplyMomentum)
  257. .INPUT(var, TensorType::NumberType())
  258. .INPUT(accum, TensorType::NumberType())
  259. .INPUT(lr, TensorType::NumberType())
  260. .INPUT(grad, TensorType::NumberType())
  261. .INPUT(momentum, TensorType::NumberType())
  262. .OUTPUT(var, TensorType::NumberType())
  263. .ATTR(use_nesterov, Bool, false)
  264. .ATTR(use_locking, Bool, false)
  265. .OP_END_FACTORY_REG(ApplyMomentum)
  /**
  *@brief Registers the "ApplyMomentumCCE" operator.
  * Its inputs, output, and attributes are declared identically to those of
  * "ApplyMomentum".
  * NOTE(review): presumably a CCE-backed variant of "ApplyMomentum" that follows
  * the same update rule; confirm against the kernel implementation.
  *
  *@par Inputs:
  *@li var: A tensor of a type in TensorType::NumberType().
  *@li accum: A tensor of a type in TensorType::NumberType().
  *@li lr: A tensor of a type in TensorType::NumberType().
  *@li grad: A tensor of a type in TensorType::NumberType().
  *@li momentum: A tensor of a type in TensorType::NumberType().
  *
  *@par Attributes:
  *@li use_nesterov: An optional bool. Defaults to "False".
  *@li use_locking: An optional bool. Defaults to "False".
  *
  *@par Outputs:
  * var: A tensor of a type in TensorType::NumberType().
  */
  266. REG_OP(ApplyMomentumCCE)
  267. .INPUT(var, TensorType::NumberType())
  268. .INPUT(accum, TensorType::NumberType())
  269. .INPUT(lr, TensorType::NumberType())
  270. .INPUT(grad, TensorType::NumberType())
  271. .INPUT(momentum, TensorType::NumberType())
  272. .OUTPUT(var, TensorType::NumberType())
  273. .ATTR(use_nesterov, Bool, false)
  274. .ATTR(use_locking, Bool, false)
  275. .OP_END_FACTORY_REG(ApplyMomentumCCE)
  276. /**
  277. *@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you
  278. * want to use Nesterov momentum.\n
  279. * computing process: \n
  280. * accum = accum * momentum + grad\n
  281. * var -= lr * accum
  282. *
  283. *@attention Constraints:\n
  284. * The input tensors must have the same shape.
  285. *
  286. *@par Inputs:
  287. *@li var: A mutable tensor. Should be from a Variable().
  288. *@li accum: A mutable tensor. Has the same type as "var".
  289. * Should be from a Variable().
  290. *@li lr: A scalar. Has the same type as "var".
  291. *@li grad: A tensor for the gradient. Has the same type as "var".
  292. *@li momentum: The momentum; presumably a scalar with the same type as "var" (TODO confirm).
  293. *@par Attributes:
  294. *@li use_nesterov: An optional bool. Defaults to "False".
  295. * If "True", the tensor passed to compute grad will be
  296. * var - lr * momentum * accum, so in the end, the var you get is actually
  297. * var - lr * momentum * accum.
  298. *
  299. *@li use_locking: An optional bool. Defaults to "False".\n
  300. * If "True", updating of the "var" and "accum" tensors is protected by a lock;
  301. * otherwise the behavior is undefined, but may exhibit less contention.
  302. *
  303. *@par Outputs:
  304. *@li var: A mutable tensor. Has the same type as input "var".
  305. *@li accum: A mutable tensor. Has the same type as input "accum".
  306. *
  307. */
  308. REG_OP(ApplyMomentumD)
  309. .INPUT(var, TensorType::NumberType())
  310. .INPUT(accum, TensorType::NumberType())
  311. .INPUT(lr, TensorType::NumberType())
  312. .INPUT(grad, TensorType::NumberType())
  313. .INPUT(momentum, TensorType::NumberType())
  314. .OUTPUT(var, TensorType::NumberType())
  315. .OUTPUT(accum, TensorType::NumberType())
  316. .ATTR(use_nesterov, Bool, false)
  317. .ATTR(use_locking, Bool, false)
  318. .OP_END_FACTORY_REG(ApplyMomentumD)
  319. /**
  320. *@brief Updates "var" according to the momentum scheme (Keras variant). \n
  321. * accum = accum * momentum - grad * lr \n
  322. * if use_nesterov is True: \n
  323. *   var += accum * momentum - grad * lr \n
  324. * else: \n
  325. *   var += accum
  326. *
  327. *@par Inputs:
  328. *@li var: A mutable tensor. Must be one of the data types defined in
  329. * TensorType::NumberType(). Should be from a Variable().
  330. *@li accum: A mutable tensor. Has the same type as "var". Should be from a
  331. * Variable().
  332. *@li lr: A tensor for the learning rate. Has the same type as "var". Should be
  333. * from a Variable().
  334. *@li grad: A tensor for the gradient. Has the same type as "var". Should be
  335. * from a Variable().
  336. *@li momentum: A scalar. Has the same type as "var".
  337. *
  338. *@par Attributes:
  339. *@li use_nesterov: An optional bool. Defaults to "False".
  340. * If "True", "var" will be updated by using Nesterov momentum.
  341. *@li use_locking: An optional bool. Defaults to "False".
  342. * If "True", updating of the "var" tensor is protected by a lock;
  343. * otherwise the behavior is undefined, but may exhibit less contention.
  344. *
  345. *@par Outputs:
  346. * var: A mutable tensor. Has the same type as input "var".
  347. *
  348. *@attention Constraints:
  349. * The input tensors must have the same shape.
  350. *
  351. *
  352. */
  353. REG_OP(ApplyKerasMomentum)
  354. .INPUT(var, TensorType::NumberType())
  355. .INPUT(accum, TensorType::NumberType())
  356. .INPUT(lr, TensorType::NumberType())
  357. .INPUT(grad, TensorType::NumberType())
  358. .INPUT(momentum, TensorType::NumberType())
  359. .OUTPUT(var, TensorType::NumberType())
  360. .ATTR(use_locking, Bool, false)
  361. .ATTR(use_nesterov, Bool, false)
  362. .OP_END_FACTORY_REG(ApplyKerasMomentum)
  363. /**
  364. *@brief Updates "var" according to the momentum scheme (Keras variant). \n
  365. * accum = accum * momentum - grad * lr \n
  366. * if use_nesterov is True: \n
  367. *   var += accum * momentum - grad * lr \n
  368. * else: \n
  369. *   var += accum
  370. *
  371. *@par Inputs:
  372. *@li var: A mutable tensor. Must be one of the data types defined in
  373. * TensorType::NumberType(). Should be from a Variable().
  374. *@li accum: A mutable tensor. Has the same type as "var". Should be from a
  375. * Variable().
  376. *@li lr: A tensor for the learning rate. Has the same type as "var". Should be
  377. * from a Variable().
  378. *@li grad: A tensor for the gradient. Has the same type as "var". Should be
  379. * from a Variable().
  380. *@li momentum: A scalar. Has the same type as "var". Should be from a
  381. * Variable().
  382. *
  383. *@par Attributes:
  384. *@li use_nesterov: An optional bool. Defaults to "False".
  385. * If "True", "var" will be updated by using Nesterov momentum.
  386. *@li use_locking: An optional bool. Defaults to "False".
  387. * If "True", updating of the "var" tensor is protected by a lock;
  388. * otherwise the behavior is undefined, but may exhibit less contention.
  389. *
  390. *@par Outputs:
  391. *@li var: A mutable tensor. Has the same type as input "var".
  392. *@li accum: A mutable tensor. Has the same type as input "var".
  393. *
  394. *@attention Constraints:
  395. * The input tensors must have the same shape.
  396. *
  397. *
  398. */
  399. REG_OP(ApplyKerasMomentumD)
  400. .INPUT(var, TensorType::NumberType())
  401. .INPUT(accum, TensorType::NumberType())
  402. .INPUT(lr, TensorType::NumberType())
  403. .INPUT(grad, TensorType::NumberType())
  404. .INPUT(momentum, TensorType::NumberType())
  405. .OUTPUT(var, TensorType::NumberType())
  406. .OUTPUT(accum, TensorType::NumberType())
  407. .ATTR(use_locking, Bool, false)
  408. .ATTR(use_nesterov, Bool, false)
  409. .OP_END_FACTORY_REG(ApplyKerasMomentumD)
  410. /**
  411. *@brief Updates "var" according to the Adam algorithm with AMSGrad ("vhat" tracks the running maximum of "v").
  412. * lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t)
  413. * m_t := beta_1 * m_{t-1} + (1 - beta_1) * g
  414. * v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g
  415. * vhat_t := max{vhat_{t-1}, v_t}
  416. * variable := variable - lr_t * m_t / (sqrt{vhat_t} + epsilon)
  417. *
  418. *@par Inputs:
  419. *@li var: A mutable tensor. Must be one of the data types defined in
  420. * TensorType::NumberType(). Should be from a Variable().
  421. *@li m: A mutable tensor. Has the same type as "var". Should be from a
  422. * Variable().
  423. *@li v: A mutable tensor. Has the same type as "var". Should be from a
  424. * Variable().
  425. *@li vhat: A mutable tensor. Has the same type as "var". Should be from a
  426. * Variable().
  427. *@li beta1_power: A mutable tensor. Has the same type as "var". Should be from a
  428. * Variable().
  429. *@li beta2_power: A mutable tensor. Has the same type as "var". Should be from a
  430. * Variable().
  431. *@li lr: A tensor for the learning rate. Has the same type as "var". Should be
  432. * from a Variable().
  433. *@li grad: A tensor for the gradient. Has the same type as "var". Should be
  434. * from a Variable().
  435. *
  436. *@par Attributes:
  437. *@li beta1: A required float.
  438. *@li beta2: A required float.
  439. *@li epsilon: A required float.
  440. *@li use_locking: An optional bool. Defaults to "False".
  441. * If "True", updating of the "var" tensor is protected by a lock;
  442. * otherwise the behavior is undefined, but may exhibit less contention.
  443. *
  444. *@par Outputs:
  445. *@li var: A mutable tensor. Has the same type as input "var".
  446. *@li m: A mutable tensor. Has the same type as input "var".
  447. *@li v: A mutable tensor. Has the same type as input "var".
  448. *@li vhat: A mutable tensor. Has the same type as input "var".
  449. *
  450. *@attention Constraints:
  451. * The input tensors must have the same shape.
  452. *
  453. *
  454. */
  455. REG_OP(ApplyAdamWithAmsgradD)
  456. .INPUT(var, TensorType::NumberType())
  457. .INPUT(m, TensorType::NumberType())
  458. .INPUT(v, TensorType::NumberType())
  459. .INPUT(vhat, TensorType::NumberType())
  460. .INPUT(beta1_power, TensorType::NumberType())
  461. .INPUT(beta2_power, TensorType::NumberType())
  462. .INPUT(lr, TensorType::NumberType())
  463. .INPUT(grad, TensorType::NumberType())
  464. .OUTPUT(var, TensorType::NumberType())
  465. .OUTPUT(m, TensorType::NumberType())
  466. .OUTPUT(v, TensorType::NumberType())
  467. .OUTPUT(vhat, TensorType::NumberType())
  468. .REQUIRED_ATTR(beta1, Float)
  469. .REQUIRED_ATTR(beta2, Float)
  470. .REQUIRED_ATTR(epsilon, Float)
  471. .ATTR(use_locking, Bool, false)
  472. .OP_END_FACTORY_REG(ApplyAdamWithAmsgradD)
  473. /**
  474. *@brief Updates "var" according to the Adam algorithm with AMSGrad ("vhat" tracks the running maximum of "v").
  475. * lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t)
  476. * m_t := beta_1 * m_{t-1} + (1 - beta_1) * g
  477. * v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g
  478. * vhat_t := max{vhat_{t-1}, v_t}
  479. * variable := variable - lr_t * m_t / (sqrt{vhat_t} + epsilon)
  480. *
  481. *@par Inputs:
  482. *@li var: A mutable tensor. Must be one of the data types defined in
  483. * TensorType::NumberType(). Should be from a Variable().
  484. *@li m: A mutable tensor. Has the same type as "var". Should be from a
  485. * Variable().
  486. *@li v: A mutable tensor. Has the same type as "var". Should be from a
  487. * Variable().
  488. *@li vhat: A mutable tensor. Has the same type as "var". Should be from a
  489. * Variable().
  490. *@li beta1_power: A mutable tensor. Has the same type as "var". Should be from a
  491. * Variable().
  492. *@li beta2_power: A mutable tensor. Has the same type as "var". Should be from a
  493. * Variable().
  494. *@li lr: A tensor for the learning rate. Has the same type as "var". Should be
  495. * from a Variable().
  496. *@li beta1: A scalar. Has the same type as "var".
  497. *@li beta2: A scalar. Has the same type as "var".
  498. *@li epsilon: A scalar. Has the same type as "var".
  499. *@li grad: A tensor for the gradient. Has the same type as "var". Should be
  500. * from a Variable().
  501. *
  502. *@par Attributes:
  503. * use_locking: An optional bool. Defaults to "False".
  504. * If "True", updating of the "var" tensor is protected by a lock;
  505. * otherwise the behavior is undefined, but may exhibit less contention.
  506. *
  507. *@par Outputs:
  508. * var: A mutable tensor. Has the same type as input "var".
  509. * NOTE(review): "m", "v", and "vhat" are declared as inputs only;
  510. * unlike ApplyAdamWithAmsgradD, this op does not declare them as
  511. * outputs.
  512. *
  513. *@attention Constraints:
  514. * The input tensors must have the same shape.
  515. *
  516. *
  517. */
  518. REG_OP(ApplyAdamWithAmsgrad)
  519. .INPUT(var, TensorType::NumberType())
  520. .INPUT(m, TensorType::NumberType())
  521. .INPUT(v, TensorType::NumberType())
  522. .INPUT(vhat, TensorType::NumberType())
  523. .INPUT(beta1_power, TensorType::NumberType())
  524. .INPUT(beta2_power, TensorType::NumberType())
  525. .INPUT(lr, TensorType::NumberType())
  526. .INPUT(beta1, TensorType::NumberType())
  527. .INPUT(beta2, TensorType::NumberType())
  528. .INPUT(epsilon, TensorType::NumberType())
  529. .INPUT(grad, TensorType::NumberType())
  530. .OUTPUT(var, TensorType::NumberType())
  531. .ATTR(use_locking, Bool, false)
  532. .OP_END_FACTORY_REG(ApplyAdamWithAmsgrad)
  533. /**
  534. *@brief Updates "var" according to the PowerSign update.\n
  535. * t-1 denotes the previous step.
  536. * m_t <- beta * m_{t-1} + (1 - beta) * grad\n
  537. * update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad\n
  538. * var <- var - lr * update
  539. *
  540. *@attention Constraints:\n
  541. * the input tensors must have the same shape.
  542. *
  543. *@par Inputs:
  544. *@li var: A mutable tensor. Should be from a Variable().
  545. *@li m: A mutable tensor. Has the same type as "var".
  546. * Should be from a Variable().
  547. *@li lr: A scalar. Has the same type as "var".
  548. *@li logbase: A scalar. Has the same type as "var".
  549. *@li sign_decay: A scalar. Has the same type as "var".
  550. *@li beta: A scalar. Has the same type as "var".
  551. *@li grad: A tensor for the gradient. Has the same type as "var".
  552. *
  553. *@par Attributes:
  554. * use_locking: An optional bool. Defaults to "False".
  555. * If "True", updating of the "var" and "m" tensors is protected
  556. * by a lock; otherwise the behavior is undefined, but may exhibit less
  557. * contention.
  558. *
  559. *@par Outputs:
  560. * var: A mutable tensor. Has the same type as input "var".
  561. *
  562. */
  563. REG_OP(ApplyPowerSign)
  564. .INPUT(var, TensorType::NumberType())
  565. .INPUT(m, TensorType::NumberType())
  566. .INPUT(lr, TensorType::NumberType())
  567. .INPUT(logbase, TensorType::NumberType())
  568. .INPUT(sign_decay, TensorType::NumberType())
  569. .INPUT(beta, TensorType::NumberType())
  570. .INPUT(grad, TensorType::NumberType())
  571. .OUTPUT(var, TensorType::NumberType())
  572. .ATTR(use_locking, Bool, false)
  573. .OP_END_FACTORY_REG(ApplyPowerSign)
  574. /**
  575. *@brief Updates "var" according to the PowerSign update.\n
  576. * t-1 denotes the previous step.
  577. * m_t <- beta * m_{t-1} + (1 - beta) * grad\n
  578. * update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad\n
  579. * var <- var - lr * update
  580. *
  581. *@attention Constraints:\n
  582. * the input tensors must have the same shape.
  583. *
  584. *@par Inputs:
  585. *@li var: A mutable tensor. Should be from a Variable().
  586. *@li m: A mutable tensor. Has the same type as "var".
  587. * Should be from a Variable().
  588. *@li lr: A scalar. Has the same type as "var".
  589. *@li logbase: A scalar. Has the same type as "var".
  590. *@li sign_decay: A scalar. Has the same type as "var".
  591. *@li beta: A scalar. Has the same type as "var".
  592. *@li grad: A tensor for the gradient. Has the same type as "var".
  593. *
  594. *@par Attributes:
  595. * use_locking: An optional bool. Defaults to "False".
  596. * If "True", updating of the "var" and "m" tensors is protected
  597. * by a lock; otherwise the behavior is undefined, but may exhibit less
  598. * contention.
  599. *
  600. *@par Outputs:
  601. *@li var: A mutable tensor. Has the same type as input "var".
  602. *@li m: A mutable tensor. Has the same type as input "var".
  603. *
  604. *
  605. */
  606. REG_OP(ApplyPowerSignD)
  607. .INPUT(var, TensorType::NumberType())
  608. .INPUT(m, TensorType::NumberType())
  609. .INPUT(lr, TensorType::NumberType())
  610. .INPUT(logbase, TensorType::NumberType())
  611. .INPUT(sign_decay, TensorType::NumberType())
  612. .INPUT(beta, TensorType::NumberType())
  613. .INPUT(grad, TensorType::NumberType())
  614. .OUTPUT(var, TensorType::NumberType())
  615. .OUTPUT(m, TensorType::NumberType())
  616. .ATTR(use_locking, Bool, false)
  617. .OP_END_FACTORY_REG(ApplyPowerSignD)
  618. /**
  619. *@brief Updates "var" as FOBOS algorithm with fixed learning rate.\n
  620. * prox_v = var - alpha * delta\n
  621. * var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
  622. *
  623. *@attention Constraints:\n
  624. * the input tensors must have the same shape.
  625. *
  626. *@par Inputs:
  627. *@li var: A mutable tensor. Should be from a Variable().
  628. *@li alpha: A scalar. Has the same type as "var".
  629. *@li l1: A scalar. Has the same type as "var".
  630. *@li l2: A scalar. Has the same type as "var".
  631. *@li delta: A tensor for the change. Has the same type as "var".
  632. *
  633. *@par Attributes:
  634. * use_locking: An optional bool. Defaults to "False".
  635. * If "True", updating of the "var" tensor is protected by a lock;
  636. * otherwise the behavior is undefined, but may exhibit less
  637. * contention.
  638. *
  639. *@par Outputs:
  640. * var: A mutable tensor. Has the same type as input "var".
  641. *
  642. */
  643. REG_OP(ApplyProximalGradientDescent)
  644. .INPUT(var, TensorType::NumberType())
  645. .INPUT(alpha, TensorType::NumberType())
  646. .INPUT(l1, TensorType::NumberType())
  647. .INPUT(l2, TensorType::NumberType())
  648. .INPUT(delta, TensorType::NumberType())
  649. .OUTPUT(var, TensorType::NumberType())
  650. .ATTR(use_locking, Bool, false)
  651. .OP_END_FACTORY_REG(ApplyProximalGradientDescent)
  652. /**
  653. *@brief Updates "var" according to the AddSign update rule.
  654. *@par Inputs:
  655. *Seven inputs, in registration order:
  656. * @li var: A mutable Tensor of type TensorType::NumberType().
  657. * Should be a Variable Tensor.
  658. * @li m: A mutable Tensor of the same type as "var".
  659. * Should be a Variable Tensor.
  660. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  661. * @li alpha: A Tensor of the same type as "var". Must be a scalar.
  662. * @li sign_decay: A Tensor of the same type as "var". Must be a scalar.
  663. * @li beta: A Tensor of the same type as "var". Must be a scalar.
  664. * @li grad: A Tensor of the same type as "var", for the gradient.
  665. *@par Attributes:
  666. *use_locking: An optional bool. Defaults to "False".
  667. * If "True", updating of the "var" and "m" tensors will be
  668. * protected by a lock; otherwise the behavior is undefined,
  669. * but may exhibit less contention.
  670. *@par Outputs:
  671. *var: A mutable Tensor. Has the same type as input "var". Only "var" is registered as an output.
  672. */
  673. REG_OP(ApplyAddSign)
  674. .INPUT(var, TensorType::NumberType())
  675. .INPUT(m, TensorType::NumberType())
  676. .INPUT(lr, TensorType::NumberType())
  677. .INPUT(alpha, TensorType::NumberType())
  678. .INPUT(sign_decay, TensorType::NumberType())
  679. .INPUT(beta, TensorType::NumberType())
  680. .INPUT(grad, TensorType::NumberType())
  681. .OUTPUT(var, TensorType::NumberType())
  682. .ATTR(use_locking, Bool, false)
  683. .OP_END_FACTORY_REG(ApplyAddSign)
  684. /**
  685. *@brief Updates "var" according to the AddSign update rule.
  686. *@par Inputs:
  687. *Seven inputs, in registration order:
  688. * @li var: A mutable Tensor of type TensorType::NumberType().
  689. * Should be a Variable Tensor.
  690. * @li m: A mutable Tensor of the same type as "var".
  691. * Should be a Variable Tensor.
  692. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  693. * @li alpha: A Tensor of the same type as "var". Must be a scalar.
  694. * @li sign_decay: A Tensor of the same type as "var". Must be a scalar.
  695. * @li beta: A Tensor of the same type as "var". Must be a scalar.
  696. * @li grad: A Tensor of the same type as "var", for the gradient.
  697. *@par Attributes:
  698. *use_locking: An optional bool. Defaults to "False".
  699. * If "True", updating of the "var" and "m" tensors will be
  700. * protected by a lock; otherwise the behavior is undefined,
  701. * but may exhibit less contention.
  702. *@par Outputs:
  703. *@li var: A mutable Tensor. Has the same type as input "var".
  704. *@li m: A mutable Tensor. Has the same type as input "m".
  705. */
  706. REG_OP(ApplyAddSignD)
  707. .INPUT(var, TensorType::NumberType())
  708. .INPUT(m, TensorType::NumberType())
  709. .INPUT(lr, TensorType::NumberType())
  710. .INPUT(alpha, TensorType::NumberType())
  711. .INPUT(sign_decay, TensorType::NumberType())
  712. .INPUT(beta, TensorType::NumberType())
  713. .INPUT(grad, TensorType::NumberType())
  714. .OUTPUT(var, TensorType::NumberType())
  715. .OUTPUT(m, TensorType::NumberType())
  716. .ATTR(use_locking, Bool, false)
  717. .OP_END_FACTORY_REG(ApplyAddSignD)
  718. /**
  719. *@brief Updates "var" according to the centered RMSProp algorithm.\n
  720. * The centered RMSProp algorithm uses an estimate of the centered second moment
  721. * (i.e., the variance) for normalization, as opposed to regular RMSProp, which
  722. * uses the (uncentered) second moment. This often helps with training, but is
  723. * slightly more expensive in terms of computation and memory.
  724. *
  725. * t-1 denotes the previous step.
  726. * mg <- rho * mg{t-1} + (1-rho) * grad\n
  727. * ms <- rho * ms{t-1} + (1-rho) * grad * grad\n
  728. * mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\n
  729. * var <- var - mom\n
  730. *
  731. *@attention Constraints:\n
  732. *@li in dense implementation of this algorithm, mg, ms, and mom will
  733. * update even if the grad is zero, but in this sparse implementation, mg, ms,
  734. * and mom will not update in iterations during which the grad is zero.
  735. *@li the input tensors must have the same shape.
  736. *
  737. *@par Inputs:
  738. *@li var: A mutable tensor. Should be from a Variable().
  739. *@li mg: A mutable tensor. Has the same type as "var".
  740. * Should be from a Variable().
  741. *@li ms: A mutable tensor. Has the same type as "var".
  742. * Should be from a Variable().
  743. *@li mom: A mutable tensor. Has the same type as "var".
  744. * Should be from a Variable().
  745. *@li lr: A scalar. Has the same type as "var".
  746. *@li rho: A scalar. Has the same type as "var".
  747. *@li momentum: A tensor. Has the same type as "var".
  748. *@li epsilon: A scalar. Has the same type as "var".
  749. *@li grad: A tensor for the gradient. Has the same type as "var".
  750. *
  751. *@par Attributes:
  752. * use_locking: An optional bool. Defaults to "False".
  753. * If "True", updating of the "var", "mg", "ms", and "mom" tensors is
  754. * protected by a lock; otherwise the behavior is undefined, but may exhibit
  755. * less contention.
  756. *
  757. *@par Outputs:
  758. * var: A mutable tensor. Has the same type as input "var".
  759. *
  760. */
  761. REG_OP(ApplyCenteredRMSProp)
  762. .INPUT(var, TensorType::NumberType())
  763. .INPUT(mg, TensorType::NumberType())
  764. .INPUT(ms, TensorType::NumberType())
  765. .INPUT(mom, TensorType::NumberType())
  766. .INPUT(lr, TensorType::NumberType())
  767. .INPUT(rho, TensorType::NumberType())
  768. .INPUT(momentum, TensorType::NumberType())
  769. .INPUT(epsilon, TensorType::NumberType())
  770. .INPUT(grad, TensorType::NumberType())
  771. .OUTPUT(var, TensorType::NumberType())
  772. .ATTR(use_locking, Bool, false)
  773. .OP_END_FACTORY_REG(ApplyCenteredRMSProp)
  774. /**
  775. *@brief Updates "var" according to the centered RMSProp algorithm.\n
  776. * The centered RMSProp algorithm uses an estimate of the centered second moment
  777. * (i.e., the variance) for normalization, as opposed to regular RMSProp, which
  778. * uses the (uncentered) second moment. This often helps with training, but is
  779. * slightly more expensive in terms of computation and memory.
  780. *
  781. * t-1 denotes the previous step.
  782. * mg <- rho * mg{t-1} + (1-rho) * grad\n
  783. * ms <- rho * ms{t-1} + (1-rho) * grad * grad\n
  784. * mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\n
  785. * var <- var - mom\n
  786. *
  787. *@attention Constraints:\n
  788. *@li in dense implementation of this algorithm, mg, ms, and mom will
  789. * update even if the grad is zero, but in this sparse implementation, mg, ms,
  790. * and mom will not update in iterations during which the grad is zero.
  791. *@li the input tensors must have the same shape.
  792. *
  793. *@par Inputs:
  794. *@li var: A mutable tensor. Should be from a Variable().
  795. *@li mg: A mutable tensor. Has the same type as "var".
  796. * Should be from a Variable().
  797. *@li ms: A mutable tensor. Has the same type as "var".
  798. * Should be from a Variable().
  799. *@li mom: A mutable tensor. Has the same type as "var".
  800. * Should be from a Variable().
  801. *@li lr: A scalar. Has the same type as "var".
  802. *@li rho: A scalar. Has the same type as "var".
  803. *@li momentum: A tensor. Has the same type as "var".
  804. *@li epsilon: A scalar. Has the same type as "var".
  805. *@li grad: A tensor for the gradient. Has the same type as "var".
  806. *
  807. *@par Attributes:
  808. * use_locking: An optional bool. Defaults to "False".
  809. * If "True", updating of the "var", "mg", "ms", and "mom" tensors is
  810. * protected by a lock; otherwise the behavior is undefined, but may exhibit
  811. * less contention.
  812. *
  813. *@par Outputs:
  814. *@li var: A mutable Tensor. Has the same type as input "var".
  815. *@li mg: A mutable Tensor. Has the same type as input "mg".
  816. *@li ms: A mutable Tensor. Has the same type as input "ms".
  817. *@li mom: A mutable Tensor. Has the same type as input "mom".
  818. *
  819. */
  820. REG_OP(ApplyCenteredRMSPropD)
  821. .INPUT(var, TensorType::NumberType())
  822. .INPUT(mg, TensorType::NumberType())
  823. .INPUT(ms, TensorType::NumberType())
  824. .INPUT(mom, TensorType::NumberType())
  825. .INPUT(lr, TensorType::NumberType())
  826. .INPUT(rho, TensorType::NumberType())
  827. .INPUT(momentum, TensorType::NumberType())
  828. .INPUT(epsilon, TensorType::NumberType())
  829. .INPUT(grad, TensorType::NumberType())
  830. .OUTPUT(var, TensorType::NumberType())
  831. .OUTPUT(mg, TensorType::NumberType())
  832. .OUTPUT(ms, TensorType::NumberType())
  833. .OUTPUT(mom, TensorType::NumberType())
  834. .ATTR(use_locking, Bool, false)
  835. .OP_END_FACTORY_REG(ApplyCenteredRMSPropD)
  836. /**
  837. *@brief Updates "var" by subtracting "alpha" * "delta" from it.\n
  838. * var -= delta * alpha
  839. *
  840. *@attention Constraints:\n
  841. * the input tensors must have the same shape.
  842. *
  843. *@par Inputs:
  844. *@li var: A mutable tensor. Should be from a Variable().
  845. *@li alpha: A scalar. Has the same type as "var".
  846. *@li delta: A tensor for the change. Has the same type as "var".
  847. *
  848. *@par Attributes:
  849. * use_locking: An optional bool. Defaults to "False".
  850. * If "True", updating of the "var" tensor is protected by a lock;
  851. * otherwise the behavior is undefined, but may exhibit less
  852. * contention.
  853. *
  854. *@par Outputs:
  855. * var: A mutable tensor. Has the same type as input "var".
  856. *
  857. */
  858. REG_OP(ApplyGradientDescent)
  859. .INPUT(var, TensorType::NumberType())
  860. .INPUT(alpha, TensorType::NumberType())
  861. .INPUT(delta, TensorType::NumberType())
  862. .OUTPUT(var, TensorType::NumberType())
  863. .ATTR(use_locking, Bool, false)
  864. .OP_END_FACTORY_REG(ApplyGradientDescent)
  865. /**
  866. *@brief Updates "var" according to the adagrad scheme.\n
  867. * accum += grad * grad\n
  868. * var -= lr * grad * (1 / sqrt(accum))
  869. *
  870. *@attention Constraints:\n
  871. * the input tensors must have the same shape.
  872. *
  873. *@par Inputs:
  874. *@li var: A mutable tensor. Should be from a Variable().
  875. *@li accum: A mutable tensor. Has the same type as "var".
  876. * Should be from a Variable().
  877. *@li lr: A scalar. Has the same type as "var".
  878. *@li grad: A tensor for the gradient. Has the same type as "var".
  879. *
  880. *@par Attributes:
  881. *@li update_slots: An optional bool. Defaults to "True".
  882. * If "True", "accum" will be updated.
  883. *@li use_locking: An optional bool. Defaults to "False". If "True", updating
  884. * of the "var" and "accum" tensors is protected by a lock; otherwise the
  885. * behavior is undefined, but may exhibit less contention.
  886. *@par Outputs:
  887. * var: A mutable tensor. Has the same type as input "var".
  888. *
  889. */
  890. REG_OP(ApplyAdagrad)
  891. .INPUT(var, TensorType::NumberType())
  892. .INPUT(accum, TensorType::NumberType())
  893. .INPUT(lr, TensorType::NumberType())
  894. .INPUT(grad, TensorType::NumberType())
  895. .OUTPUT(var, TensorType::NumberType())
  896. .ATTR(update_slots, Bool, true)
  897. .ATTR(use_locking, Bool, false)
  898. .OP_END_FACTORY_REG(ApplyAdagrad)
  899. /**
  900. *@brief Updates "var" according to the adagrad scheme.\n
  901. * accum += grad * grad\n
  902. * var -= lr * grad * (1 / sqrt(accum))
  903. *
  904. *@attention Constraints:\n
  905. * the input tensors must have the same shape.
  906. *
  907. *@par Inputs:
  908. *@li var: A mutable tensor. Should be from a Variable().
  909. *@li accum: A mutable tensor. Has the same type as "var".
  910. * Should be from a Variable().
  911. *@li lr: A scalar. Has the same type as "var".
  912. *@li grad: A tensor for the gradient. Has the same type as "var".
  913. *
  914. *@par Attributes:
  915. *@li update_slots: An optional bool. Defaults to "True".
  916. * If "True", "accum" will be updated.
  917. *@li use_locking: An optional bool. Defaults to "False". If "True", updating
  918. * of the "var" and "accum" tensors is protected by a lock; otherwise the
  919. * behavior is undefined, but may exhibit less contention.
  920. *@par Outputs:
  921. *@li var: A mutable tensor. Has the same type as input "var".
  922. *@li accum: A mutable tensor. Has the same type as input "var".
  923. *
  924. *
  925. */
  926. REG_OP(ApplyAdagradD)
  927. .INPUT(var, TensorType::NumberType())
  928. .INPUT(accum, TensorType::NumberType())
  929. .INPUT(lr, TensorType::NumberType())
  930. .INPUT(grad, TensorType::NumberType())
  931. .OUTPUT(var, TensorType::NumberType())
  932. .OUTPUT(accum, TensorType::NumberType())
  933. .ATTR(update_slots, Bool, true)
  934. .ATTR(use_locking, Bool, false)
  935. .OP_END_FACTORY_REG(ApplyAdagradD)
  936. /**
  937. * @brief Updates "var" according to the adagradv2 scheme.
  938. * accum += grad * grad \n
  939. * var -= lr * grad * (1 / (sqrt(accum) + epsilon))
  940. *
  941. * @par Inputs:
  942. * @li var: A mutable tensor. Must be one of the data types defined in
  943. * TensorType::NumberType(). Should be from a Variable().
  944. * @li accum: A mutable tensor. Has the same type as "var". Should be from a
  945. * Variable().
  946. * @li lr: A tensor for the learning rate. Has the same type as "var". Should be
  947. * from a Variable().
  948. * @li epsilon: A scalar. Has the same type as "var".
  949. * @li grad: A tensor for the gradient. Has the same type as "var". Should be
  950. * from a Variable().
  951. *
  952. * @par Attributes:
  953. * @li update_slots: An optional bool. Defaults to "True".
  954. * If "True", "accum" will be updated
  955. * @li use_locking: An optional bool. Defaults to "False".
  956. * If "True", updating of the "var" tensor is protected by a lock;
  957. * otherwise the behavior is undefined, but may exhibit less contention.
  958. *
  959. * @par Outputs:
  960. * var: A mutable tensor. Has the same type as input "var".
  961. *
  962. * @attention Constraints:
  963. * The input tensors must have the same shape.
  964. *
  965. */
  966. REG_OP(ApplyAdagradV2)
  967. .INPUT(var, TensorType::NumberType())
  968. .INPUT(accum, TensorType::NumberType())
  969. .INPUT(lr, TensorType::NumberType())
  970. .INPUT(epsilon, TensorType::NumberType())
  971. .INPUT(grad, TensorType::NumberType())
  972. .OUTPUT(var, TensorType::NumberType())
  973. .ATTR(update_slots, Bool, true)
  974. .ATTR(use_locking, Bool, false)
  975. .OP_END_FACTORY_REG(ApplyAdagradV2)
  976. /**
  977. * @brief Updates "var" according to the adagradv2 scheme.
  978. * accum += grad * grad \n
  979. * var -= lr * grad * (1 / (sqrt(accum) + epsilon))
  980. *
  981. * @par Inputs:
  982. * @li var: A mutable tensor. Must be one of the data types defined in
  983. * TensorType::NumberType(). Should be from a Variable().
  984. * @li accum: A mutable tensor. Has the same type as "var". Should be from a
  985. * Variable().
  986. * @li lr: A tensor for the learning rate. Has the same type as "var". Should be
  987. * from a Variable().
  988. * @li grad: A tensor for the gradient. Has the same type as "var". Should be
  989. * from a Variable().
  990. *
  991. * @par Attributes:
  992. * @li epsilon: A required float.
  993. * @li update_slots: An optional bool. Defaults to "True".
  994. * If "True", "accum" will be updated
  995. * @li use_locking: An optional bool. Defaults to "False".
  996. * If "True", updating of the "var" tensor is protected by a lock;
  997. * otherwise the behavior is undefined, but may exhibit less contention.
  998. *
  999. * @par Outputs:
  1000. * @li var: A mutable tensor. Has the same type as input "var".
  1001. * @li accum: A mutable tensor. Has the same type as input "var".
  1002. * @attention Constraints:
  1003. * The input tensors must have the same shape.
  1004. *
  1005. */
  1006. REG_OP(ApplyAdagradV2D)
  1007. .INPUT(var, TensorType::NumberType())
  1008. .INPUT(accum, TensorType::NumberType())
  1009. .INPUT(lr, TensorType::NumberType())
  1010. .INPUT(grad, TensorType::NumberType())
  1011. .OUTPUT(var, TensorType::NumberType())
  1012. .OUTPUT(accum, TensorType::NumberType())
  1013. .REQUIRED_ATTR(epsilon, Float)
  1014. .ATTR(update_slots, Bool, true)
  1015. .ATTR(use_locking, Bool, false)
  1016. .OP_END_FACTORY_REG(ApplyAdagradV2D)
  1017. /**
  1018. *@brief Updates "var" according to the proximal adagrad scheme.
  1019. *@par Inputs:
  1020. *Eight inputs, in registration order:
  1021. * @li var: A mutable Tensor. Must be one of the following types:
  1022. * TensorType::NumberType(). Should be a Variable Tensor.
  1023. * @li gradient_accumulator: A mutable Tensor. Must have the same
  1024. * type as "var". Should be a Variable Tensor.
  1025. * @li gradient_squared_accumulator: A mutable Tensor of the same type as "var".
  1026. * Should be a Variable Tensor.
  1027. * @li grad: A Tensor of the same type as "var", for the gradient.
  1028. * @li lr: A Tensor of the same type as "var".
  1029. * Scaling factor. Must be a scalar.
  1030. * @li l1: A Tensor of the same type as "var".
  1031. * L1 regularization. Must be a scalar.
  1032. * @li l2: A Tensor of the same type as "var".
  1033. * L2 regularization. Must be a scalar.
  1034. * @li global_step: A Tensor of type int32 or int64.
  1035. * Training step number. Must be a scalar.
  1036. *@par Attributes:
  1037. *use_locking: An optional bool. Defaults to "False".
  1038. * If "True", updating of the var and accum tensors will be
  1039. * protected by a lock; otherwise the behavior is undefined,
  1040. * but may exhibit less contention.
  1041. *@par Outputs:
  1042. *var: A mutable Tensor. Has the same type as input "var".
  1043. */
  1044. REG_OP(ApplyAdagradDA)
  1045. .INPUT(var, TensorType::NumberType())
  1046. .INPUT(gradient_accumulator, TensorType::NumberType())
  1047. .INPUT(gradient_squared_accumulator, TensorType::NumberType())
  1048. .INPUT(grad, TensorType::NumberType())
  1049. .INPUT(lr, TensorType::NumberType())
  1050. .INPUT(l1, TensorType::NumberType())
  1051. .INPUT(l2, TensorType::NumberType())
  1052. .INPUT(global_step, TensorType({DT_INT32, DT_INT64}))
  1053. .OUTPUT(var, TensorType::NumberType())
  1054. .ATTR(use_locking, Bool, false)
  1055. .OP_END_FACTORY_REG(ApplyAdagradDA)
  1056. /**
  1057. *@brief Updates "var" according to the proximal adagrad scheme.
  1058. *@par Inputs:
  1059. *Eight inputs, in registration order:
  1060. * @li var: A mutable Tensor. Must be one of the following types:
  1061. * TensorType::NumberType(). Should be a Variable Tensor.
  1062. * @li gradient_accumulator: A mutable Tensor. Must have the same
  1063. * type as "var". Should be a Variable Tensor.
  1064. * @li gradient_squared_accumulator: A mutable Tensor of the same type as "var".
  1065. * Should be a Variable Tensor.
  1066. * @li grad: A Tensor of the same type as "var", for the gradient.
  1067. * @li lr: A Tensor of the same type as "var".
  1068. * Scaling factor. Must be a scalar.
  1069. * @li l1: A Tensor of the same type as "var".
  1070. * L1 regularization. Must be a scalar.
  1071. * @li l2: A Tensor of the same type as "var".
  1072. * L2 regularization. Must be a scalar.
  1073. * @li global_step: A Tensor of type int32 or int64.
  1074. * Training step number. Must be a scalar.
  1075. *@par Attributes:
  1076. *use_locking: An optional bool. Defaults to "False".
  1077. * If "True", updating of the var and accum tensors will be
  1078. * protected by a lock; otherwise the behavior is undefined,
  1079. * but may exhibit less contention.
  1080. *@par Outputs:
  1081. *@li var: A mutable Tensor. Has the same type as input "var".
  1082. *@li gradient_accumulator: A mutable Tensor. Has the same type as "var".
  1083. *@li gradient_squared_accumulator: A mutable Tensor. Has the same type as "var".
  1084. */
  1085. REG_OP(ApplyAdagradDAD)
  1086. .INPUT(var, TensorType::NumberType())
  1087. .INPUT(gradient_accumulator, TensorType::NumberType())
  1088. .INPUT(gradient_squared_accumulator, TensorType::NumberType())
  1089. .INPUT(grad, TensorType::NumberType())
  1090. .INPUT(lr, TensorType::NumberType())
  1091. .INPUT(l1, TensorType::NumberType())
  1092. .INPUT(l2, TensorType::NumberType())
  1093. .INPUT(global_step, TensorType({DT_INT32, DT_INT64}))
  1094. .OUTPUT(var, TensorType::NumberType())
  1095. .OUTPUT(gradient_accumulator, TensorType::NumberType())
  1096. .OUTPUT(gradient_squared_accumulator, TensorType::NumberType())
  1097. .ATTR(use_locking, Bool, false)
  1098. .OP_END_FACTORY_REG(ApplyAdagradDAD)
  1099. /**
  1100. *@brief Returns the dimension index in the destination data format given the one in
  1101. * the source data format.
  1102. *
  1103. *@par Inputs:
  1104. * x: A tensor of type int32 or int64 (registered as TensorType::IndexNumberType()).
  1105. * Each element is a dimension index in the source data format.
  1106. * Must be in the range [-4, 4).
  1107. *
  1108. *@par Attributes:
  1109. *@li src_format: An optional string. Defaults to "NHWC".
  1110. * The source data format.
  1111. *@li dst_format: An optional string. Defaults to "NCHW".
  1112. * The destination data format.
  1113. *
  1114. *@par Outputs:
  1115. * y: A tensor. Has the same type as "x".
  1116. *
  1117. */
  1118. REG_OP(DataFormatDimMap)
  1119. .INPUT(x, TensorType::IndexNumberType())
  1120. .ATTR(src_format, String, "NHWC")
  1121. .ATTR(dst_format, String, "NCHW")
  1122. .OUTPUT(y, TensorType::IndexNumberType())
  1123. .OP_END_FACTORY_REG(DataFormatDimMap)
  1124. /**
  1125. * @brief Implements stochastic gradient descent (optionally with momentum).\n
  1126. * Nesterov momentum is based on the formula from
  1127. * On the importance of initialization and momentum in deep learning.\n
  1128. * @par Inputs:
  1129. * @li parameters: A mutable tensor of type float16 or float32.\n
  1130. * Specifies the iterable of parameters to optimize or dicts defining parameter
  1131. * groups.
  1132. * @li gradient: A tensor of type float16 or float32.\n
  1133. * Specifies the gradient of training step.
  1134. * @li learning_rate: A tensor of type float16 or float32.\n
  1135. * Specifies the learing_rate of training step.
  1136. * @li accum: A tensor of type float16 or float32.
  1137. * Specifies the velocity of training step.
  1138. * @li momentum: A tensor of type float16 or float32.
  1139. * Specifies the momentum factor.
  1140. * @li stat: A tensor of type float16 or float32.
  1141. * Specifies the status representing the first step or not.
  1142. * @par Attributes:
  1143. * @li dampening: An optional float, specifying the dampening for momentum.
  1144. * Defaults to "0.0".
  1145. * @li weight_decay: An optional float, specifying the L2 penalty. Defaults to
  1146. * "0.0".
  1147. * @li nesterov: An optional bool, specifying whether to enable Nesterov
  1148. * momentum. Defaults to "False".
  1149. * @par Outputs:
  1150. * parameters: A mutable tensor same as input "parameters".
  1151. * @see ApplyMomentum()
  1152. */
  1153. REG_OP(SGD)
  1154. .INPUT(parameters, TensorType(DT_FLOAT, DT_FLOAT16))
  1155. .INPUT(gradient, TensorType(DT_FLOAT, DT_FLOAT16))
  1156. .INPUT(learning_rate, TensorType(DT_FLOAT, DT_FLOAT16))
  1157. .INPUT(accum, TensorType(DT_FLOAT, DT_FLOAT16))
  1158. .INPUT(momentum, TensorType(DT_FLOAT, DT_FLOAT16))
  1159. .INPUT(stat, TensorType(DT_FLOAT, DT_FLOAT16))
  1160. .OUTPUT(parameters, TensorType(DT_FLOAT, DT_FLOAT16))
  1161. .ATTR(dampening, Float, 0.0)
  1162. .ATTR(weight_decay, Float, 0.0)
  1163. .ATTR(nesterov, Bool, false)
  1164. .OP_END_FACTORY_REG(SGD)
  1165. /**
  1166. * @brief Updates "var" according to the RMSProp algorithm.\n
  1167. * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
  1168. * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
  1169. * Expressed with this operator's input names (t-1 denotes the previous step):\n
  1170. * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
  1171. * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
  1172. * var <- var - mom
  1173. * @par Inputs:
  1174. * @li var: A mutable tensor. Must be one of the data types defined in
  1175. * TensorType::NumberType(). Should be from a Variable().
  1176. * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
  1177. * Variable().
  1178. * @li mom: A mutable tensor. Must have the same type as "var". Should be from a
  1179. * Variable().
  1180. * @li lr: A scalar. Must have the same type as "var".
  1181. * @li rho: A scalar. Must have the same type as "var".
  1182. * @li momentum: A scalar. Must have the same type as "var".
  1183. * @li epsilon: A scalar. Must have the same type as "var".
  1184. * @li grad: A tensor, specifying the gradient. Must have the same type as "var".
  1185. *
  1186. * @par Attributes:
  1187. * use_locking: An optional "bool". Defaults to "False". If "True", updating of
  1188. * the "var", "ms", and "mom" tensors will be protected by a lock; otherwise the
  1189. * behavior is undefined, but may exhibit less contention.
  1190. *
  1191. * @par Outputs:
  1192. * var: A mutable tensor. Has the same type as input "var".
  1193. *
  1194. * @attention Constraints:
  1195. * @li Note that in dense implementation of this algorithm, "ms" and "mom" will
  1196. * update even if "grad" is 0, but in this sparse implementation, "ms" and "mom"
  1197. * will not update in iterations during which "grad" is 0.
  1198. * @li The input tensors "var", "ms", "mom" and "grad" must have the same shape.
  1199. */
  1200. REG_OP(ApplyRMSProp)
  1201. .INPUT(var, TensorType::NumberType())
  1202. .INPUT(ms, TensorType::NumberType())
  1203. .INPUT(mom, TensorType::NumberType())
  1204. .INPUT(lr, TensorType::NumberType())
  1205. .INPUT(rho, TensorType::NumberType())
  1206. .INPUT(momentum, TensorType::NumberType())
  1207. .INPUT(epsilon, TensorType::NumberType())
  1208. .INPUT(grad, TensorType::NumberType())
  1209. .OUTPUT(var, TensorType::NumberType())
  1210. .ATTR(use_locking, Bool, false)
  1211. .OP_END_FACTORY_REG(ApplyRMSProp)
  1212. /**
  1213. * @brief Updates "var" according to the RMSProp algorithm, a const input will be
  1214. * considered as an attribute.\n
  1215. * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
  1216. * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
  1217. * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
  1218. * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
  1219. * var <- var - mom
  1220. *
  1221. * @par Inputs:
  1222. * @li var: A mutable tensor. Must be one of the data types defined in\n
  1223. * TensorType::NumberType(). Should be from a Variable().
  1224. * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
  1225. * Variable().
  1226. * @li mom: A mutable tensor. Must have the same type as "var". Should be from a
  1227. * Variable().
  1228. * @li lr: A scalar. Must have the same type as "var".
  1229. * @li grad: A tensor, specifying the gradient. Must have the same type as "var".
  1230. *
  1231. * @par Attributes:
  1232. * @li use_locking: An optional "bool". Defaults to "False". If "True", updating\n
  1233. * of the "var", "ms", and "mom" tensors will be protected by a lock; \n
  1234. * otherwise the behavior is undefined, but may exhibit less contention.
  1235. * @li rho: A required scalar. Must have the same type as "var".
  1236. * @li momentum: A required scalar. Must have the same type as "var".
  1237. * @li epsilon: A required scalar. Must have the same type as "var".
  1238. *
  1239. * @par Outputs:
  1240. * var, ms, mom: Mutable tensors. Each has the same type as the input of the same name.
  1241. *
  1242. * @attention Constraints:
  1243. * @li Note that in dense implementation of this algorithm, "ms" and "mom" will\n
  1244. * update even if "grad" is 0, but in this sparse implementation, "ms" and "mom"\n
  1245. * will not update in iterations during which "grad" is 0.
  1246. * @li The input tensors "var", "ms", "mom" and "grad" must have the same shape.
  1247. */
  1248. REG_OP(ApplyRMSPropD)  // Declares op "ApplyRMSPropD": five inputs, three outputs, three required and one optional attribute.
  1249. .INPUT(var, TensorType::NumberType())
  1250. .INPUT(ms, TensorType::NumberType())
  1251. .INPUT(mom, TensorType::NumberType())
  1252. .INPUT(lr, TensorType::NumberType())
  1253. .INPUT(grad, TensorType::NumberType())
  1254. .OUTPUT(var, TensorType::NumberType())
  1255. .OUTPUT(ms, TensorType::NumberType())  // "ms" and "mom" are outputs as well as inputs in this variant
  1256. .OUTPUT(mom, TensorType::NumberType())
  1257. .REQUIRED_ATTR(rho, Float)  // rho, momentum and epsilon are Float attributes here; ApplyRMSProp declares them as inputs
  1258. .REQUIRED_ATTR(momentum, Float)
  1259. .REQUIRED_ATTR(epsilon, Float)
  1260. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1261. .OP_END_FACTORY_REG(ApplyRMSPropD)
  1262. /**
  1263. *@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate.
  1264. *@par Inputs:
  1265. *Six inputs, including:
  1266. * @li var: A mutable Tensor of type TensorType::NumberType().
  1267. * Should be from a Variable().
  1268. * @li accum: A mutable Tensor of the same type as "var". Should be from a Variable().
  1269. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1270. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1271. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1272. * @li grad: A Tensor of the same type as "var", for the gradient.
  1273. *@par Attributes:
  1274. *use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less contention.
  1275. *@par Outputs:
  1276. * var: A mutable Tensor. Has the same type as input "var".
  1279. */
  1280. REG_OP(ApplyProximalAdagrad)  // Declares op "ApplyProximalAdagrad": six inputs, one output, one optional attribute.
  1281. .INPUT(var, TensorType::NumberType())
  1282. .INPUT(accum, TensorType::NumberType())
  1283. .INPUT(lr, TensorType::NumberType())
  1284. .INPUT(l1, TensorType::NumberType())
  1285. .INPUT(l2, TensorType::NumberType())
  1286. .INPUT(grad, TensorType::NumberType())
  1287. .OUTPUT(var, TensorType::NumberType())  // only "var" is declared as an output; "accum" is input-only here
  1288. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1289. .OP_END_FACTORY_REG(ApplyProximalAdagrad)
  1290. /**
  1291. *@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate.
  1292. *@par Inputs:
  1293. *Six inputs, including:
  1294. * @li var: A mutable Tensor of type TensorType::NumberType().
  1295. * Should be from a Variable().
  1296. * @li accum: A mutable Tensor of the same type as "var". Should be from a Variable().
  1297. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1298. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1299. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1300. * @li grad: A Tensor of the same type as "var", for the gradient.
  1301. *@par Attributes:
  1302. *use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less contention.
  1303. *@par Outputs:
  1304. * @li var: A mutable Tensor. Has the same type as "var".
  1305. * @li accum: A mutable Tensor. Has the same type as "var".
  1306. */
  1307. REG_OP(ApplyProximalAdagradD)  // Declares op "ApplyProximalAdagradD": six inputs, two outputs, one optional attribute.
  1308. .INPUT(var, TensorType::NumberType())
  1309. .INPUT(accum, TensorType::NumberType())
  1310. .INPUT(lr, TensorType::NumberType())
  1311. .INPUT(l1, TensorType::NumberType())
  1312. .INPUT(l2, TensorType::NumberType())
  1313. .INPUT(grad, TensorType::NumberType())
  1314. .OUTPUT(var, TensorType::NumberType())
  1315. .OUTPUT(accum, TensorType::NumberType())  // extra output compared with ApplyProximalAdagrad
  1316. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1317. .OP_END_FACTORY_REG(ApplyProximalAdagradD)
  1318. /**
  1319. *@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm.\n
  1320. * Compared with op ApplyProximalAdagrad, an additional index tensor is input,
  1321. * Only the indices into the first dimensions of "var" and "accum" are updated.
  1322. *@par Inputs:
  1323. * Seven inputs, including:\n
  1324. * @li var: A mutable Tensor.\n
  1325. * TensorType::NumberType(). Should be a Variable Tensor.
  1326. * @li accum: A mutable Tensor of the same type as "var".\n
  1327. * Should be a Variable Tensor.
  1328. * @li lr: A Tensor of the same type as "var".\n
  1329. * Scaling factor. Must be a scalar.
  1330. * @li l1: A Tensor of the same type as "var".\n
  1331. * L1 regularization. Must be a scalar.
  1332. * @li l2: A Tensor of the same type as "var".\n
  1333. * L2 regularization. Must be a scalar.
  1334. * @li grad: A Tensor. Has the same type as "var". \n
  1335. * The gradient.
  1336. * @li indices: A vector of indices into the first dimension of "var" and "accum".\n
  1337. * TensorType::IndexNumberType().
  1338. *@par Attributes:
  1339. *use_locking: An optional bool. Defaults to "False".\n
  1340. * If "True", updating of the var and accum tensors will be protected by a lock; \n
  1341. * If "False", the behavior is undefined, but may exhibit less contention.
  1342. *@par Outputs:
  1343. *var: A mutable Tensor. Has the same type as "var".
  1344. */
  1345. REG_OP(SparseApplyProximalAdagrad)  // Declares op "SparseApplyProximalAdagrad": seven inputs, one output, one optional attribute.
  1346. .INPUT(var, TensorType::NumberType())
  1347. .INPUT(accum, TensorType::NumberType())
  1348. .INPUT(lr, TensorType::NumberType())
  1349. .INPUT(l1, TensorType::NumberType())
  1350. .INPUT(l2, TensorType::NumberType())
  1351. .INPUT(grad, TensorType::NumberType())
  1352. .INPUT(indices, TensorType::IndexNumberType())  // the only input typed IndexNumberType() rather than NumberType()
  1353. .OUTPUT(var, TensorType::NumberType())  // only "var" is declared as an output
  1354. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1355. .OP_END_FACTORY_REG(SparseApplyProximalAdagrad)
  1356. /**
  1357. *@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm.\n
  1358. * Compared with op ApplyProximalAdagrad, an additional index tensor is input,
  1359. * Only the indices into the first dimensions of "var" and "accum" are updated.
  1360. *@par Inputs:
  1361. * Seven inputs, including:\n
  1362. * @li var: A mutable Tensor.\n
  1363. * TensorType::NumberType(). Should be a Variable Tensor.
  1364. * @li accum: A mutable Tensor of the same type as "var".\n
  1365. * Should be a Variable Tensor.
  1366. * @li lr: A Tensor of the same type as "var".\n
  1367. * Scaling factor. Must be a scalar.
  1368. * @li l1: A Tensor of the same type as "var".\n
  1369. * L1 regularization. Must be a scalar.
  1370. * @li l2: A Tensor of the same type as "var".\n
  1371. * L2 regularization. Must be a scalar.
  1372. * @li grad: A Tensor. Has the same type as "var". \n
  1373. * The gradient.
  1374. * @li indices: A vector of indices into the first dimension of "var" and "accum".\n
  1375. * TensorType::IndexNumberType().
  1376. *@par Attributes:
  1377. *use_locking: An optional bool. Defaults to "False".\n
  1378. * If "True", updating of the var and accum tensors will be protected by a lock; \n
  1379. * If "False", the behavior is undefined, but may exhibit less contention.
  1380. *@par Outputs:
  1381. *@li var: A mutable Tensor. Has the same type as "var".
  1382. *@li accum: A mutable Tensor. Has the same type as "var".
  1383. */
  1384. REG_OP(SparseApplyProximalAdagradD)  // Declares op "SparseApplyProximalAdagradD": seven inputs, two outputs, one optional attribute.
  1385. .INPUT(var, TensorType::NumberType())
  1386. .INPUT(accum, TensorType::NumberType())
  1387. .INPUT(lr, TensorType::NumberType())
  1388. .INPUT(l1, TensorType::NumberType())
  1389. .INPUT(l2, TensorType::NumberType())
  1390. .INPUT(grad, TensorType::NumberType())
  1391. .INPUT(indices, TensorType::IndexNumberType())  // the only input typed IndexNumberType() rather than NumberType()
  1392. .OUTPUT(var, TensorType::NumberType())
  1393. .OUTPUT(accum, TensorType::NumberType())  // extra output compared with SparseApplyProximalAdagrad
  1394. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1395. .OP_END_FACTORY_REG(SparseApplyProximalAdagradD)
  1396. /**
  1397. *@brief Updates "var" according to the Ftrl-proximal scheme.
  1398. *@par Inputs:
  1399. *Eight inputs, including:
  1400. * @li var: A mutable Tensor. Must be of type TensorType::NumberType().
  1401. * Should be a Variable Tensor.
  1402. * @li accum: A mutable Tensor of the same type as "var".
  1403. * Should be a Variable Tensor.
  1404. * @li linear: A mutable Tensor of the same type as "var".
  1405. * Should be a Variable Tensor.
  1406. * @li grad: A Tensor of the same type as "var", for the gradient.
  1407. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1408. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1409. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1410. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1411. *@par Attributes:
  1412. *use_locking: An optional bool. Defaults to "False".
  1413. * If "True", updating of the "var" and "accum" tensors will be
  1414. * protected by a lock; otherwise the behavior is undefined,
  1415. * but may exhibit less contention.
  1416. *@par Outputs:
  1417. *var: A mutable Tensor. Has the same type as "var".
  1418. */
  1419. REG_OP(ApplyFtrl)  // Declares op "ApplyFtrl": eight inputs, one output, one optional attribute.
  1420. .INPUT(var, TensorType::NumberType())
  1421. .INPUT(accum, TensorType::NumberType())
  1422. .INPUT(linear, TensorType::NumberType())
  1423. .INPUT(grad, TensorType::NumberType())
  1424. .INPUT(lr, TensorType::NumberType())
  1425. .INPUT(l1, TensorType::NumberType())
  1426. .INPUT(l2, TensorType::NumberType())
  1427. .INPUT(lr_power, TensorType::NumberType())
  1428. .OUTPUT(var, TensorType::NumberType())  // only "var" is declared as an output
  1429. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1430. .OP_END_FACTORY_REG(ApplyFtrl)
  1431. /**
  1432. *@brief Updates "var" according to the Ftrl-proximal scheme.
  1433. *@par Inputs:
  1434. *Eight inputs, including:
  1435. * @li var: A mutable Tensor. Must be of type TensorType::NumberType().
  1436. * Should be a Variable Tensor.
  1437. * @li accum: A mutable Tensor of the same type as "var".
  1438. * Should be a Variable Tensor.
  1439. * @li linear: A mutable Tensor of the same type as "var".
  1440. * Should be a Variable Tensor.
  1441. * @li grad: A Tensor of the same type as "var", for the gradient.
  1442. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1443. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1444. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1445. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1446. *@par Attributes:
  1447. *use_locking: An optional bool. Defaults to "False".
  1448. * If "True", updating of the "var" and "accum" tensors will be
  1449. * protected by a lock; otherwise the behavior is undefined,
  1450. * but may exhibit less contention.
  1451. *@par Outputs:
  1452. *@li var: A mutable Tensor. Has the same type as "var".
  1453. *@li accum: A mutable Tensor. Has the same type as "accum".
  1454. *@li linear: A mutable Tensor. Has the same type as "linear".
  1455. */
  1456. REG_OP(ApplyFtrlD)  // Declares op "ApplyFtrlD": eight inputs, three outputs, one optional attribute.
  1457. .INPUT(var, TensorType::NumberType())
  1458. .INPUT(accum, TensorType::NumberType())
  1459. .INPUT(linear, TensorType::NumberType())
  1460. .INPUT(grad, TensorType::NumberType())
  1461. .INPUT(lr, TensorType::NumberType())
  1462. .INPUT(l1, TensorType::NumberType())
  1463. .INPUT(l2, TensorType::NumberType())
  1464. .INPUT(lr_power, TensorType::NumberType())
  1465. .OUTPUT(var, TensorType::NumberType())
  1466. .OUTPUT(accum, TensorType::NumberType())  // "accum" and "linear" are extra outputs compared with ApplyFtrl
  1467. .OUTPUT(linear, TensorType::NumberType())
  1468. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1469. .OP_END_FACTORY_REG(ApplyFtrlD)
  1470. /**
  1471. *@brief Update "var" according to the Ftrl-proximal scheme.
  1472. *@par Inputs:
  1473. *Nine inputs, including:
  1474. * @li var: A mutable Tensor. Must be of type TensorType::NumberType().
  1475. * Should be a Variable Tensor.
  1476. * @li accum: A mutable Tensor of the same type as "var".
  1477. * Should be a Variable Tensor.
  1478. * @li linear: A mutable Tensor of the same type as "var".
  1479. * Should be a Variable Tensor.
  1480. * @li grad: A Tensor of the same type as "var", for the gradient.
  1481. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1482. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1483. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1484. * @li l2_shrinkage: A Tensor of the same type as "var".
  1485. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1486. *@par Attributes:
  1487. *use_locking: An optional bool. Defaults to "False".
  1488. * If "True", updating of the "var" and "accum" tensors will be
  1489. * protected by a lock; otherwise the behavior is undefined,
  1490. * but may exhibit less contention.
  1491. *@par Outputs:
  1492. *var: A mutable Tensor. Has the same type as "var".
  1493. */
  1494. REG_OP(ApplyFtrlV2)  // Declares op "ApplyFtrlV2": nine inputs, one output, one optional attribute.
  1495. .INPUT(var, TensorType::NumberType())
  1496. .INPUT(accum, TensorType::NumberType())
  1497. .INPUT(linear, TensorType::NumberType())
  1498. .INPUT(grad, TensorType::NumberType())
  1499. .INPUT(lr, TensorType::NumberType())
  1500. .INPUT(l1, TensorType::NumberType())
  1501. .INPUT(l2, TensorType::NumberType())
  1502. .INPUT(l2_shrinkage, TensorType::NumberType())  // the input that ApplyFtrl does not have
  1503. .INPUT(lr_power, TensorType::NumberType())
  1504. .OUTPUT(var, TensorType::NumberType())  // only "var" is declared as an output
  1505. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1506. .OP_END_FACTORY_REG(ApplyFtrlV2)
  1507. /**
  1508. *@brief Update "var" according to the Ftrl-proximal scheme.
  1509. *@par Inputs:
  1510. *Nine inputs, including:
  1511. * @li var: A mutable Tensor. Must be of type TensorType::NumberType().
  1512. * Should be a Variable Tensor.
  1513. * @li accum: A mutable Tensor of the same type as "var".
  1514. * Should be a Variable Tensor.
  1515. * @li linear: A mutable Tensor of the same type as "var".
  1516. * Should be a Variable Tensor.
  1517. * @li grad: A Tensor of the same type as "var", for the gradient.
  1518. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1519. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1520. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1521. * @li l2_shrinkage: A Tensor of the same type as "var".
  1522. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1523. *@par Attributes:
  1524. *use_locking: An optional bool. Defaults to "False".
  1525. * If "True", updating of the "var" and "accum" tensors will be
  1526. * protected by a lock; otherwise the behavior is undefined,
  1527. * but may exhibit less contention.
  1528. *@par Outputs:
  1529. *@li var: A mutable Tensor. Has the same type as "var".
  1530. *@li accum: A mutable Tensor. Has the same type as "accum".
  1531. *@li linear: A mutable Tensor. Has the same type as "linear".
  1532. */
  1533. REG_OP(ApplyFtrlV2D)  // Declares op "ApplyFtrlV2D": nine inputs, three outputs, one optional attribute.
  1534. .INPUT(var, TensorType::NumberType())
  1535. .INPUT(accum, TensorType::NumberType())
  1536. .INPUT(linear, TensorType::NumberType())
  1537. .INPUT(grad, TensorType::NumberType())
  1538. .INPUT(lr, TensorType::NumberType())
  1539. .INPUT(l1, TensorType::NumberType())
  1540. .INPUT(l2, TensorType::NumberType())
  1541. .INPUT(l2_shrinkage, TensorType::NumberType())  // the input that ApplyFtrlD does not have
  1542. .INPUT(lr_power, TensorType::NumberType())
  1543. .OUTPUT(var, TensorType::NumberType())
  1544. .OUTPUT(accum, TensorType::NumberType())  // "accum" and "linear" are extra outputs compared with ApplyFtrlV2
  1545. .OUTPUT(linear, TensorType::NumberType())
  1546. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1547. .OP_END_FACTORY_REG(ApplyFtrlV2D)
  1548. /**
  1549. *@brief Updates "var" according to the Adam algorithm.\n
  1550. * lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\n
  1551. * m_t <- beta1 * m_{t-1} + (1 - beta1) * g\n
  1552. * v_t <- beta2 * v_{t-1} + (1 - beta2) * g * g\n
  1553. * var <- var - lr_t * m_t / (sqrt(v_t) + epsilon)
  1554. *
  1555. *@attention Constraints:\n
  1556. * *The input tensors must have the same shape.*
  1557. *
  1558. *@par Inputs:
  1559. *@li var: A mutable Tensor of the type TensorType::NumberType().
  1560. * Should be from a Variable().
  1561. *@li m: A mutable Tensor of the same type as "var".
  1562. * Should be from a Variable().
  1563. *@li v: A mutable Tensor of the same type as "var".
  1564. * Should be from a Variable().
  1565. *@li beta1_power: A scalar of the same type as "var".
  1566. *@li beta2_power: A scalar of the same type as "var".
  1567. *@li lr: learning_rate. A scalar of the same type as "var".
  1568. *@li beta1: A scalar of the same type as "var".
  1569. *@li beta2: A scalar of the same type as "var".
  1570. *@li epsilon: A scalar of the same type as "var".
  1571. *@li grad: A Tensor of the same type as "var", for the gradient.
  1572. *
  1573. *@par Attributes:\n
  1574. *@li use_locking: An optional bool. Defaults to "False".
  1575. * If "True", updating of the "var", "m", and "v" tensors will be protected
  1576. * by a lock; otherwise the behavior is undefined, but may exhibit less
  1577. * contention.
  1578. *@li use_nesterov: An optional bool. Defaults to "False".
  1579. * If "True", uses the nesterov update.
  1580. *
  1581. *@par Outputs:
  1582. * var: A mutable Tensor. Has the same type as input "var".
  1583. */
  1584. REG_OP(ApplyAdam)  // Declares op "ApplyAdam": ten inputs, one output, two optional attributes.
  1585. .INPUT(var, TensorType::NumberType())
  1586. .INPUT(m, TensorType::NumberType())
  1587. .INPUT(v, TensorType::NumberType())
  1588. .INPUT(beta1_power, TensorType::NumberType())
  1589. .INPUT(beta2_power, TensorType::NumberType())
  1590. .INPUT(lr, TensorType::NumberType())
  1591. .INPUT(beta1, TensorType::NumberType())
  1592. .INPUT(beta2, TensorType::NumberType())
  1593. .INPUT(epsilon, TensorType::NumberType())
  1594. .INPUT(grad, TensorType::NumberType())
  1595. .OUTPUT(var, TensorType::NumberType())  // only "var" is declared as an output
  1596. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1597. .ATTR(use_nesterov, Bool, false)  // optional, defaults to false
  1598. .OP_END_FACTORY_REG(ApplyAdam)
  1599. /**
  1600. *@brief Updates "var" according to the Adam algorithm.\n
  1601. * lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\n
  1602. * m_t <- beta1 * m_{t-1} + (1 - beta1) * g\n
  1603. * v_t <- beta2 * v_{t-1} + (1 - beta2) * g * g\n
  1604. * var <- var - lr_t * m_t / (sqrt(v_t) + epsilon)
  1605. *
  1606. *@attention Constraints:\n
  1607. * *The input tensors must have the same shape.*
  1608. *
  1609. *@par Inputs:
  1610. *@li var: A mutable Tensor of the type TensorType::NumberType().
  1611. * Should be from a Variable().
  1612. *@li m: A mutable Tensor of the same type as "var".
  1613. * Should be from a Variable().
  1614. *@li v: A mutable Tensor of the same type as "var".
  1615. * Should be from a Variable().
  1616. *@li beta1_power: A scalar of the same type as "var".
  1617. *@li beta2_power: A scalar of the same type as "var".
  1618. *@li lr: learning_rate. A scalar of the same type as "var".
  1619. *@li beta1: A scalar of the same type as "var".
  1620. *@li beta2: A scalar of the same type as "var".
  1621. *@li epsilon: A scalar of the same type as "var".
  1622. *@li grad: A Tensor of the same type as "var", for the gradient.
  1623. *
  1624. *@par Attributes:\n
  1625. *@li use_locking: An optional bool. Defaults to "False".
  1626. * If "True", updating of the "var", "m", and "v" tensors will be protected
  1627. * by a lock; otherwise the behavior is undefined, but may exhibit less
  1628. * contention.
  1629. *@li use_nesterov: An optional bool. Defaults to "False".
  1630. * If "True", uses the nesterov update.
  1631. *
  1632. *@par Outputs:
  1633. *@li var: A mutable tensor. Has the same type as input "var".
  1634. *@li m: A mutable tensor. Has the same type as input "m".
  1635. *@li v: A mutable tensor. Has the same type as input "v".
  1636. */
  1637. REG_OP(ApplyAdamD)  // Declares op "ApplyAdamD": ten inputs, three outputs, two optional attributes.
  1638. .INPUT(var, TensorType::NumberType())
  1639. .INPUT(m, TensorType::NumberType())
  1640. .INPUT(v, TensorType::NumberType())
  1641. .INPUT(beta1_power, TensorType::NumberType())
  1642. .INPUT(beta2_power, TensorType::NumberType())
  1643. .INPUT(lr, TensorType::NumberType())
  1644. .INPUT(beta1, TensorType::NumberType())
  1645. .INPUT(beta2, TensorType::NumberType())
  1646. .INPUT(epsilon, TensorType::NumberType())
  1647. .INPUT(grad, TensorType::NumberType())
  1648. .OUTPUT(var, TensorType::NumberType())
  1649. .OUTPUT(m, TensorType::NumberType())  // "m" and "v" are extra outputs compared with ApplyAdam
  1650. .OUTPUT(v, TensorType::NumberType())
  1651. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1652. .ATTR(use_nesterov, Bool, false)  // optional, defaults to false
  1653. .OP_END_FACTORY_REG(ApplyAdamD)
  1654. /**
  1655. *@brief Updates "var" according to the Adadelta scheme.
  1656. *@par Inputs:
  1657. *Seven inputs, including:
  1658. * @li var: A mutable Tensor of type TensorType::NumberType().
  1659. * Should be a Variable Tensor.
  1660. * @li accum: A mutable Tensor of the same type as "var".
  1661. * Should be a Variable Tensor.
  1662. * @li accum_update: A mutable Tensor of the same type as "var".
  1663. * Should be a Variable Tensor.
  1664. * @li lr: A scalar of the same type as "var", for the scaling factor.
  1665. * @li rho: A scalar of the same type as "var", for the decay factor.
  1666. * @li epsilon: A scalar of the same type as "var", for the constant factor.
  1667. * @li grad: A Tensor of the same type as "var", for the gradient.
  1668. *@par Attributes:
  1669. *use_locking: An optional bool. Defaults to "False".
  1670. * If "True", updating of the "var", "accum" and "accum_update" tensors will be
  1671. * protected by a lock; otherwise the behavior is undefined,
  1672. * but may exhibit less contention.
  1673. *@par Outputs:
  1674. *var: A mutable Tensor. Has the same type as "var".
  1675. */
  1676. REG_OP(ApplyAdadelta)  // Declares op "ApplyAdadelta": seven inputs, one output, one optional attribute.
  1677. .INPUT(var, TensorType::NumberType())
  1678. .INPUT(accum, TensorType::NumberType())
  1679. .INPUT(accum_update, TensorType::NumberType())
  1680. .INPUT(lr, TensorType::NumberType())
  1681. .INPUT(rho, TensorType::NumberType())
  1682. .INPUT(epsilon, TensorType::NumberType())
  1683. .INPUT(grad, TensorType::NumberType())
  1684. .OUTPUT(var, TensorType::NumberType())  // only "var" is declared as an output
  1685. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1686. .OP_END_FACTORY_REG(ApplyAdadelta)
  1687. /**
  1688. *@brief Updates "var" according to the Adadelta scheme.
  1689. *@par Inputs:
  1690. *Seven inputs, including:
  1691. * @li var: A mutable Tensor of type TensorType::NumberType().
  1692. * Should be a Variable Tensor.
  1693. * @li accum: A mutable Tensor of the same type as "var".
  1694. * Should be a Variable Tensor.
  1695. * @li accum_update: A mutable Tensor of the same type as "var".
  1696. * Should be a Variable Tensor.
  1697. * @li lr: A scalar of the same type as "var", for the scaling factor.
  1698. * @li rho: A scalar of the same type as "var", for the decay factor.
  1699. * @li epsilon: A scalar of the same type as "var", for the constant factor.
  1700. * @li grad: A Tensor of the same type as "var", for the gradient.
  1701. *@par Attributes:
  1702. *use_locking: An optional bool. Defaults to "False".
  1703. * If "True", updating of the "var", "accum" and "accum_update" tensors will be
  1704. * protected by a lock; otherwise the behavior is undefined,
  1705. * but may exhibit less contention.
  1706. *@par Outputs:
  1707. *@li var: A mutable Tensor. Has the same type as "var".
  1708. *@li accum: A mutable Tensor. Has the same type as "var".
  1709. *@li accum_update: A mutable Tensor. Has the same type as "var".
  1710. */
  1711. REG_OP(ApplyAdadeltaD)  // Declares op "ApplyAdadeltaD": seven inputs, three outputs, one optional attribute.
  1712. .INPUT(var, TensorType::NumberType())
  1713. .INPUT(accum, TensorType::NumberType())
  1714. .INPUT(accum_update, TensorType::NumberType())
  1715. .INPUT(lr, TensorType::NumberType())
  1716. .INPUT(rho, TensorType::NumberType())
  1717. .INPUT(epsilon, TensorType::NumberType())
  1718. .INPUT(grad, TensorType::NumberType())
  1719. .OUTPUT(var, TensorType::NumberType())
  1720. .OUTPUT(accum, TensorType::NumberType())  // "accum" and "accum_update" are extra outputs compared with ApplyAdadelta
  1721. .OUTPUT(accum_update, TensorType::NumberType())
  1722. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1723. .OP_END_FACTORY_REG(ApplyAdadeltaD)
  1724. /**
  1725. * @brief Updates "var" according to the ApplyMomentum algorithm. \n
  1726. * accum = accum * momentum + x1 * x2 \n
  1727. * if use_nesterov is True: \n
  1728. * var -= x1 * x2 * lr + accum * momentum * lr \n
  1729. * else:\n
  1730. * var -= accum * lr
  1731. *
  1732. * @par Inputs:
  1733. * Six inputs, including:
  1734. * @li var: A mutable Tensor has type TensorType::NumberType().
  1735. * Should be a Variable Tensor.
  1736. * @li accum: A mutable Tensor has the same type as "var".
  1737. * Should be a Variable Tensor.
  1738. * @li lr: A scalar has the same type as "var", for the scaling factor.
  1739. * @li x1: A Tensor has type TensorType::NumberType().
  1740. * @li momentum: A scalar has the same type as "var".
  1741. * @li x2: A scalar has the same type as "var".
  1742. *
  1743. * @par Attributes:
  1744. * Two attributes, including:
  1745. * @li use_nesterov: An optional bool. Defaults to "False". \n
  1746. * If True, the tensor passed to compute grad will be var - lr * momentum * accum, \n
  1747. * so in the end, the var you get is actually var - lr * momentum * accum.
  1748. * @li use_locking: An optional bool. Defaults to "False". \n
  1749. * If "True", updating of the "var" and "accum" tensors will be protected \n
  1750. * by a lock; otherwise the behavior is undefined, but may exhibit less contention.
  1751. *
  1752. * @par Outputs:
  1753. * Two outputs, including:
  1754. * @li var: A mutable Tensor has the same type as "var".
  1755. * @li accum: A mutable Tensor has the same type as "var".
  1756. */
  1757. REG_OP(FusedMulApplyMomentum)  // Declares op "FusedMulApplyMomentum": six inputs, two outputs, two optional attributes.
  1758. .INPUT(var, TensorType::NumberType())
  1759. .INPUT(accum, TensorType::NumberType())
  1760. .INPUT(lr, TensorType::NumberType())
  1761. .INPUT(x1, TensorType::NumberType())
  1762. .INPUT(momentum, TensorType::NumberType())
  1763. .INPUT(x2, TensorType::NumberType())
  1764. .OUTPUT(var, TensorType::NumberType())
  1765. .OUTPUT(accum, TensorType::NumberType())
  1766. .ATTR(use_nesterov, Bool, false)  // optional, defaults to false
  1767. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1768. .OP_END_FACTORY_REG(FusedMulApplyMomentum)
  1769. /**
  1770. * @brief Updates "var" according to the ApplyMomentum algorithm. \n
  1771. * accum = accum * momentum + x1 * x2 \n
  1772. * if use_nesterov is True: \n
  1773. * var -= x1 * x2 * lr + accum * momentum * lr \n
  1774. * else: \n
  1775. * var -= accum * lr
  1776. *
  1777. * @par Inputs:
  1778. * Seven inputs, including:
  1779. * @li var: A mutable Tensor of type float32.
  1780. * Should be a Variable Tensor.
  1781. * @li accum: A mutable Tensor has type TensorType::NumberType().
  1782. * Should be a Variable Tensor.
  1783. * @li lr: A scalar has the same type as "accum", for the scaling factor.
  1784. * @li x1: A Tensor has the same type as "accum".
  1785. * @li momentum: A scalar has the same type as "accum".
  1786. * @li x2: A scalar has the same type as "accum".
  1787. * @li var_copy: A Tensor has type float16.
  1788. *
  1789. * @par Attributes:
  1790. * Two Attributes, including:
  1791. * @li use_nesterov: An optional bool. Defaults to "False". \n
  1792. * If True, the tensor passed to compute grad will be var - lr * momentum * accum, \n
  1793. * so in the end, the var you get is actually var - lr * momentum * accum.
  1794. * @li use_locking: An optional bool. Defaults to "False". \n
  1795. * If "True", updating of the "var" and "accum" tensors will be protected \n
  1796. * by a lock; otherwise the behavior is undefined, but may exhibit less contention.
  1797. *
  1798. * @par Outputs:
  1799. * Three outputs, including:
  1800. * @li var: A Tensor has the type float32.
  1801. * @li var_copy: A Tensor has the type float16.
  1802. * @li accum: A Tensor has the same type as input "accum".
  1803. */
  1804. REG_OP(FusedMulApplyMomentumExtern)  // Declares op "FusedMulApplyMomentumExtern": seven inputs, three outputs, two optional attributes.
  1805. .INPUT(var, TensorType(DT_FLOAT))  // fixed to DT_FLOAT, unlike the NumberType() operands below
  1806. .INPUT(accum, TensorType::NumberType())
  1807. .INPUT(lr, TensorType::NumberType())
  1808. .INPUT(x1, TensorType::NumberType())
  1809. .INPUT(momentum, TensorType::NumberType())
  1810. .INPUT(x2, TensorType::NumberType())
  1811. .INPUT(var_copy, TensorType(DT_FLOAT16))  // fixed to DT_FLOAT16; also declared as an output below
  1812. .OUTPUT(var, TensorType(DT_FLOAT))
  1813. .OUTPUT(var_copy, TensorType(DT_FLOAT16))
  1814. .OUTPUT(accum, TensorType::NumberType())
  1815. .ATTR(use_nesterov, Bool, false)  // optional, defaults to false
  1816. .ATTR(use_locking, Bool, false)  // optional, defaults to false
  1817. .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern)
  1818. /**
  1819. *@brief Update "g" according to the LARS algorithm.
  1820. *@par Inputs:
  1821. *Four inputs, including:
  1822. * @li w: A Tensor. Must be of type TensorType::DT_FLOAT.
  1823. * @li g: A Tensor of the same type and shape as "w".
  1824. * @li weight_decay: A Tensor of the same type as "w", Must be a scalar.
  1825. * @li learning_rate: A Tensor of the same type as "w", Must be a scalar.
  1826. *@par Attributes:
  1827. *Three Attributes, including:
  1828. * @li hyperpara: An optional float. Default value is 0.001.
  1829. * @li epsilon: An optional float. Defaults to 1e-5. Used to avoid a zero denominator.
  1830. * @li use_clip: An optional bool. Defaults to "False".\n
  1831. * If "True", updating learning rate.
  1832. *@par Outputs:
  1833. *g_new: Tensor of the same type as "w".
  1834. */
  1835. REG_OP(LarsV2)  // Declares op "LarsV2": four DT_FLOAT inputs, one DT_FLOAT output, three optional attributes.
  1836. .INPUT(w, TensorType(DT_FLOAT))
  1837. .INPUT(g, TensorType(DT_FLOAT))
  1838. .INPUT(weight_decay, TensorType(DT_FLOAT))
  1839. .INPUT(learning_rate, TensorType(DT_FLOAT))
  1840. .OUTPUT(g_new, TensorType(DT_FLOAT))  // result is a separate output "g_new"; no input name is reused as an output
  1841. .ATTR(hyperpara, Float, 0.001)  // optional, defaults to 0.001
  1842. .ATTR(epsilon, Float, 0.00001)  // optional, defaults to 1e-5
  1843. .ATTR(use_clip, Bool, false)  // optional, defaults to false
  1844. .OP_END_FACTORY_REG(LarsV2)
  1845. /**
  1846. * @brief Updates "g" according to the LARS algorithm and outputs the result as "g_new".
  1847. * @par Inputs:
  1848. * Six inputs, including:
  1849. * @li w: A Tensor. Must be of type float32 (DT_FLOAT).
  1850. * @li g: A Tensor of the same type and shape as "w".
  1851. * @li w_square_sum: A Tensor of square_sum(w). Has the same type as "w". Must be a scalar.
  1852. * @li g_square_sum: A Tensor of square_sum(g). Has the same type as "w". Must be a scalar.
  1853. * @li weight_decay: A Tensor of the same type as "w". Must be a scalar.
  1854. * @li learning_rate: A Tensor of the same type as "w". Must be a scalar.
  1855. * @par Attributes:
  1856. * Three attributes, including:
  1857. * @li hyperpara: An optional float. Defaults to 0.001.
  1858. * @li epsilon: An optional float. Defaults to 1e-5. Keeps the denominator from being 0.
  1859. * @li use_clip: An optional bool. Defaults to "False".\n
  1860. * If "True", the learning rate is updated.
  1861. * @par Outputs:
  1862. * g_new: A Tensor of the same type as "w".
  1863. */
  1864. REG_OP(LarsV2Update)
  1865. .INPUT(w, TensorType(DT_FLOAT))
  1866. .INPUT(g, TensorType(DT_FLOAT))
  1867. .INPUT(w_square_sum, TensorType(DT_FLOAT))
  1868. .INPUT(g_square_sum, TensorType(DT_FLOAT))
  1869. .INPUT(weight_decay, TensorType(DT_FLOAT))
  1870. .INPUT(learning_rate, TensorType(DT_FLOAT))
  1871. .OUTPUT(g_new, TensorType(DT_FLOAT))
  1872. .ATTR(hyperpara, Float, 0.001)
  1873. .ATTR(epsilon, Float, 0.00001)
  1874. .ATTR(use_clip, Bool, false)
  1875. .OP_END_FACTORY_REG(LarsV2Update)
  1876. /**
  1877. * @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme.
  1878. * @par Inputs:
  1879. * Nine inputs, including:
  1880. * @li var: A mutable Tensor. Must be of type float32 (DT_FLOAT).
  1881. * Should be a Variable Tensor.
  1882. * @li accum: A mutable Tensor of the same type as "var".
  1883. * Should be a Variable Tensor.
  1884. * @li linear: A mutable Tensor of the same type as "var".
  1885. * Should be a Variable Tensor.
  1886. * @li grad: A Tensor of the same type as "var", for the gradient.
  1887. * @li indices: A vector of indices into the first dimension of "var" and "accum". Must be of type int32.
  1888. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1889. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1890. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1891. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1892. * @par Attributes:
  1893. * use_locking: An optional bool. Defaults to "False".
  1894. * If "True", updating of the "var" and "accum" tensors will be
  1895. * protected by a lock; otherwise the behavior is undefined,
  1896. * but may exhibit less contention.
  1897. * @par Outputs:
  1898. * var: A Tensor. Has the same type and format as input "var".
  1899. */
  1900. REG_OP(SparseApplyFtrl)
  1901. .INPUT(var, TensorType({DT_FLOAT}))
  1902. .INPUT(accum, TensorType({DT_FLOAT}))
  1903. .INPUT(linear, TensorType({DT_FLOAT}))
  1904. .INPUT(grad, TensorType({DT_FLOAT}))
  1905. .INPUT(indices, TensorType({DT_INT32}))
  1906. .INPUT(lr, TensorType({DT_FLOAT}))
  1907. .INPUT(l1, TensorType({DT_FLOAT}))
  1908. .INPUT(l2, TensorType({DT_FLOAT}))
  1909. .INPUT(lr_power, TensorType({DT_FLOAT}))
  1910. .OUTPUT(var, TensorType({DT_FLOAT}))
  1911. .ATTR(use_locking, Bool, false)
  1912. .OP_END_FACTORY_REG(SparseApplyFtrl)
  1913. /**
  1914. * @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme.
  1915. * @par Inputs:
  1916. * Five inputs, including:
  1917. * @li var: A mutable Tensor. Must be of type float32 (DT_FLOAT).
  1918. * Should be a Variable Tensor.
  1919. * @li accum: A mutable Tensor of the same type as "var".
  1920. * Should be a Variable Tensor.
  1921. * @li linear: A mutable Tensor of the same type as "var".
  1922. * Should be a Variable Tensor.
  1923. * @li grad: A Tensor of the same type as "var", for the gradient.
  1924. * @li indices: A vector of indices into the first dimension of "var" and "accum". Must be of type int32.
  1925. * @par Attributes:
  1926. * @li lr: A required float, for the scaling factor.
  1927. * @li l1: A required float, for L1 regularization.
  1928. * @li l2: A required float, for L2 regularization.
  1929. * @li lr_power: A required float, for the scaling factor.
  1930. * @li use_locking: An optional bool. Defaults to "False".
  1931. * If "True", updating of the "var" and "accum" tensors will be
  1932. * protected by a lock; otherwise the behavior is undefined,
  1933. * but may exhibit less contention.
  1934. * @par Outputs:
  1935. * @li var: A Tensor. Has the same type and format as input "var".
  1936. * @li accum: A Tensor. Has the same type and format as input "accum".
  1937. * @li linear: A Tensor. Has the same type and format as input "linear".
  1938. */
  1939. REG_OP(SparseApplyFtrlD)
  1940. .INPUT(var, TensorType({DT_FLOAT}))
  1941. .INPUT(accum, TensorType({DT_FLOAT}))
  1942. .INPUT(linear, TensorType({DT_FLOAT}))
  1943. .INPUT(grad, TensorType({DT_FLOAT}))
  1944. .INPUT(indices, TensorType({DT_INT32}))
  1945. .OUTPUT(var, TensorType({DT_FLOAT}))
  1946. .OUTPUT(accum, TensorType({DT_FLOAT}))
  1947. .OUTPUT(linear, TensorType({DT_FLOAT}))
  1948. .REQUIRED_ATTR(lr, Float)
  1949. .REQUIRED_ATTR(l1, Float)
  1950. .REQUIRED_ATTR(l2, Float)
  1951. .REQUIRED_ATTR(lr_power, Float)
  1952. .ATTR(use_locking, Bool, false)
  1953. .OP_END_FACTORY_REG(SparseApplyFtrlD)
  1954. /**
  1955. * @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme.
  1956. * That is, for rows we have grad for, "var", "accum" and "linear" are updated.
  1957. * @par Inputs:
  1958. * Ten inputs, including:
  1959. * @li var: A mutable Tensor. Must be of type float32 (DT_FLOAT).
  1960. * Should be a Variable Tensor.
  1961. * @li accum: A mutable Tensor of the same type as "var".
  1962. * Should be a Variable Tensor.
  1963. * @li linear: A mutable Tensor of the same type as "var".
  1964. * Should be a Variable Tensor.
  1965. * @li grad: A Tensor of the same type as "var", for the gradient.
  1966. * @li indices: A vector of indices into the first dimension of "var" and "accum". Must be of type int32.
  1967. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1968. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1969. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1970. * @li l2_shrinkage: A Tensor of the same type as "var", for L2 shrinkage regularization. Must be a scalar.
  1971. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1972. * @par Attributes:
  1973. * use_locking: An optional bool. Defaults to "False".
  1974. * If "True", updating of the "var" and "accum" tensors will be
  1975. * protected by a lock; otherwise the behavior is undefined,
  1976. * but may exhibit less contention.
  1977. * @par Outputs:
  1978. * var: A Tensor. Has the same type and format as input "var".
  1979. */
  1980. REG_OP(SparseApplyFtrlV2)
  1981. .INPUT(var, TensorType({DT_FLOAT}))
  1982. .INPUT(accum, TensorType({DT_FLOAT}))
  1983. .INPUT(linear, TensorType({DT_FLOAT}))
  1984. .INPUT(grad, TensorType({DT_FLOAT}))
  1985. .INPUT(indices, TensorType({DT_INT32}))
  1986. .INPUT(lr, TensorType({DT_FLOAT}))
  1987. .INPUT(l1, TensorType({DT_FLOAT}))
  1988. .INPUT(l2, TensorType({DT_FLOAT}))
  1989. .INPUT(l2_shrinkage, TensorType({DT_FLOAT}))
  1990. .INPUT(lr_power, TensorType({DT_FLOAT}))
  1991. .OUTPUT(var, TensorType({DT_FLOAT}))
  1992. .ATTR(use_locking, Bool, false)
  1993. .OP_END_FACTORY_REG(SparseApplyFtrlV2)
  1994. /**
  1995. * @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme.
  1996. * That is, for rows we have grad for, "var", "accum" and "linear" are updated.
  1997. * @par Inputs:
  1998. * Five inputs, including:
  1999. * @li var: A mutable Tensor. Must be of type float32 (DT_FLOAT).
  2000. * Should be a Variable Tensor.
  2001. * @li accum: A mutable Tensor of the same type as "var".
  2002. * Should be a Variable Tensor.
  2003. * @li linear: A mutable Tensor of the same type as "var".
  2004. * Should be a Variable Tensor.
  2005. * @li grad: A Tensor of the same type as "var", for the gradient.
  2006. * @li indices: A vector of indices into the first dimension of "var" and "accum". Must be of type int32.
  2007. * @par Attributes:
  2008. * @li lr: A required float, for the scaling factor.
  2009. * @li l1: A required float, for L1 regularization.
  2010. * @li l2: A required float, for L2 regularization.
  2011. * @li l2_shrinkage: A required float, for L2 shrinkage regularization.
  2012. * @li lr_power: A required float, for the scaling factor.
  2013. * @li use_locking: An optional bool. Defaults to "False".
  2014. * If "True", updating of the "var" and "accum" tensors will be
  2015. * protected by a lock; otherwise the behavior is undefined,
  2016. * but may exhibit less contention.
  2017. * @par Outputs:
  2018. * @li var: A Tensor. Has the same type and format as input "var".
  2019. * @li accum: A Tensor. Has the same type and format as input "accum".
  2020. * @li linear: A Tensor. Has the same type and format as input "linear".
  2021. */
  2022. REG_OP(SparseApplyFtrlV2D)
  2023. .INPUT(var, TensorType({DT_FLOAT}))
  2024. .INPUT(accum, TensorType({DT_FLOAT}))
  2025. .INPUT(linear, TensorType({DT_FLOAT}))
  2026. .INPUT(grad, TensorType({DT_FLOAT}))
  2027. .INPUT(indices, TensorType({DT_INT32}))
  2028. .OUTPUT(var, TensorType({DT_FLOAT}))
  2029. .OUTPUT(accum, TensorType({DT_FLOAT}))
  2030. .OUTPUT(linear, TensorType({DT_FLOAT}))
  2031. .REQUIRED_ATTR(lr, Float)
  2032. .REQUIRED_ATTR(l1, Float)
  2033. .REQUIRED_ATTR(l2, Float)
  2034. .REQUIRED_ATTR(l2_shrinkage, Float)
  2035. .REQUIRED_ATTR(lr_power, Float)
  2036. .ATTR(use_locking, Bool, false)
  2037. .OP_END_FACTORY_REG(SparseApplyFtrlV2D)
  2038. /**
  2039. * @brief Updates "var" at the specified indices according to the RMSProp algorithm.
  2040. * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
  2041. * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
  2042. * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
  2043. * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
  2044. * var <- var - mom\n
  2045. *
  2046. * @par Inputs:
  2047. * @li var: A mutable tensor. Must be one of the data types defined in
  2048. * TensorType::NumberType(). Should be from a Variable().
  2049. * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
  2050. * Variable().
  2051. * @li mom: A mutable tensor. Must have the same type as "var". Should be from a
  2052. * Variable().
  2053. * @li lr: A scalar. Must have the same type as "var".
  2054. * @li rho: A scalar. Must have the same type as "var".
  2055. * @li momentum: A scalar. Must have the same type as "var".
  2056. * @li epsilon: A scalar. Must have the same type as "var".
  2057. * @li grad: A tensor, specifying the gradient.
  2058. * @li indices: A vector of indices into the first dimension of "var", "mom" and "ms". Must be one of the types defined in TensorType::IndexNumberType().
  2059. *
  2060. * @par Attributes:
  2061. * use_locking: An optional "bool". Defaults to "False". If "True", updating of
  2062. * the "var", "ms", and "mom" tensors will be protected by a lock; otherwise the
  2063. * behavior is undefined, but may exhibit less contention.
  2064. *
  2065. * @par Outputs:
  2066. * var: A mutable tensor. Has the same type as input "var".
  2067. *
  2068. * @attention Constraints:
  2069. * @li Note that in this sparse implementation, "ms" and "mom" will not update
  2070. * in iterations during which "grad" is 0.
  2071. * @li The input tensors "var", "ms", and "mom" must have the same shape.
  2072. *
  2073. */
  2074. REG_OP(SparseApplyRMSProp)
  2075. .INPUT(var, TensorType::NumberType())
  2076. .INPUT(ms, TensorType::NumberType())
  2077. .INPUT(mom, TensorType::NumberType())
  2078. .INPUT(lr, TensorType::NumberType())
  2079. .INPUT(rho, TensorType::NumberType())
  2080. .INPUT(momentum, TensorType::NumberType())
  2081. .INPUT(epsilon, TensorType::NumberType())
  2082. .INPUT(grad, TensorType::NumberType())
  2083. .INPUT(indices, TensorType::IndexNumberType())
  2084. .OUTPUT(var, TensorType::NumberType())
  2085. .ATTR(use_locking, Bool, false)
  2086. .OP_END_FACTORY_REG(SparseApplyRMSProp)
  2087. /**
  2088. * @brief Updates "var" at the specified indices according to the RMSProp algorithm.
  2089. * "rho", "momentum" and "epsilon" are passed as attributes instead of inputs.\n
  2090. * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
  2091. * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
  2092. * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
  2093. * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
  2094. * var <- var - mom
  2095. *
  2096. * @par Inputs:
  2097. * @li var: A mutable tensor. Must be one of the data types defined in
  2098. * TensorType::NumberType(). Should be from a Variable().
  2099. * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
  2100. * Variable().
  2101. * @li mom: A mutable tensor. Must have the same type as "var". Should be from a Variable().
  2102. * @li lr: A scalar. Must have the same type as "var".
  2103. * @li grad: A tensor, specifying the gradient.
  2104. * @li indices: A vector of indices into the first dimension of "var", "mom" and "ms". Must be one of the types defined in TensorType::IndexNumberType().
  2105. *
  2106. * @par Attributes:
  2107. * @li use_locking: An optional "bool". Defaults to "False". If "True",
  2108. * updating of the "var", "ms", and "mom" tensors will be protected by a lock;
  2109. * otherwise the behavior is undefined, but may exhibit less contention.
  2110. * @li rho: A required float.
  2111. * @li momentum: A required float.
  2112. * @li epsilon: A required float.
  2113. *
  2114. * @par Outputs:
  2115. * @li var: A mutable tensor. Must have the same type as input "var".
  2116. * @li ms: A mutable tensor. Must have the same type as input "ms".
  2117. * @li mom: A mutable tensor. Must have the same type as input "mom".
  2118. *
  2119. * @attention Constraints:
  2120. * @li Note that in this sparse implementation, "ms" and "mom" will not update
  2121. * in iterations during which "grad" is 0.
  2122. * @li The input tensors "var", "ms" and "mom" must have the same shape.
  2123. */
  2124. REG_OP(SparseApplyRMSPropD)
  2125. .INPUT(var, TensorType::NumberType())
  2126. .INPUT(ms, TensorType::NumberType())
  2127. .INPUT(mom, TensorType::NumberType())
  2128. .INPUT(lr, TensorType::NumberType())
  2129. .INPUT(grad, TensorType::NumberType())
  2130. .INPUT(indices, TensorType::IndexNumberType())
  2131. .OUTPUT(var, TensorType::NumberType())
  2132. .OUTPUT(ms, TensorType::NumberType())
  2133. .OUTPUT(mom, TensorType::NumberType())
  2134. .REQUIRED_ATTR(rho, Float)
  2135. .REQUIRED_ATTR(momentum, Float)
  2136. .REQUIRED_ATTR(epsilon, Float)
  2137. .ATTR(use_locking, Bool, false)
  2138. .OP_END_FACTORY_REG(SparseApplyRMSPropD)
  2139. /**
  2140. * @brief Updates "var" at the specified indices according to the Adadelta algorithm.
  2141. * accum <- rho * accum + (1 - rho) * grad.square()\n
  2142. * update <- (accum_update + epsilon).sqrt() * (accum + epsilon).rsqrt() * grad\n
  2143. * var <- var - update * lr\n
  2144. * accum_update <- rho * accum_update + (1 - rho) * update.square()\n
  2145. *
  2146. * @par Inputs:
  2147. * @li var: A mutable tensor. Must be one of the data types defined in
  2148. * TensorType::NumberType(). Should be from a Variable().
  2149. * @li accum: A mutable tensor. Must have the same type as "var". Should be from a
  2150. * Variable().
  2151. * @li accum_update: A mutable tensor. Must have the same type as "var". Should be from a
  2152. * Variable().
  2153. * @li lr: A scalar. Must have the same type as "var".
  2154. * @li rho: A scalar. Must have the same type as "var".
  2155. * @li epsilon: A scalar. Must have the same type as "var".
  2156. * @li grad: A tensor, specifying the gradient.
  2157. * @li indices: A vector of indices into the first dimension of "var", "accum" and "accum_update". Must be one of the types defined in TensorType::IndexNumberType().
  2158. *
  2159. * @par Attributes:
  2160. * use_locking: An optional "bool". Defaults to "False". If "True", updating of
  2161. * the "var", "accum", and "accum_update" tensors will be protected by a lock; otherwise the
  2162. * behavior is undefined, but may exhibit less contention.
  2163. *
  2164. * @par Outputs:
  2165. * var: A mutable tensor. Has the same type as input "var".
  2166. *
  2167. * @attention Constraints:
  2168. * @li Note that in this sparse implementation, "accum" and "accum_update" will not update
  2169. * in iterations during which "grad" is 0.
  2170. * @li The input tensors "var", "accum", and "accum_update" must have the same shape.
  2171. *
  2172. */
  2173. REG_OP(SparseApplyAdadelta)
  2174. .INPUT(var, TensorType::NumberType())
  2175. .INPUT(accum, TensorType::NumberType())
  2176. .INPUT(accum_update, TensorType::NumberType())
  2177. .INPUT(lr, TensorType::NumberType())
  2178. .INPUT(rho, TensorType::NumberType())
  2179. .INPUT(epsilon, TensorType::NumberType())
  2180. .INPUT(grad, TensorType::NumberType())
  2181. .INPUT(indices, TensorType::IndexNumberType())
  2182. .OUTPUT(var, TensorType::NumberType())
  2183. .ATTR(use_locking, Bool, false)
  2184. .OP_END_FACTORY_REG(SparseApplyAdadelta)
  2185. /**
  2186. * @brief Updates "var" at the specified indices according to the Adadelta algorithm.
  2187. * "epsilon" is passed as an attribute instead of an input.\n
  2188. * accum <- rho * accum + (1 - rho) * grad.square()\n
  2189. * update <- (accum_update + epsilon).sqrt() * (accum + epsilon).rsqrt() * grad\n
  2190. * var <- var - update * lr\n
  2191. * accum_update <- rho * accum_update + (1 - rho) * update.square()\n
  2192. *
  2193. * @par Inputs:
  2194. * @li var: A mutable tensor. Must be one of the data types defined in
  2195. * TensorType::NumberType(). Should be from a Variable().
  2196. * @li accum: A mutable tensor. Must have the same type as "var". Should be from a
  2197. * Variable().
  2198. * @li accum_update: A mutable tensor. Must have the same type as "var". Should be from a
  2199. * Variable().
  2200. * @li lr: A scalar. Must have the same type as "var".
  2201. * @li rho: A scalar. Must have the same type as "var".
  2202. * @li grad: A tensor, specifying the gradient.
  2203. * @li indices: A vector of indices into the first dimension of "var", "accum" and "accum_update". Must be one of the types defined in TensorType::IndexNumberType().
  2204. *
  2205. * @par Attributes:
  2206. * @li use_locking: An optional "bool". Defaults to "False". If "True",
  2207. * updating of the "var", "accum", and "accum_update" tensors will be protected by a lock;
  2208. * otherwise the behavior is undefined, but may exhibit less contention.
  2209. * @li epsilon: A required float.
  2210. *
  2211. * @par Outputs:
  2212. * @li var: A mutable tensor. Must have the same type as input "var".
  2213. * @li accum: A mutable tensor. Must have the same type as input "accum".
  2214. * @li accum_update: A mutable tensor. Must have the same type as input "accum_update".
  2215. *
  2216. * @attention Constraints:
  2217. * @li Note that in this sparse implementation, "accum" and "accum_update" will not update
  2218. * in iterations during which "grad" is 0.
  2219. * @li The input tensors "var", "accum" and "accum_update" must have the same shape.
  2220. */
  2221. REG_OP(SparseApplyAdadeltaD)
  2222. .INPUT(var, TensorType::NumberType())
  2223. .INPUT(accum, TensorType::NumberType())
  2224. .INPUT(accum_update, TensorType::NumberType())
  2225. .INPUT(lr, TensorType::NumberType())
  2226. .INPUT(rho, TensorType::NumberType())
  2227. .INPUT(grad, TensorType::NumberType())
  2228. .INPUT(indices, TensorType::IndexNumberType())
  2229. .OUTPUT(var, TensorType::NumberType())
  2230. .OUTPUT(accum, TensorType::NumberType())
  2231. .OUTPUT(accum_update, TensorType::NumberType())
  2232. .REQUIRED_ATTR(epsilon, Float)
  2233. .ATTR(use_locking, Bool, false)
  2234. .OP_END_FACTORY_REG(SparseApplyAdadeltaD)
  2235. /**
  2236. * @brief Cleans the memory of a list of workspaces. The op declares no inputs or outputs.
  2237. * @par Attributes:
  2238. * automic_add_mem_size: An optional list of ints, the sizes of the workspaces. Defaults to {}.
  2239. */
  2240. REG_OP(AtomicAddrClean)
  2241. .ATTR(automic_add_mem_size, ListInt, {})
  2242. .OP_END_FACTORY_REG(AtomicAddrClean)
  2243. } // namespace ge
  2244. #endif // GE_OP_TRAINING_OPS_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示