You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

nn_training_ops.h 102 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*!
  17. * \file nn_training_ops.h
  18. * \brief
  19. */
  20. #ifndef GE_OP_TRAINING_OPS_H
  21. #define GE_OP_TRAINING_OPS_H
  22. #include "graph/operator_reg.h"
  23. namespace ge {
  24. /**
  25. *@brief Updates "var" according to the AdaMax algorithm.
  26. * "t-1" denotes the previous step.
  27. * m_t <- beta1 * m_{t-1} + (1 - beta1) * grad\n
  28. * v_t <- max(beta2 * v_{t-1}, abs(grad))\n
  29. * var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon)
  30. *
  31. *@attention Constraints:
  32. * The input tensors must have the same shape.
  33. *
  34. *@par Inputs:
  35. *@li var: A mutable tensor. Must be one of the following types: TensorType::NumberType().
  36. * Should be from a Variable().
  37. *@li m: A mutable tensor. Has the same type as "var".
  38. * Should be from a Variable().
  39. *@li v: A mutable tensor. Has the same type as "var".
  40. * Should be from a Variable().
  41. *@li beta1_power: A scalar. Has the same type as "var".
  42. *@li lr: The learning rate. A scalar. Has the same type as "var".
  43. *@li beta1: A scalar. Has the same type as "var".
  44. *@li beta2: A scalar. Has the same type as "var".
  45. *@li epsilon: A scalar. Has the same type as "var".
  46. *@li grad: A tensor for the gradient. Has the same type as "var".
  47. *
  48. *@par Attributes:
  49. * use_locking: An optional bool. Defaults to "False".
  50. * If "True", updating of the "var", "m", and "v" tensors is protected
  51. * by a lock; otherwise the behavior is undefined, but may exhibit less
  52. * contention.
  53. *
  54. *@par Outputs:
  55. * var: A mutable tensor. Has the same type as input "var".
  56. *
  57. *@par Third-party framework compatibility
  58. * Compatible with the TensorFlow operator ApplyAdaMax.
  59. *
  60. */
  61. REG_OP(ApplyAdaMax)
  62. .INPUT(var, TensorType::NumberType())
  63. .INPUT(m, TensorType::NumberType())
  64. .INPUT(v, TensorType::NumberType())
  65. .INPUT(beta1_power, TensorType::NumberType())
  66. .INPUT(lr, TensorType::NumberType())
  67. .INPUT(beta1, TensorType::NumberType())
  68. .INPUT(beta2, TensorType::NumberType())
  69. .INPUT(epsilon, TensorType::NumberType())
  70. .INPUT(grad, TensorType::NumberType())
  71. .OUTPUT(var, TensorType::NumberType())
  72. .ATTR(use_locking, Bool, false)
  73. .OP_END_FACTORY_REG(ApplyAdaMax)
  74. /**
  75. *@brief Updates "var" according to the AdaMax algorithm.
  76. * "t-1" denotes the previous step.
  77. * m_t <- beta1 * m_{t-1} + (1 - beta1) * grad\n
  78. * v_t <- max(beta2 * v_{t-1}, abs(grad))\n
  79. * var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon)
  80. *
  81. *@attention Constraints:
  82. * The input tensors must have the same shape.
  83. *
  84. *@par Inputs:
  85. *@li var: A mutable tensor. Must be one of the following types: TensorType::NumberType().
  86. * Should be from a Variable().
  87. *@li m: A mutable tensor. Has the same type as "var".
  88. * Should be from a Variable().
  89. *@li v: A mutable tensor. Has the same type as "var".
  90. * Should be from a Variable().
  91. *@li beta1_power: A scalar. Has the same type as "var".
  92. *@li lr: The learning rate. A scalar. Has the same type as "var".
  93. *@li beta1: A scalar. Has the same type as "var".
  94. *@li beta2: A scalar. Has the same type as "var".
  95. *@li epsilon: A scalar. Has the same type as "var".
  96. *@li grad: A tensor for the gradient. Has the same type as "var".
  97. *
  98. *@par Attributes:
  99. * use_locking: An optional bool. Defaults to "False".
  100. * If "True", updating of the "var", "m", and "v" tensors is protected
  101. * by a lock; otherwise the behavior is undefined, but may exhibit less
  102. * contention.
  103. *
  104. *@par Outputs:
  105. *@li var: A mutable tensor. Has the same type as input "var".
  106. *@li m: A mutable tensor. Has the same type as input "m".
  107. *@li v: A mutable tensor. Has the same type as input "v".
  108. *
  109. *@par Third-party framework compatibility
  110. * Compatible with the TensorFlow operator ApplyAdaMax.
  111. *
  112. */
  113. REG_OP(ApplyAdaMaxD)
  114. .INPUT(var, TensorType::NumberType())
  115. .INPUT(m, TensorType::NumberType())
  116. .INPUT(v, TensorType::NumberType())
  117. .INPUT(beta1_power, TensorType::NumberType())
  118. .INPUT(lr, TensorType::NumberType())
  119. .INPUT(beta1, TensorType::NumberType())
  120. .INPUT(beta2, TensorType::NumberType())
  121. .INPUT(epsilon, TensorType::NumberType())
  122. .INPUT(grad, TensorType::NumberType())
  123. .OUTPUT(var, TensorType::NumberType())
  124. .OUTPUT(m, TensorType::NumberType())
  125. .OUTPUT(v, TensorType::NumberType())
  126. .ATTR(use_locking, Bool, false)
  127. .OP_END_FACTORY_REG(ApplyAdaMaxD)
  128. /**
  129. *@brief Updates relevant entries in "var" and "accum" according to the Adagrad scheme.
  130. *@par Inputs:
  131. * Five inputs, including:
  132. *@li var: An NCHW, NHWC, or ND Tensor of type float32.
  133. *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
  134. *@li lr: An NCHW, NHWC, or ND Tensor of type float32.
  135. *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
  136. *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
  137. *@par Attributes:
  138. *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
  139. *@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different.
  140. *@par Outputs:
  141. * var: A Tensor. Has the same type and format as input "var".
  142. *@par Third-party framework compatibility
  143. * Compatible with the TensorFlow operator SparseApplyAdagrad.
  144. */
  145. REG_OP(SparseApplyAdagrad)
  146. .INPUT(var, TensorType({DT_FLOAT}))
  147. .INPUT(accum, TensorType({DT_FLOAT}))
  148. .INPUT(lr, TensorType({DT_FLOAT}))
  149. .INPUT(grad, TensorType({DT_FLOAT}))
  150. .INPUT(indices, TensorType({DT_INT32}))
  151. .OUTPUT(var, TensorType({DT_FLOAT}))
  152. .ATTR(use_locking, Bool, false)
  153. .ATTR(update_slots, Bool, true)
  154. .OP_END_FACTORY_REG(SparseApplyAdagrad)
  155. /**
  156. *@brief Updates relevant entries in "var" and "accum" according to the Adagrad scheme.
  157. *@par Inputs:
  158. * Four inputs, including:
  159. *@li var: An NCHW, NHWC, or ND Tensor of type float32.
  160. *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
  161. *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
  162. *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
  163. *@par Attributes:
  164. *@li lr: A required float, used for computation.
  165. *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
  166. *@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different.
  167. *@par Outputs:
  168. *@li var: A Tensor. Has the same type and format as input "var".
  169. *@li accum: A Tensor. Has the same type and format as input "accum".
  170. *@par Third-party framework compatibility
  171. * Compatible with the TensorFlow operator SparseApplyAdagrad.
  172. *
  173. *@par Restrictions:
  174. * Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyAdagrad instead.
  175. */
  176. REG_OP(SparseApplyAdagradD)
  177. .INPUT(var, TensorType({DT_FLOAT}))
  178. .INPUT(accum, TensorType({DT_FLOAT}))
  179. .INPUT(grad, TensorType({DT_FLOAT}))
  180. .INPUT(indices, TensorType({DT_INT32}))
  181. .OUTPUT(var, TensorType({DT_FLOAT}))
  182. .OUTPUT(accum, TensorType({DT_FLOAT}))
  183. .REQUIRED_ATTR(lr, Float)
  184. .ATTR(use_locking, Bool, false)
  185. .ATTR(update_slots, Bool, true)
  186. .OP_END_FACTORY_REG(SparseApplyAdagradD)
  187. /**
  188. *@brief Updates relevant entries in "var" and "accum" according to the Adagrad scheme.
  189. *@par Inputs:
  190. * Six inputs, including:
  191. *@li var: An NCHW, NHWC, or ND Tensor of type float32.
  192. *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
  193. *@li lr: An NCHW, NHWC, or ND Tensor of type float32.
  194. *@li epsilon: An NCHW, NHWC, or ND Tensor of type float32.
  195. *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
  196. *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
  197. *@par Attributes:
  198. *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
  199. *@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different.
  200. *@par Outputs:
  201. * var: A Tensor. Has the same type and format as input "var".
  202. *@par Third-party framework compatibility
  203. * Compatible with the TensorFlow operator SparseApplyAdagradV2.
  204. */
  205. REG_OP(SparseApplyAdagradV2)
  206. .INPUT(var, TensorType({DT_FLOAT}))
  207. .INPUT(accum, TensorType({DT_FLOAT}))
  208. .INPUT(lr, TensorType({DT_FLOAT}))
  209. .INPUT(epsilon, TensorType({DT_FLOAT}))
  210. .INPUT(grad, TensorType({DT_FLOAT}))
  211. .INPUT(indices, TensorType({DT_INT32}))
  212. .OUTPUT(var, TensorType({DT_FLOAT}))
  213. .ATTR(use_locking, Bool, false)
  214. .ATTR(update_slots, Bool, true)
  215. .OP_END_FACTORY_REG(SparseApplyAdagradV2)
  216. /**
  217. *@brief Updates relevant entries in "var" and "accum" according to the Adagrad scheme.
  218. *@par Inputs:
  219. * Four inputs, including:
  220. *@li var: An NCHW, NHWC, or ND Tensor of type float32.
  221. *@li accum: An NCHW, NHWC, or ND Tensor of type float32.
  222. *@li grad: An NCHW, NHWC, or ND Tensor of type float32.
  223. *@li indices: An NCHW, NHWC, or ND Tensor of type int32.
  224. *@par Attributes:
  225. *@li lr: A required float, used for computation.
  226. *@li epsilon: A required float, used for computation.
  227. *@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock.
  228. *@li update_slots: An optional bool. Defaults to "True". If "False", the computation logic will be different.
  229. *@par Outputs:
  230. *@li var: A Tensor. Has the same type and format as input "var".
  231. *@li accum: A Tensor. Has the same type and format as input "accum".
  232. *@par Third-party framework compatibility
  233. * Compatible with the TensorFlow operator SparseApplyAdagradV2.
  234. *
  235. *@par Restrictions:
  236. * Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyAdagradV2 instead.
  237. */
  238. REG_OP(SparseApplyAdagradV2D)
  239. .INPUT(var, TensorType({DT_FLOAT}))
  240. .INPUT(accum, TensorType({DT_FLOAT}))
  241. .INPUT(grad, TensorType({DT_FLOAT}))
  242. .INPUT(indices, TensorType({DT_INT32}))
  243. .OUTPUT(var, TensorType({DT_FLOAT}))
  244. .OUTPUT(accum, TensorType({DT_FLOAT}))
  245. .REQUIRED_ATTR(lr, Float)
  246. .REQUIRED_ATTR(epsilon, Float)
  247. .ATTR(use_locking, Bool, false)
  248. .ATTR(update_slots, Bool, true)
  249. .OP_END_FACTORY_REG(SparseApplyAdagradV2D)
  250. /**
  251. *@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you
  252. * want to use Nesterov momentum.
  253. * Computing process: \n
  254. * accum = accum * momentum + grad\n
  255. * var -= lr * accum
  256. *
  257. *@attention Constraints:
  258. * The input tensors must have the same shape.
  259. *
  260. *@par Inputs:
  261. *@li var: A mutable tensor. Must be one of the types in TensorType::NumberType(). Should be from a Variable().
  262. *@li accum: A mutable tensor. Has the same type as "var".
  263. * Should be from a Variable().
  264. *@li lr: A scalar. Has the same type as "var".
  265. *@li grad: A tensor for the gradient. Has the same type as "var".
  266. *@li momentum: The momentum input. Must be one of the types in TensorType::NumberType().
  267. *@par Attributes:
  268. *@li use_nesterov: An optional bool. Defaults to "False".
  269. * If "True", the tensor passed to compute grad will be
  270. * var - lr * momentum * accum, so in the end, the var you get is actually
  271. * var - lr * momentum * accum.
  272. *
  273. *@li use_locking: An optional bool. Defaults to "False".
  274. * If "True", updating of the "var" and "accum" tensors is protected by a lock;
  275. * otherwise the behavior is undefined, but may exhibit less contention.
  276. *
  277. *@par Outputs:
  278. * var: A mutable tensor. Has the same type as input "var".
  279. *
  280. *@par Third-party framework compatibility
  281. * Compatible with the TensorFlow operator ApplyMomentum.
  282. *
  283. */
  284. REG_OP(ApplyMomentum)
  285. .INPUT(var, TensorType::NumberType())
  286. .INPUT(accum, TensorType::NumberType())
  287. .INPUT(lr, TensorType::NumberType())
  288. .INPUT(grad, TensorType::NumberType())
  289. .INPUT(momentum, TensorType::NumberType())
  290. .OUTPUT(var, TensorType::NumberType())
  291. .ATTR(use_nesterov, Bool, false)
  292. .ATTR(use_locking, Bool, false)
  293. .OP_END_FACTORY_REG(ApplyMomentum)
  294. /**
  295. *@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you
  296. * want to use Nesterov momentum.
  297. * Computing process: \n
  298. * accum = accum * momentum + grad\n
  299. * var -= lr * accum
  300. *
  301. *@attention Constraints:
  302. * The input tensors must have the same shape.
  303. *
  304. *@par Inputs:
  305. *@li var: A mutable tensor. Must be one of the types in TensorType::NumberType(). Should be from a Variable().
  306. *@li accum: A mutable tensor. Has the same type as "var".
  307. * Should be from a Variable().
  308. *@li lr: A scalar. Has the same type as "var".
  309. *@li grad: A tensor for the gradient. Has the same type as "var".
  310. *@li momentum: The momentum input. Must be one of the types in TensorType::NumberType().
  311. *@par Attributes:
  312. *@li use_nesterov: An optional bool. Defaults to "False".
  313. * If "True", the tensor passed to compute grad will be
  314. * var - lr * momentum * accum, so in the end, the var you get is actually
  315. * var - lr * momentum * accum.
  316. *
  317. *@li use_locking: An optional bool. Defaults to "False".
  318. * If "True", updating of the "var" and "accum" tensors is protected by a lock;
  319. * otherwise the behavior is undefined, but may exhibit less contention.
  320. *
  321. *@par Outputs:
  322. *@li var: A mutable tensor. Has the same type as input "var".
  323. *@li accum: A mutable tensor. Has the same type as input "accum".
  324. *@par Third-party framework compatibility
  325. * Compatible with the TensorFlow operator ApplyMomentum.
  326. *
  327. */
  328. REG_OP(ApplyMomentumD)
  329. .INPUT(var, TensorType::NumberType())
  330. .INPUT(accum, TensorType::NumberType())
  331. .INPUT(lr, TensorType::NumberType())
  332. .INPUT(grad, TensorType::NumberType())
  333. .INPUT(momentum, TensorType::NumberType())
  334. .OUTPUT(var, TensorType::NumberType())
  335. .OUTPUT(accum, TensorType::NumberType())
  336. .ATTR(use_nesterov, Bool, false)
  337. .ATTR(use_locking, Bool, false)
  338. .OP_END_FACTORY_REG(ApplyMomentumD)
  339. /**
  340. *@brief Updates "var" according to the momentum scheme. Computing process: \n
  341. * accum = accum * momentum - grad * lr \n
  342. * if use_nesterov is True: \n
  343. * var += accum * momentum - grad * lr \n
  344. * else: \n
  345. * var += accum
  346. *
  347. *@par Inputs:
  348. *@li var: A mutable tensor. Must be one of the data types defined in
  349. * TensorType::NumberType(). Should be from a Variable().
  350. *@li accum: A mutable tensor. Has the same type as "var". Should be from a
  351. * Variable().
  352. *@li lr: A tensor for the learning rate. Has the same type as "var". Should be
  353. * from a Variable().
  354. *@li grad: A tensor for the gradient. Has the same type as "var". Should be
  355. * from a Variable().
  356. *@li momentum: A scalar. Has the same type as "var".
  357. *
  358. *@par Attributes:
  359. *@li use_nesterov: An optional bool. Defaults to "False".
  360. * If "True", "var" will be updated by using Nesterov momentum.
  361. *@li use_locking: An optional bool. Defaults to "False".
  362. * If "True", updating of the "var" tensor is protected by a lock;
  363. * otherwise the behavior is undefined, but may exhibit less contention.
  364. *
  365. *@par Outputs:
  366. * var: A mutable tensor. Has the same type as input "var".
  367. *
  368. *@attention Constraints:
  369. * The input tensors must have the same shape.
  370. *
  371. *@par Third-party framework compatibility
  372. * Compatible with the TensorFlow operator ResourceApplyKerasMomentum.
  373. *
  374. */
  375. REG_OP(ApplyKerasMomentum)
  376. .INPUT(var, TensorType::NumberType())
  377. .INPUT(accum, TensorType::NumberType())
  378. .INPUT(lr, TensorType::NumberType())
  379. .INPUT(grad, TensorType::NumberType())
  380. .INPUT(momentum, TensorType::NumberType())
  381. .OUTPUT(var, TensorType::NumberType())
  382. .ATTR(use_locking, Bool, false)
  383. .ATTR(use_nesterov, Bool, false)
  384. .OP_END_FACTORY_REG(ApplyKerasMomentum)
  385. /**
  386. *@brief Updates "var" according to the momentum scheme. Computing process: \n
  387. * accum = accum * momentum - grad * lr \n
  388. * if use_nesterov is True: \n
  389. * var += accum * momentum - grad * lr \n
  390. * else: \n
  391. * var += accum
  392. *
  393. *@par Inputs:
  394. *@li var: A mutable tensor. Must be one of the data types defined in
  395. * TensorType::NumberType(). Should be from a Variable().
  396. *@li accum: A mutable tensor. Has the same type as "var". Should be from a
  397. * Variable().
  398. *@li lr: A tensor for the learning rate. Has the same type as "var". Should be
  399. * from a Variable().
  400. *@li grad: A tensor for the gradient. Has the same type as "var". Should be
  401. * from a Variable().
  402. *@li momentum: A scalar. Has the same type as "var". Should be from a
  403. * Variable().
  404. *
  405. *@par Attributes:
  406. *@li use_nesterov: An optional bool. Defaults to "False".
  407. * If "True", "var" will be updated by using Nesterov momentum.
  408. *@li use_locking: An optional bool. Defaults to "False".
  409. * If "True", updating of the "var" tensor is protected by a lock;
  410. * otherwise the behavior is undefined, but may exhibit less contention.
  411. *
  412. *@par Outputs:
  413. *@li var: A mutable tensor. Has the same type as input "var".
  414. *@li accum: A mutable tensor. Has the same type as input "accum".
  415. *
  416. *@attention Constraints:
  417. * The input tensors must have the same shape.
  418. *
  419. *@par Third-party framework compatibility
  420. * Compatible with the TensorFlow operator ResourceApplyKerasMomentum.
  421. *
  422. *@par Restrictions:
  423. * Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyKerasMomentum instead.
  424. */
  425. REG_OP(ApplyKerasMomentumD)
  426. .INPUT(var, TensorType::NumberType())
  427. .INPUT(accum, TensorType::NumberType())
  428. .INPUT(lr, TensorType::NumberType())
  429. .INPUT(grad, TensorType::NumberType())
  430. .INPUT(momentum, TensorType::NumberType())
  431. .OUTPUT(var, TensorType::NumberType())
  432. .OUTPUT(accum, TensorType::NumberType())
  433. .ATTR(use_locking, Bool, false)
  434. .ATTR(use_nesterov, Bool, false)
  435. .OP_END_FACTORY_REG(ApplyKerasMomentumD)
  436. /**
  437. *@brief Updates '*var' according to the Adam algorithm.
  438. * lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t)
  439. * m_t := beta_1 * m_{t-1} + (1 - beta_1) * g
  440. * v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g
  441. * vhat_t := max{vhat_{t-1}, v_t}
  442. * variable := variable - lr_t * m_t / (sqrt{vhat_t} + epsilon)
  443. *
  444. *@par Inputs:
  445. *@li var: A mutable tensor. Must be one of the data types defined in
  446. * TensorType::NumberType(). Should be from a Variable().
  447. *@li m: A mutable tensor. Has the same type as "var". Should be from a
  448. * Variable().
  449. *@li v: A mutable tensor. Has the same type as "var". Should be from a
  450. * Variable().
  451. *@li vhat: A mutable tensor. Has the same type as "var". Should be from a
  452. * Variable().
  453. *@li beta1_power: A mutable tensor. Has the same type as "var". Should be from a
  454. * Variable().
  455. *@li beta2_power: A mutable tensor. Has the same type as "var". Should be from a
  456. * Variable().
  457. *@li lr: A tensor for the learning rate. Has the same type as "var". Should be
  458. * from a Variable().
  459. *@li grad: A tensor for the gradient. Has the same type as "var". Should be
  460. * from a Variable().
  461. *
  462. *@par Attributes:
  463. *@li beta1: A scalar. Has the same type as "var".
  464. *@li beta2: A scalar. Has the same type as "var".
  465. *@li epsilon: A scalar. Has the same type as "var".
  466. *@li use_locking: An optional bool. Defaults to "False".
  467. * If "True", updating of the "var" tensor is protected by a lock;
  468. * otherwise the behavior is undefined, but may exhibit less contention.
  469. *
  470. *@par Outputs:
  471. *@li var: A mutable tensor. Has the same type as input "var".
  472. *@li m: A mutable tensor. Has the same type as input "var"
  473. *@li v: A mutable tensor. Has the same type as input "var"
  474. *@li vhat: A mutable tensor. Has the same type as input "var"
  475. *
  476. *@attention Constraints:
  477. * The input tensors must have the same shape.
  478. *
  479. *@par Third-party framework compatibility
  480. * Compatible with the TensorFlow operator ResourceApplyKerasMomentum.
  481. *
  482. *@par Restrictions:
  483. *Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdamWithAmsgrad instead.
  484. *
  485. */
  486. REG_OP(ApplyAdamWithAmsgradD)
  487. .INPUT(var, TensorType::NumberType())  // also declared as an output below
  488. .INPUT(m, TensorType::NumberType())  // also declared as an output below
  489. .INPUT(v, TensorType::NumberType())  // also declared as an output below
  490. .INPUT(vhat, TensorType::NumberType())  // also declared as an output below
  491. .INPUT(beta1_power, TensorType::NumberType())
  492. .INPUT(beta2_power, TensorType::NumberType())
  493. .INPUT(lr, TensorType::NumberType())
  494. .INPUT(grad, TensorType::NumberType())
  495. .OUTPUT(var, TensorType::NumberType())
  496. .OUTPUT(m, TensorType::NumberType())
  497. .OUTPUT(v, TensorType::NumberType())
  498. .OUTPUT(vhat, TensorType::NumberType())
  499. .REQUIRED_ATTR(beta1, Float)  // beta1/beta2/epsilon are Float attributes here, not tensor inputs
  500. .REQUIRED_ATTR(beta2, Float)
  501. .REQUIRED_ATTR(epsilon, Float)
  502. .ATTR(use_locking, Bool, false)  // optional attribute; default value is false
  503. .OP_END_FACTORY_REG(ApplyAdamWithAmsgradD)
  504. /**
  505. *@brief Updates '*var' according to the Adam algorithm (AMSGrad variant).
  506. * lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t)
  507. * m_t := beta_1 * m_{t-1} + (1 - beta_1) * g
  508. * v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g
  509. * vhat_t := max{vhat_{t-1}, v_t}
  510. * variable := variable - lr_t * m_t / (sqrt{vhat_t} + epsilon)
  511. *
  512. *@par Inputs:
  513. *@li var: A mutable tensor. Must be one of the data types defined in
  514. * TensorType::NumberType(). Should be from a Variable().
  515. *@li m: A mutable tensor. Has the same type as "var". Should be from a
  516. * Variable().
  517. *@li v: A mutable tensor. Has the same type as "var". Should be from a
  518. * Variable().
  519. *@li vhat: A mutable tensor. Has the same type as "var". Should be from a
  520. * Variable().
  521. *@li beta1_power: A mutable tensor. Has the same type as "var". Should be from a
  522. * Variable().
  523. *@li beta2_power: A mutable tensor. Has the same type as "var". Should be from a
  524. * Variable().
  525. *@li lr: A tensor for the learning rate. Has the same type as "var". Should be
  526. * from a Variable().
  527. *@li beta1: A scalar. Has the same type as "var".
  528. *@li beta2: A scalar. Has the same type as "var".
  529. *@li epsilon: A scalar. Has the same type as "var".
  530. *@li grad: A tensor for the gradient. Has the same type as "var". Should be
  531. * from a Variable().
  532. *
  533. *@par Attributes:
  534. * use_locking: An optional bool. Defaults to "False".
  535. * If "True", updating of the "var" tensor is protected by a lock;
  536. * otherwise the behavior is undefined, but may exhibit less contention.
  537. *
  538. *@par Outputs:
  539. * var: A mutable tensor. Has the same type as input "var".
  540. * Note: "var" is the only output registered for this operator. "m", "v"
  541. * and "vhat" are registered as inputs only, unlike ApplyAdamWithAmsgradD,
  542. * which also registers them as outputs.
  543. *
  544. *@attention Constraints:
  545. * The input tensors must have the same shape.
  546. *
  547. *@par Third-party framework compatibility
  548. * Compatible with the TensorFlow operator ResourceApplyAdamWithAmsgrad.
  549. *
  550. */
  551. REG_OP(ApplyAdamWithAmsgrad)
  552. .INPUT(var, TensorType::NumberType())
  553. .INPUT(m, TensorType::NumberType())
  554. .INPUT(v, TensorType::NumberType())
  555. .INPUT(vhat, TensorType::NumberType())
  556. .INPUT(beta1_power, TensorType::NumberType())
  557. .INPUT(beta2_power, TensorType::NumberType())
  558. .INPUT(lr, TensorType::NumberType())
  559. .INPUT(beta1, TensorType::NumberType())
  560. .INPUT(beta2, TensorType::NumberType())
  561. .INPUT(epsilon, TensorType::NumberType())
  562. .INPUT(grad, TensorType::NumberType())
  563. .OUTPUT(var, TensorType::NumberType())
  564. .ATTR(use_locking, Bool, false)
  565. .OP_END_FACTORY_REG(ApplyAdamWithAmsgrad)
  566. /**
  567. *@brief Updates "var" according to the PowerSign update.
  568. * t-1 means the previous period.
  569. * m_t <- beta * m_{t-1} + (1 - beta) * grad\n
  570. * update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad\n
  571. * var <- var - lr * update
  572. *
  573. *@attention Constraints:
  574. * the input tensors must have the same shape.
  575. *
  576. *@par Inputs:
  577. *@li var: A mutable tensor. Should be from a Variable().
  578. *@li m: A mutable tensor. Has the same type as "var".
  579. * Should be from a Variable().
  580. *@li lr: A scalar. Has the same type as "var".
  581. *@li logbase: A scalar. Has the same type as "var".
  582. *@li sign_decay: A scalar. Has the same type as "var".
  583. *@li beta: A scalar. Has the same type as "var".
  584. *@li grad: A tensor for the gradient. Has the same type as "var".
  585. *
  586. *@par Attributes:
  587. * use_locking: An optional bool. Defaults to "False".
  588. * If "True", updating of the "var" and "m" tensors is protected
  589. * by a lock; otherwise the behavior is undefined, but may exhibit less
  590. * contention.
  591. *
  592. *@par Outputs:
  593. * var: A mutable tensor. Has the same type as input "var".
  594. *
  595. *@par Third-party framework compatibility
  596. *Compatible with the TensorFlow operator ApplyPowerSign.
  597. *
  598. */
  599. REG_OP(ApplyPowerSign)
  600. .INPUT(var, TensorType::NumberType())
  601. .INPUT(m, TensorType::NumberType())
  602. .INPUT(lr, TensorType::NumberType())
  603. .INPUT(logbase, TensorType::NumberType())
  604. .INPUT(sign_decay, TensorType::NumberType())
  605. .INPUT(beta, TensorType::NumberType())
  606. .INPUT(grad, TensorType::NumberType())
  607. .OUTPUT(var, TensorType::NumberType())
  608. .ATTR(use_locking, Bool, false)
  609. .OP_END_FACTORY_REG(ApplyPowerSign)
  610. /**
  611. *@brief Updates "var" according to the PowerSign update.
  612. * t-1 means the previous period.
  613. * m_t <- beta * m_{t-1} + (1 - beta) * grad\n
  614. * update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad\n
  615. * var <- var - lr * update
  616. *
  617. *@attention Constraints:
  618. * the input tensors must have the same shape.
  619. *
  620. *@par Inputs:
  621. *@li var: A mutable tensor. Should be from a Variable().
  622. *@li m: A mutable tensor. Has the same type as "var".
  623. * Should be from a Variable().
  624. *@li lr: A scalar. Has the same type as "var".
  625. *@li logbase: A scalar. Has the same type as "var".
  626. *@li sign_decay: A scalar. Has the same type as "var".
  627. *@li beta: A scalar. Has the same type as "var".
  628. *@li grad: A tensor for the gradient. Has the same type as "var".
  629. *
  630. *@par Attributes:
  631. * use_locking: An optional bool. Defaults to "False".
  632. * If "True", updating of the "var" and "m" tensors is protected
  633. * by a lock; otherwise the behavior is undefined, but may exhibit less
  634. * contention.
  635. *
  636. *@par Outputs:
  637. *@li var: A mutable tensor. Has the same type as input "var".
  638. *@li m: A mutable tensor. Has the same type as input "var".
  639. *
  640. *@par Third-party framework compatibility
  641. *Compatible with the TensorFlow operator ApplyPowerSign.
  642. *
  643. */
  644. REG_OP(ApplyPowerSignD)
  645. .INPUT(var, TensorType::NumberType())
  646. .INPUT(m, TensorType::NumberType())
  647. .INPUT(lr, TensorType::NumberType())
  648. .INPUT(logbase, TensorType::NumberType())
  649. .INPUT(sign_decay, TensorType::NumberType())
  650. .INPUT(beta, TensorType::NumberType())
  651. .INPUT(grad, TensorType::NumberType())
  652. .OUTPUT(var, TensorType::NumberType())
  653. .OUTPUT(m, TensorType::NumberType())
  654. .ATTR(use_locking, Bool, false)
  655. .OP_END_FACTORY_REG(ApplyPowerSignD)
  656. /**
  657. *@brief Updates "var" according to the FOBOS algorithm with a fixed learning rate.\n
  658. * prox_v = var - alpha * delta\n
  659. * var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
  660. *
  661. *@attention Constraints:\n
  662. * the input tensors must have the same shape.
  663. *
  664. *@par Inputs:
  665. *@li var: A mutable tensor. Should be from a Variable().
  666. *@li alpha: A scalar. Has the same type as "var".
  667. *@li l1: A scalar. Has the same type as "var".
  668. *@li l2: A scalar. Has the same type as "var".
  669. *@li delta: A tensor for the change. Has the same type as "var".
  670. *
  671. *@par Attributes:
  672. * use_locking: An optional bool. Defaults to "False".
  673. * If "True", updating of the "var" tensor is protected
  674. * by a lock; otherwise the behavior is undefined, but may exhibit less
  675. * contention.
  676. *
  677. *@par Outputs:
  678. * var: A mutable tensor. Has the same type as input "var".
  679. *
  680. *@par Third-party framework compatibility
  681. *Compatible with the TensorFlow operator ApplyProximalGradientDescent.
  682. *
  683. */
  684. REG_OP(ApplyProximalGradientDescent)
  685. .INPUT(var, TensorType::NumberType())
  686. .INPUT(alpha, TensorType::NumberType())
  687. .INPUT(l1, TensorType::NumberType())
  688. .INPUT(l2, TensorType::NumberType())
  689. .INPUT(delta, TensorType::NumberType())
  690. .OUTPUT(var, TensorType::NumberType())
  691. .ATTR(use_locking, Bool, false)
  692. .OP_END_FACTORY_REG(ApplyProximalGradientDescent)
  693. /**
  694. *@brief Updates "var" according to the AddSign update.
  695. *@par Inputs:
  696. *Seven inputs, including:
  697. * @li var: A mutable Tensor of type TensorType::NumberType().
  698. * Should be a Variable Tensor.
  699. * @li m: A mutable Tensor of the same type as "var".
  700. * Should be a Variable Tensor.
  701. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  702. * @li alpha: A Tensor of the same type as "var". Must be a scalar.
  703. * @li sign_decay: A Tensor of the same type as "var". Must be a scalar.
  704. * @li beta: A Tensor of the same type as "var". Must be a scalar.
  705. * @li grad: A Tensor of the same type as "var", for the gradient.
  706. *@par Attributes:
  707. *use_locking: An optional bool. Defaults to "False".
  708. * If "True", updating of the "var" and "m" tensors will be
  709. * protected by a lock; otherwise the behavior is undefined,
  710. * but may exhibit less contention.
  711. *@par Outputs:
  712. *var: A mutable Tensor. Has the same type as input "var".
  713. *@par Third-party framework compatibility
  714. * Compatible with the TensorFlow operator ApplyAddSign.
  715. */
  716. REG_OP(ApplyAddSign)
  717. .INPUT(var, TensorType::NumberType())
  718. .INPUT(m, TensorType::NumberType())
  719. .INPUT(lr, TensorType::NumberType())
  720. .INPUT(alpha, TensorType::NumberType())
  721. .INPUT(sign_decay, TensorType::NumberType())
  722. .INPUT(beta, TensorType::NumberType())
  723. .INPUT(grad, TensorType::NumberType())
  724. .OUTPUT(var, TensorType::NumberType())
  725. .ATTR(use_locking, Bool, false)
  726. .OP_END_FACTORY_REG(ApplyAddSign)
  727. /**
  728. *@brief Updates "var" according to the AddSign update.
  729. *@par Inputs:
  730. *Seven inputs, including:
  731. * @li var: A mutable Tensor of type TensorType::NumberType().
  732. * Should be a Variable Tensor.
  733. * @li m: A mutable Tensor of the same type as "var".
  734. * Should be a Variable Tensor.
  735. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  736. * @li alpha: A Tensor of the same type as "var". Must be a scalar.
  737. * @li sign_decay: A Tensor of the same type as "var". Must be a scalar.
  738. * @li beta: A Tensor of the same type as "var". Must be a scalar.
  739. * @li grad: A Tensor of the same type as "var", for the gradient.
  740. *@par Attributes:
  741. *use_locking: An optional bool. Defaults to "False".
  742. * If "True", updating of the "var" and "m" tensors will be
  743. * protected by a lock; otherwise the behavior is undefined,
  744. * but may exhibit less contention.
  745. *@par Outputs:
  746. *@li var: A mutable Tensor. Has the same type as input "var".
  747. *@li m: A mutable Tensor. Has the same type as input "m".
  748. *@par Third-party framework compatibility
  749. * Compatible with the TensorFlow operator ApplyAddSign.
  750. */
  751. REG_OP(ApplyAddSignD)
  752. .INPUT(var, TensorType::NumberType())
  753. .INPUT(m, TensorType::NumberType())
  754. .INPUT(lr, TensorType::NumberType())
  755. .INPUT(alpha, TensorType::NumberType())
  756. .INPUT(sign_decay, TensorType::NumberType())
  757. .INPUT(beta, TensorType::NumberType())
  758. .INPUT(grad, TensorType::NumberType())
  759. .OUTPUT(var, TensorType::NumberType())
  760. .OUTPUT(m, TensorType::NumberType())
  761. .ATTR(use_locking, Bool, false)
  762. .OP_END_FACTORY_REG(ApplyAddSignD)
  763. /**
  764. *@brief Updates "var" according to the centered RMSProp algorithm.
  765. * The centered RMSProp algorithm uses an estimate of the centered second moment
  766. * (i.e., the variance) for normalization, as opposed to regular RMSProp, which
  767. * uses the (uncentered) second moment. This often helps with training, but is
  768. * slightly more expensive in terms of computation and memory.
  769. *
  770. * t-1 means the previous period.
  771. * mg <- rho * mg{t-1} + (1-rho) * grad\n
  772. * ms <- rho * ms{t-1} + (1-rho) * grad * grad\n
  773. * mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\n
  774. * var <- var - mom\n
  775. *
  776. *@attention Constraints:
  777. *@li in dense implementation of this algorithm, mg, ms, and mom will
  778. * update even if the grad is zero, but in this sparse implementation, mg, ms,
  779. * and mom will not update in iterations during which the grad is zero.
  780. *@li the input tensors must have the same shape.
  781. *
  782. *@par Inputs:
  783. *@li var: A mutable tensor. Should be from a Variable().
  784. *@li mg: A mutable tensor. Has the same type as "var".
  785. * Should be from a Variable().
  786. *@li ms: A mutable tensor. Has the same type as "var".
  787. * Should be from a Variable().
  788. *@li mom: A mutable tensor. Has the same type as "var".
  789. * Should be from a Variable().
  790. *@li lr: A scalar. Has the same type as "var".
  791. *@li rho: A scalar. Has the same type as "var".
  792. *@li momentum: A tensor. Has the same type as "var".
  793. *@li epsilon: A scalar. Has the same type as "var".
  794. *@li grad: A tensor for the gradient. Has the same type as "var".
  795. *
  796. *@par Attributes:
  797. * use_locking: An optional bool. Defaults to "False".
  798. * If "True", updating of the "var", "mg", "ms", and "mom" tensors is
  799. * protected by a lock; otherwise the behavior is undefined, but may exhibit
  800. * less contention.
  801. *
  802. *@par Outputs:
  803. * var: A mutable tensor. Has the same type as input "var".
  804. *
  805. *@par Third-party framework compatibility
  806. *Compatible with the TensorFlow operator ApplyCenteredRMSProp.
  807. *
  808. */
  809. REG_OP(ApplyCenteredRMSProp)
  810. .INPUT(var, TensorType::NumberType())
  811. .INPUT(mg, TensorType::NumberType())
  812. .INPUT(ms, TensorType::NumberType())
  813. .INPUT(mom, TensorType::NumberType())
  814. .INPUT(lr, TensorType::NumberType())
  815. .INPUT(rho, TensorType::NumberType())
  816. .INPUT(momentum, TensorType::NumberType())
  817. .INPUT(epsilon, TensorType::NumberType())
  818. .INPUT(grad, TensorType::NumberType())
  819. .OUTPUT(var, TensorType::NumberType())
  820. .ATTR(use_locking, Bool, false)
  821. .OP_END_FACTORY_REG(ApplyCenteredRMSProp)
  822. /**
  823. *@brief Updates "var" according to the centered RMSProp algorithm.
  824. * The centered RMSProp algorithm uses an estimate of the centered second moment
  825. * (i.e., the variance) for normalization, as opposed to regular RMSProp, which
  826. * uses the (uncentered) second moment. This often helps with training, but is
  827. * slightly more expensive in terms of computation and memory.
  828. *
  829. * t-1 means the previous period.
  830. * mg <- rho * mg{t-1} + (1-rho) * grad\n
  831. * ms <- rho * ms{t-1} + (1-rho) * grad * grad\n
  832. * mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\n
  833. * var <- var - mom\n
  834. *
  835. *@attention Constraints:
  836. *@li in dense implementation of this algorithm, mg, ms, and mom will
  837. * update even if the grad is zero, but in this sparse implementation, mg, ms,
  838. * and mom will not update in iterations during which the grad is zero.
  839. *@li the input tensors must have the same shape.
  840. *
  841. *@par Inputs:
  842. *@li var: A mutable tensor. Should be from a Variable().
  843. *@li mg: A mutable tensor. Has the same type as "var".
  844. * Should be from a Variable().
  845. *@li ms: A mutable tensor. Has the same type as "var".
  846. * Should be from a Variable().
  847. *@li mom: A mutable tensor. Has the same type as "var".
  848. * Should be from a Variable().
  849. *@li lr: A scalar. Has the same type as "var".
  850. *@li rho: A scalar. Has the same type as "var".
  851. *@li momentum: A tensor. Has the same type as "var".
  852. *@li epsilon: A scalar. Has the same type as "var".
  853. *@li grad: A tensor for the gradient. Has the same type as "var".
  854. *
  855. *@par Attributes:
  856. * use_locking: An optional bool. Defaults to "False".
  857. * If "True", updating of the "var", "mg", "ms", and "mom" tensors is
  858. * protected by a lock; otherwise the behavior is undefined, but may exhibit
  859. * less contention.
  860. *
  861. *@par Outputs:
  862. *@li var: A mutable Tensor. Has the same type as input "var".
  863. *@li mg: A mutable Tensor. Has the same type as input "mg".
  864. *@li ms: A mutable Tensor. Has the same type as input "ms".
  865. *@li mom: A mutable Tensor. Has the same type as input "mom".
  866. *@par Third-party framework compatibility
  867. *Compatible with the TensorFlow operator ApplyCenteredRMSProp.
  868. *
  869. */
  870. REG_OP(ApplyCenteredRMSPropD)
  871. .INPUT(var, TensorType::NumberType())
  872. .INPUT(mg, TensorType::NumberType())
  873. .INPUT(ms, TensorType::NumberType())
  874. .INPUT(mom, TensorType::NumberType())
  875. .INPUT(lr, TensorType::NumberType())
  876. .INPUT(rho, TensorType::NumberType())
  877. .INPUT(momentum, TensorType::NumberType())
  878. .INPUT(epsilon, TensorType::NumberType())
  879. .INPUT(grad, TensorType::NumberType())
  880. .OUTPUT(var, TensorType::NumberType())
  881. .OUTPUT(mg, TensorType::NumberType())
  882. .OUTPUT(ms, TensorType::NumberType())
  883. .OUTPUT(mom, TensorType::NumberType())
  884. .ATTR(use_locking, Bool, false)
  885. .OP_END_FACTORY_REG(ApplyCenteredRMSPropD)
  886. /**
  887. *@brief Updates "var" by subtracting "alpha" * "delta" from it.
  888. * var -= delta * alpha
  889. *
  890. *@attention Constraints:
  891. * the input tensors must have the same shape.
  892. *
  893. *@par Inputs:
  894. *@li var: A mutable tensor. Should be from a Variable().
  895. *@li alpha: A scalar. Has the same type as "var".
  896. *@li delta: A tensor for the change. Has the same type as "var".
  897. *
  898. *@par Attributes:
  899. * use_locking: An optional bool. Defaults to "False".
  900. * If "True", updating of the "var" tensor is protected
  901. * by a lock; otherwise the behavior is undefined, but may exhibit less
  902. * contention.
  903. *
  904. *@par Outputs:
  905. * var: A mutable tensor. Has the same type as input "var".
  906. *
  907. *@par Third-party framework compatibility
  908. *Compatible with the TensorFlow operator ApplyGradientDescent.
  909. *
  910. */
  911. REG_OP(ApplyGradientDescent)
  912. .INPUT(var, TensorType::NumberType())
  913. .INPUT(alpha, TensorType::NumberType())
  914. .INPUT(delta, TensorType::NumberType())
  915. .OUTPUT(var, TensorType::NumberType())
  916. .ATTR(use_locking, Bool, false)
  917. .OP_END_FACTORY_REG(ApplyGradientDescent)
  918. /**
  919. *@brief Updates "var" according to the adagrad scheme.
  920. * accum += grad * grad\n
  921. * var -= lr * grad * (1 / sqrt(accum))
  922. *
  923. *@attention Constraints:
  924. * the input tensors must have the same shape.
  925. *
  926. *@par Inputs:
  927. *@li var: A mutable tensor. Should be from a Variable().
  928. *@li accum: A mutable tensor. Has the same type as "var".
  929. * Should be from a Variable().
  930. *@li lr: A scalar. Has the same type as "var".
  931. *@li grad: A tensor for the gradient. Has the same type as "var".
  932. *
  933. *@par Attributes:
  934. *@li update_slots: An optional bool. Defaults to "True". If "True", "accum" will be updated.
  935. *@li use_locking: An optional bool. Defaults to "False".
  936. * If "True", updating of the "var" and "accum" tensors is protected
  937. * by a lock; otherwise the behavior is undefined, but may exhibit less
  938. * contention.
  939. *
  940. *@par Outputs:
  941. * var: A mutable tensor. Has the same type as input "var".
  942. *
  943. *@par Third-party framework compatibility
  944. *Compatible with the TensorFlow operator ApplyAdagrad.
  945. *
  946. */
  947. REG_OP(ApplyAdagrad)
  948. .INPUT(var, TensorType::NumberType())
  949. .INPUT(accum, TensorType::NumberType())
  950. .INPUT(lr, TensorType::NumberType())
  951. .INPUT(grad, TensorType::NumberType())
  952. .OUTPUT(var, TensorType::NumberType())
  953. .ATTR(update_slots, Bool, true)
  954. .ATTR(use_locking, Bool, false)
  955. .OP_END_FACTORY_REG(ApplyAdagrad)
  956. /**
  957. *@brief Updates "var" according to the adagrad scheme.
  958. * accum += grad * grad\n
  959. * var -= lr * grad * (1 / sqrt(accum))
  960. *
  961. *@attention Constraints:
  962. * the input tensors must have the same shape.
  963. *
  964. *@par Inputs:
  965. *@li var: A mutable tensor. Should be from a Variable().
  966. *@li accum: A mutable tensor. Has the same type as "var".
  967. * Should be from a Variable().
  968. *@li lr: A scalar. Has the same type as "var".
  969. *@li grad: A tensor for the gradient. Has the same type as "var".
  970. *
  971. *@par Attributes:
  972. *@li update_slots: An optional bool. Defaults to "True". If "True", "accum" will be updated.
  973. *@li use_locking: An optional bool. Defaults to "False".
  974. * If "True", updating of the "var" and "accum" tensors is protected
  975. * by a lock; otherwise the behavior is undefined, but may exhibit less
  976. * contention.
  977. *
  978. *@par Outputs:
  979. *@li var: A mutable tensor. Has the same type as input "var".
  980. *@li accum: A mutable tensor. Has the same type as input "var".
  981. *
  982. *@par Third-party framework compatibility
  983. *Compatible with the TensorFlow operator ApplyAdagrad.
  984. *
  985. */
  986. REG_OP(ApplyAdagradD)
  987. .INPUT(var, TensorType::NumberType())
  988. .INPUT(accum, TensorType::NumberType())
  989. .INPUT(lr, TensorType::NumberType())
  990. .INPUT(grad, TensorType::NumberType())
  991. .OUTPUT(var, TensorType::NumberType())
  992. .OUTPUT(accum, TensorType::NumberType())
  993. .ATTR(update_slots, Bool, true)
  994. .ATTR(use_locking, Bool, false)
  995. .OP_END_FACTORY_REG(ApplyAdagradD)
  996. /**
  997. * @brief Updates "var" according to the adagradv2 scheme.
  998. * accum += grad * grad \n
  999. * var -= lr * grad * (1 / (sqrt(accum) + epsilon))
  1000. *
  1001. * @par Inputs:
  1002. * @li var: A mutable tensor. Must be one of the data types defined in
  1003. * TensorType::NumberType(). Should be from a Variable().
  1004. * @li accum: A mutable tensor. Has the same type as "var". Should be from a
  1005. * Variable().
  1006. * @li lr: A tensor for the learning rate. Has the same type as "var". Should be
  1007. * from a Variable().
  1008. * @li epsilon: A scalar. Has the same type as "var".
  1009. * @li grad: A tensor for the gradient. Has the same type as "var". Should be
  1010. * from a Variable().
  1011. *
  1012. * @par Attributes:
  1013. * @li update_slots: An optional bool. Defaults to "True".
  1014. * If "True", "accum" will be updated.
  1015. * @li use_locking: An optional bool. Defaults to "False".
  1016. * If "True", updating of the "var" tensor is protected by a lock;
  1017. * otherwise the behavior is undefined, but may exhibit less contention.
  1018. *
  1019. * @par Outputs:
  1020. * var: A mutable tensor. Has the same type as input "var".
  1021. *
  1022. * @attention Constraints:
  1023. * The input tensors must have the same shape.
  1024. *
  1025. * @par Third-party framework compatibility
  1026. * Compatible with the TensorFlow operator ApplyAdagradV2.
  1027. *
  1028. */
  1029. REG_OP(ApplyAdagradV2)
  1030. .INPUT(var, TensorType::NumberType())
  1031. .INPUT(accum, TensorType::NumberType())
  1032. .INPUT(lr, TensorType::NumberType())
  1033. .INPUT(epsilon, TensorType::NumberType())
  1034. .INPUT(grad, TensorType::NumberType())
  1035. .OUTPUT(var, TensorType::NumberType())
  1036. .ATTR(update_slots, Bool, true)
  1037. .ATTR(use_locking, Bool, false)
  1038. .OP_END_FACTORY_REG(ApplyAdagradV2)
  1039. /**
  1040. * @brief Updates "var" according to the adagradv2 scheme.
  1041. * accum += grad * grad \n
  1042. * var -= lr * grad * (1 / (sqrt(accum) + epsilon))
  1043. *
  1044. * @par Inputs:
  1045. * @li var: A mutable tensor. Must be one of the data types defined in
  1046. * TensorType::NumberType(). Should be from a Variable().
  1047. * @li accum: A mutable tensor. Has the same type as "var". Should be from a
  1048. * Variable().
  1049. * @li lr: A tensor for the learning rate. Has the same type as "var". Should be
  1050. * from a Variable().
  1051. * @li grad: A tensor for the gradient. Has the same type as "var". Should be
  1052. * from a Variable().
  1053. *
  1054. * @par Attributes:
  1055. * @li epsilon: A required float scalar.
  1056. * @li update_slots: An optional bool. Defaults to "True".
  1057. * If "True", "accum" will be updated.
  1058. * @li use_locking: An optional bool. Defaults to "False".
  1059. * If "True", updating of the "var" tensor is protected by a lock;
  1060. * otherwise the behavior is undefined, but may exhibit less contention.
  1061. *
  1062. * @par Outputs:
  1063. * @li var: A mutable tensor. Has the same type as input "var".
  1064. * @li accum: A mutable tensor. Has the same type as input "var".
  1065. * @attention Constraints:
  1066. * The input tensors must have the same shape.
  1067. *
  1068. * @par Third-party framework compatibility
  1069. * Compatible with the TensorFlow operator ApplyAdagradV2.
  1070. *
  1071. *@par Restrictions:
  1072. *Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagradV2 instead.
  1073. */
  1074. REG_OP(ApplyAdagradV2D)
  1075. .INPUT(var, TensorType::NumberType())
  1076. .INPUT(accum, TensorType::NumberType())
  1077. .INPUT(lr, TensorType::NumberType())
  1078. .INPUT(grad, TensorType::NumberType())
  1079. .OUTPUT(var, TensorType::NumberType())
  1080. .OUTPUT(accum, TensorType::NumberType())
  1081. .REQUIRED_ATTR(epsilon, Float)
  1082. .ATTR(update_slots, Bool, true)
  1083. .ATTR(use_locking, Bool, false)
  1084. .OP_END_FACTORY_REG(ApplyAdagradV2D)
  1085. /**
  1086. *@brief Updates "var" according to the proximal adagrad scheme.
  1087. *@par Inputs:
  1088. *Eight inputs, including:
  1089. * @li var: A mutable Tensor. Must be one of the following types:
  1090. * TensorType::NumberType(). Should be a Variable Tensor.
  1091. * @li gradient_accumulator: A mutable Tensor. Must have the same
  1092. * type as "var". Should be a Variable Tensor.
  1093. * @li gradient_squared_accumulator: A mutable Tensor of the same type as "var".
  1094. * Should be a Variable Tensor.
  1095. * @li grad: A Tensor of the same type as "var", for the gradient.
  1096. * @li lr: A Tensor of the same type as "var".
  1097. * Scaling factor. Must be a scalar.
  1098. * @li l1: A Tensor of the same type as "var".
  1099. * L1 regularization. Must be a scalar.
  1100. * @li l2: A Tensor of the same type as "var".
  1101. * L2 regularization. Must be a scalar.
  1102. * @li global_step: A Tensor of type int32 or int64.
  1103. * Training step number. Must be a scalar.
  1104. *@par Attributes:
  1105. *use_locking: An optional bool. Defaults to "False".
  1106. * If "True", updating of the var and accum tensors will be
  1107. * protected by a lock; otherwise the behavior is undefined,
  1108. * but may exhibit less contention.
  1109. *@par Outputs:
  1110. *var: A mutable Tensor. Has the same type as input "var".
  1111. *@par Third-party framework compatibility
  1112. *Compatible with the TensorFlow operator ApplyAdagradDA.
  1113. */
  1114. REG_OP(ApplyAdagradDA)
  1115. .INPUT(var, TensorType::NumberType())
  1116. .INPUT(gradient_accumulator, TensorType::NumberType())
  1117. .INPUT(gradient_squared_accumulator, TensorType::NumberType())
  1118. .INPUT(grad, TensorType::NumberType())
  1119. .INPUT(lr, TensorType::NumberType())
  1120. .INPUT(l1, TensorType::NumberType())
  1121. .INPUT(l2, TensorType::NumberType())
  1122. .INPUT(global_step, TensorType({DT_INT32, DT_INT64}))
  1123. .OUTPUT(var, TensorType::NumberType())
  1124. .ATTR(use_locking, Bool, false)
  1125. .OP_END_FACTORY_REG(ApplyAdagradDA)
  1126. /**
  1127. *@brief Updates "var" according to the proximal adagrad scheme.
  1128. *@par Inputs:
  1129. *Eight inputs, including:
  1130. * @li var: A mutable Tensor. Must be one of the following types:
  1131. * TensorType::NumberType(). Should be a Variable Tensor.
  1132. * @li gradient_accumulator: A mutable Tensor. Must have the same
  1133. * type as "var". Should be a Variable Tensor.
  1134. * @li gradient_squared_accumulator: A mutable Tensor of the same type as "var".
  1135. * Should be a Variable Tensor.
  1136. * @li grad: A Tensor of the same type as "var", for the gradient.
  1137. * @li lr: A Tensor of the same type as "var".
  1138. * Scaling factor. Must be a scalar.
  1139. * @li l1: A Tensor of the same type as "var".
  1140. * L1 regularization. Must be a scalar.
  1141. * @li l2: A Tensor of the same type as "var".
  1142. * L2 regularization. Must be a scalar.
  1143. * @li global_step: A Tensor of type int32 or int64.
  1144. * Training step number. Must be a scalar.
  1145. *@par Attributes:
  1146. *use_locking: An optional bool. Defaults to "False".
  1147. * If "True", updating of the var and accum tensors will be
  1148. * protected by a lock; otherwise the behavior is undefined,
  1149. * but may exhibit less contention.
  1150. *@par Outputs:
  1151. *@li var: A mutable Tensor. Has the same type as input "var".
  1152. *@li gradient_accumulator: A mutable Tensor. Has the same type as "var".
  1153. *@li gradient_squared_accumulator: A mutable Tensor. Has the same type as "var".
  1154. *@par Third-party framework compatibility
  1155. *Compatible with the TensorFlow operator ApplyAdagradDA.
  1156. */
  1157. REG_OP(ApplyAdagradDAD)
  1158. .INPUT(var, TensorType::NumberType())
  1159. .INPUT(gradient_accumulator, TensorType::NumberType())
  1160. .INPUT(gradient_squared_accumulator, TensorType::NumberType())
  1161. .INPUT(grad, TensorType::NumberType())
  1162. .INPUT(lr, TensorType::NumberType())
  1163. .INPUT(l1, TensorType::NumberType())
  1164. .INPUT(l2, TensorType::NumberType())
  1165. .INPUT(global_step, TensorType({DT_INT32, DT_INT64}))
  1166. .OUTPUT(var, TensorType::NumberType())
  1167. .OUTPUT(gradient_accumulator, TensorType::NumberType())
  1168. .OUTPUT(gradient_squared_accumulator, TensorType::NumberType())
  1169. .ATTR(use_locking, Bool, false)
  1170. .OP_END_FACTORY_REG(ApplyAdagradDAD)
  1171. /**
  1172. *@brief Returns the dimension index in the destination data format given the one in
  1173. * the source data format.
  1174. *
  1175. *@par Inputs:
  1176. * x: A tensor of type int32 or int64.
  1177. * A Tensor with each element as a dimension index in source data format.
  1178. * Must be in the range [-4, 4).
  1179. *
  1180. *@par Attributes:
  1181. *@li src_format: An optional string. Defaults to "NHWC".
  1182. * Source data format. Must be of length 4.
  1183. *@li dst_format: An optional string. Defaults to "NCHW".
  1184. * Destination data format. Must be of length 4.
  1185. *
  1186. *@par Outputs:
  1187. * y: A tensor. Has the same type as "x". Must be in the range [0, 4).
  1188. *
  1189. *@par Third-party framework compatibility
  1190. *Compatible with the TensorFlow operator DataFormatDimMap.
  1191. *
  1192. */
  1193. REG_OP(DataFormatDimMap)
  1194. .INPUT(x, TensorType::IndexNumberType())
  1195. .ATTR(src_format, String, "NHWC")
  1196. .ATTR(dst_format, String, "NCHW")
  1197. .OUTPUT(y, TensorType::IndexNumberType())
  1198. .OP_END_FACTORY_REG(DataFormatDimMap)
  1199. /**
  1200. * @brief Implements stochastic gradient descent (optionally with momentum).
  1201. * Nesterov momentum is based on the formula from
  1202. * On the importance of initialization and momentum in deep learning.\n
  1203. * @par Inputs:
  1204. * @li parameters: A mutable tensor of type float16 or float32.\n
  1205. * Specifies the iterable of parameters to optimize or dicts defining parameter
  1206. * groups.
  1207. * @li gradient: A tensor of type float16 or float32.\n
  1208. * Specifies the gradient of training step.
  1209. * @li learning_rate: A tensor of type float16 or float32.\n
  1210. * Specifies the learing_rate of training step.
  1211. * @li accum: A tensor of type float16 or float32.
  1212. * Specifies the velocity of training step.
  1213. * @li momentum: A tensor of type float16 or float32.
  1214. * Specifies the momentum factor.
  1215. * @li stat: A tensor of type float16 or float32.
  1216. * Specifies the status representing the first step or not.
  1217. * @par Attributes:
  1218. * @li dampening: An optional float, specifying the dampening for momentum.
  1219. * Defaults to "0.0".
  1220. * @li weight_decay: An optional float, specifying the L2 penalty. Defaults to
  1221. * "0.0".
  1222. * @li nesterov: An optional bool, specifying whether to enable Nesterov
  1223. * momentum. Defaults to "False".
  1224. * @par Outputs:
  1225. * parameters: A mutable tensor same as input "parameters".
  1226. * @see ApplyMomentum()
  1227. * @par Third-party framework compatibility
  1228. * @li Compatible with the PyTorch operator SGD.
  1229. */
  1230. REG_OP(SGD)
  1231. .INPUT(parameters, TensorType(DT_FLOAT, DT_FLOAT16))
  1232. .INPUT(gradient, TensorType(DT_FLOAT, DT_FLOAT16))
  1233. .INPUT(learning_rate, TensorType(DT_FLOAT, DT_FLOAT16))
  1234. .INPUT(accum, TensorType(DT_FLOAT, DT_FLOAT16))
  1235. .INPUT(momentum, TensorType(DT_FLOAT, DT_FLOAT16))
  1236. .INPUT(stat, TensorType(DT_FLOAT, DT_FLOAT16))
  1237. .OUTPUT(parameters, TensorType(DT_FLOAT, DT_FLOAT16))
  1238. .ATTR(dampening, Float, 0.0)
  1239. .ATTR(weight_decay, Float, 0.0)
  1240. .ATTR(nesterov, Bool, false)
  1241. .OP_END_FACTORY_REG(SGD)
  1242. /**
  1243. * @brief Updates "var" according to the RMSProp algorithm.
  1244. * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
  1245. * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
  1246. * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
  1247. * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
  1248. * var <- var - mom\n
  1249. *
  1250. * @par Inputs:
  1251. * @li var: A mutable tensor. Must be one of the data types defined in
  1252. * TensorType::NumberType(). Should be from a Variable().
  1253. * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
  1254. * Variable().
  1255. * @li mom: A mutable tensor. Must have the same type as "var". Should be from a
  1256. * Variable().
  1257. * @li lr: A scalar, the learning rate. Must have the same type as "var".
  1258. * @li rho: A scalar, the decay rate of "ms". Must have the same type as "var".
  1259. * @li momentum: A scalar, the factor applied to the previous "mom". Must have the same type as "var".
  1260. * @li epsilon: A scalar, added to "ms" inside the square root. Must have the same type as "var".
  1261. * @li grad: A tensor, specifying the gradient. Must have the same type as "var".
  1262. *
  1263. * @par Attributes:
  1264. * use_locking: An optional "bool". Defaults to "False". If "True", updating of
  1265. * the "var", "ms", and "mom" tensors will be protected by a lock; otherwise the
  1266. * behavior is undefined, but may exhibit less contention.
  1267. *
  1268. * @par Outputs:
  1269. * var: A mutable tensor. Has the same type as input "var".
  1270. *
  1271. * @attention Constraints:
  1272. * @li Note that in dense implementation of this algorithm, "ms" and "mom" will
  1273. * update even if "grad" is 0, but in this sparse implementation, "ms" and "mom"
  1274. * will not update in iterations during which "grad" is 0.
  1275. * @li The input tensors "var", "ms", "mom" and "grad" must have the same shape.
  1276. *
  1277. * @par Third-party framework compatibility
  1278. * @li Compatible with the TensorFlow operator ApplyRMSProp.
  1279. */
  1280. REG_OP(ApplyRMSProp)
  1281. .INPUT(var, TensorType::NumberType())
  1282. .INPUT(ms, TensorType::NumberType())
  1283. .INPUT(mom, TensorType::NumberType())
  1284. .INPUT(lr, TensorType::NumberType())
  1285. .INPUT(rho, TensorType::NumberType())
  1286. .INPUT(momentum, TensorType::NumberType())
  1287. .INPUT(epsilon, TensorType::NumberType())
  1288. .INPUT(grad, TensorType::NumberType())
  1289. .OUTPUT(var, TensorType::NumberType())
  1290. .ATTR(use_locking, Bool, false)
  1291. .OP_END_FACTORY_REG(ApplyRMSProp)
  1292. /**
  1293. * @brief Updates "var" according to the RMSProp algorithm, a const input will be
  1294. * considered as an attribute.
  1295. * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
  1296. * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
  1297. * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
  1298. * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
  1299. * var <- var - mom
  1300. *
  1301. * @par Inputs:
  1302. * @li var: A mutable tensor. Must be one of the data types defined in
  1303. * TensorType::NumberType(). Should be from a Variable().
  1304. * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
  1305. * Variable().
  1306. * @li mom: A mutable tensor. Must have the same type as "var". Should be from a
  1307. * Variable().
  1308. * @li lr: A scalar. Must have the same type as "var".
  1309. * @li grad: A tensor, specifying the gradient. Must have the same type as "var".
  1310. *
  1311. * @par Attributes:
  1312. * @li use_locking: An optional "bool". Defaults to "False". If "True", updating
  1313. * of the "var", "ms", and "mom" tensors will be protected by a lock;
  1314. * otherwise the behavior is undefined, but may exhibit less contention.
  1315. * @li rho: A required float, the decay rate of "ms".
  1316. * @li momentum: A required float, the factor applied to the previous "mom".
  1317. * @li epsilon: A required float, added to "ms" inside the square root.
  1318. *
  1319. * @par Outputs:
  1320. * @li var: A mutable tensor. Must have the same type as input "var".
  1321. * @li ms, mom: Mutable tensors. Must have the same type as inputs "ms" and "mom".
  1322. * @attention Constraints:
  1323. * @li Note that in dense implementation of this algorithm, "ms" and "mom" will
  1324. * update even if "grad" is 0, but in this sparse implementation, "ms" and "mom"
  1325. * will not update in iterations during which "grad" is 0.
  1326. * @li The input tensors "var", "ms", "mom" and "grad" must have the same shape.
  1327. *
  1328. * @par Third-party framework compatibility
  1329. * @li Compatible with the TensorFlow operator ApplyRMSProp.
  1330. *
  1331. *@par Restrictions:
  1332. *Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyRMSProp instead.
  1333. */
  1334. REG_OP(ApplyRMSPropD)
  1335. .INPUT(var, TensorType::NumberType())
  1336. .INPUT(ms, TensorType::NumberType())
  1337. .INPUT(mom, TensorType::NumberType())
  1338. .INPUT(lr, TensorType::NumberType())
  1339. .INPUT(grad, TensorType::NumberType())
  1340. .OUTPUT(var, TensorType::NumberType())
  1341. .OUTPUT(ms, TensorType::NumberType())
  1342. .OUTPUT(mom, TensorType::NumberType())
  1343. .REQUIRED_ATTR(rho, Float)
  1344. .REQUIRED_ATTR(momentum, Float)
  1345. .REQUIRED_ATTR(epsilon, Float)
  1346. .ATTR(use_locking, Bool, false)
  1347. .OP_END_FACTORY_REG(ApplyRMSPropD)
  1348. /**
  1349. *@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate.
  1350. *@par Inputs:
  1351. *Six inputs, including:
  1352. * @li var: A mutable Tensor of type TensorType::NumberType().
  1353. * Should be from a Variable().
  1354. * @li accum: A mutable Tensor of the same type as "var". Should be from a Variable().
  1355. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1356. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1357. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1358. * @li grad: A Tensor of the same type as "var", for the gradient.
  1359. *@par Attributes:
  1360. *use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less contention.
  1361. *@par Outputs:
  1362. *var: A mutable tensor. Must have the same type as input "var".
  1363. *@par Third-party framework compatibility
  1364. *Compatible with the TensorFlow operator ApplyProximalAdagrad.
  1365. */
  1366. REG_OP(ApplyProximalAdagrad)
  1367. .INPUT(var, TensorType::NumberType())
  1368. .INPUT(accum, TensorType::NumberType())
  1369. .INPUT(lr, TensorType::NumberType())
  1370. .INPUT(l1, TensorType::NumberType())
  1371. .INPUT(l2, TensorType::NumberType())
  1372. .INPUT(grad, TensorType::NumberType())
  1373. .OUTPUT(var, TensorType::NumberType())
  1374. .ATTR(use_locking, Bool, false)
  1375. .OP_END_FACTORY_REG(ApplyProximalAdagrad)
  1376. /**
  1377. *@brief Update "var" and "accum" according to FOBOS with Adagrad learning rate.
  1378. *@par Inputs:
  1379. *Six inputs, including:
  1380. * @li var: A mutable Tensor of type TensorType::NumberType().
  1381. * Should be from a Variable().
  1382. * @li accum: A mutable Tensor of the same type as "var". Should be from a Variable().
  1383. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1384. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1385. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1386. * @li grad: A Tensor of the same type as "var", for the gradient.
  1387. *@par Attributes:
  1388. *use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less contention.
  1389. *@par Outputs:
  1390. * @li var: A mutable Tensor. Has the same type as "var".
  1391. * @li accum: A mutable Tensor. Has the same type as "var".
  1392. *@par Third-party framework compatibility
  1393. *Compatible with the TensorFlow operator ApplyProximalAdagrad.
  1394. */
  1395. REG_OP(ApplyProximalAdagradD)
  1396. .INPUT(var, TensorType::NumberType())
  1397. .INPUT(accum, TensorType::NumberType())
  1398. .INPUT(lr, TensorType::NumberType())
  1399. .INPUT(l1, TensorType::NumberType())
  1400. .INPUT(l2, TensorType::NumberType())
  1401. .INPUT(grad, TensorType::NumberType())
  1402. .OUTPUT(var, TensorType::NumberType())
  1403. .OUTPUT(accum, TensorType::NumberType())
  1404. .ATTR(use_locking, Bool, false)
  1405. .OP_END_FACTORY_REG(ApplyProximalAdagradD)
  1406. /**
  1407. *@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm.
  1408. * Compared with op ApplyProximalAdagrad, an additional index tensor is input,
  1409. * Only the indices into the first dimensions of "var" and "accum" are updated.
  1410. *@par Inputs:
  1411. * Seven inputs, including:\n
  1412. * @li var: A mutable Tensor.\n
  1413. * TensorType::NumberType(). Should be a Variable Tensor.
  1414. * @li accum: A mutable Tensor of the same type as "var".\n
  1415. * Should be a Variable Tensor. Should be greater than or equal to zero.\n
  1416. * Accum and grad cannot be equal to zero at the same time.
  1417. * @li lr: A Tensor of the same type as "var".\n
  1418. * Scaling factor. Must be a scalar. Should be greater than zero.
  1419. * @li l1: A Tensor of the same type as "var".\n
  1420. * L1 regularization. Must be a scalar. Should be greater than or equal to zero.
  1421. * @li l2: A Tensor of the same type as "var".\n
  1422. * L2 regularization. Must be a scalar. Should be greater than or equal to zero.
  1423. * @li grad: A Tensor. Has the same type as "var".\n
  1424. * The gradient.
  1425. * @li indices: A vector of indices into the first dimension of "var" and "accum".\n
  1426. * TensorType::IndexNumberType(). Can contain duplicate values.
  1427. *@par Attributes:
  1428. *use_locking: An optional bool. Defaults to "False".\n
  1429. * If "True", updating of the var and accum tensors will be protected by a lock; \n
  1430. * If "False", the behavior is undefined, but may exhibit less contention.
  1431. *@par Outputs:
  1432. *var: A mutable Tensor. Has the same type as "var".
  1433. *@par Third-party framework compatibility
  1434. *Compatible with the TensorFlow operator SparseApplyProximalAdagrad.
  1435. */
  1436. REG_OP(SparseApplyProximalAdagrad)
  1437. .INPUT(var, TensorType::NumberType())
  1438. .INPUT(accum, TensorType::NumberType())
  1439. .INPUT(lr, TensorType::NumberType())
  1440. .INPUT(l1, TensorType::NumberType())
  1441. .INPUT(l2, TensorType::NumberType())
  1442. .INPUT(grad, TensorType::NumberType())
  1443. .INPUT(indices, TensorType::IndexNumberType())
  1444. .OUTPUT(var, TensorType::NumberType())
  1445. .ATTR(use_locking, Bool, false)
  1446. .OP_END_FACTORY_REG(SparseApplyProximalAdagrad)
  1447. /**
  1448. *@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm.\n
  1449. * Compared with op ApplyProximalAdagrad, an additional index tensor is input,
  1450. * Only the indices into the first dimensions of "var" and "accum" are updated.
  1451. *@par Inputs:
  1452. * Seven inputs, including:\n
  1453. * @li var: A mutable Tensor.\n
  1454. * TensorType::NumberType(). Should be a Variable Tensor.
  1455. * @li accum: A mutable Tensor of the same type as "var".\n
  1456. * Should be a Variable Tensor. Should be greater than or equal to zero.\n
  1457. * Accum and grad cannot be equal to zero at the same time.
  1458. * @li lr: A Tensor of the same type as "var".\n
  1459. * Scaling factor. Must be a scalar. Should be greater than zero.
  1460. * @li l1: A Tensor of the same type as "var".\n
  1461. * L1 regularization. Must be a scalar. Should be greater than or equal to zero.
  1462. * @li l2: A Tensor of the same type as "var".\n
  1463. * L2 regularization. Must be a scalar. Should be greater than or equal to zero.
  1464. * @li grad: A Tensor. Has the same type as "var".\n
  1465. * The gradient.
  1466. * @li indices: A vector of indices into the first dimension of "var" and "accum".\n
  1467. * TensorType::IndexNumberType(). Can contain duplicate values.
  1468. *@par Attributes:
  1469. *use_locking: An optional bool. Defaults to "False".\n
  1470. * If "True", updating of the var and accum tensors will be protected by a lock; \n
  1471. * If "False", the behavior is undefined, but may exhibit less contention.
  1472. *@par Outputs:
  1473. *@li var: A mutable Tensor. Has the same type as "var".
  1474. *@li accum: A mutable Tensor. Has the same type as "var".
  1475. *@par Third-party framework compatibility
  1476. *Compatible with the TensorFlow operator SparseApplyProximalAdagrad.
  1477. */
  1478. REG_OP(SparseApplyProximalAdagradD)
  1479. .INPUT(var, TensorType::NumberType())
  1480. .INPUT(accum, TensorType::NumberType())
  1481. .INPUT(lr, TensorType::NumberType())
  1482. .INPUT(l1, TensorType::NumberType())
  1483. .INPUT(l2, TensorType::NumberType())
  1484. .INPUT(grad, TensorType::NumberType())
  1485. .INPUT(indices, TensorType::IndexNumberType())
  1486. .OUTPUT(var, TensorType::NumberType())
  1487. .OUTPUT(accum, TensorType::NumberType())
  1488. .ATTR(use_locking, Bool, false)
  1489. .OP_END_FACTORY_REG(SparseApplyProximalAdagradD)
  1490. /**
  1491. *@brief Updates "var" according to the Ftrl-proximal scheme.
  1492. *@par Inputs:
  1493. *Eight inputs, including:
  1494. * @li var: A mutable Tensor. Must be of type TensorType::NumberType().
  1495. * Should be a Variable Tensor.
  1496. * @li accum: A mutable Tensor of the same type as "var".
  1497. * Should be a Variable Tensor.
  1498. * @li linear: A mutable Tensor of the same type as "var".
  1499. * Should be a Variable Tensor.
  1500. * @li grad: A Tensor of the same type as "var", for the gradient.
  1501. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1502. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1503. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1504. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1505. *@par Attributes:
  1506. *use_locking: An optional bool. Defaults to "False".
  1507. * If "True", updating of the "var" and "accum" tensors will be
  1508. * protected by a lock; otherwise the behavior is undefined,
  1509. * but may exhibit less contention.
  1510. *@par Outputs:
  1511. *var: A mutable Tensor. Has the same type as "var".
  1512. *@par Third-party framework compatibility
  1513. *Compatible with the TensorFlow operator ApplyFtrl.
  1514. */
  1515. REG_OP(ApplyFtrl)
  1516. .INPUT(var, TensorType::NumberType())
  1517. .INPUT(accum, TensorType::NumberType())
  1518. .INPUT(linear, TensorType::NumberType())
  1519. .INPUT(grad, TensorType::NumberType())
  1520. .INPUT(lr, TensorType::NumberType())
  1521. .INPUT(l1, TensorType::NumberType())
  1522. .INPUT(l2, TensorType::NumberType())
  1523. .INPUT(lr_power, TensorType::NumberType())
  1524. .OUTPUT(var, TensorType::NumberType())
  1525. .ATTR(use_locking, Bool, false)
  1526. .OP_END_FACTORY_REG(ApplyFtrl)
  1527. /**
  1528. *@brief Updates "var" according to the Ftrl-proximal scheme.
  1529. *@par Inputs:
  1530. *Eight inputs, including:
  1531. * @li var: A mutable Tensor. Must be of type TensorType::NumberType().
  1532. * Should be a Variable Tensor.
  1533. * @li accum: A mutable Tensor of the same type as "var".
  1534. * Should be a Variable Tensor.
  1535. * @li linear: A mutable Tensor of the same type as "var".
  1536. * Should be a Variable Tensor.
  1537. * @li grad: A Tensor of the same type as "var", for the gradient.
  1538. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1539. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1540. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1541. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1542. *@par Attributes:
  1543. *use_locking: An optional bool. Defaults to "False".
  1544. * If "True", updating of the "var" and "accum" tensors will be
  1545. * protected by a lock; otherwise the behavior is undefined,
  1546. * but may exhibit less contention.
  1547. *@par Outputs:
  1548. *@li var: A mutable Tensor. Has the same type as "var".
  1549. *@li accum: A mutable Tensor. Has the same type as "accum".
  1550. *@li linear: A mutable Tensor. Has the same type as "linear".
  1551. *@par Third-party framework compatibility
  1552. *Compatible with the TensorFlow operator ApplyFtrl.
  1553. */
  1554. REG_OP(ApplyFtrlD)
  1555. .INPUT(var, TensorType::NumberType())
  1556. .INPUT(accum, TensorType::NumberType())
  1557. .INPUT(linear, TensorType::NumberType())
  1558. .INPUT(grad, TensorType::NumberType())
  1559. .INPUT(lr, TensorType::NumberType())
  1560. .INPUT(l1, TensorType::NumberType())
  1561. .INPUT(l2, TensorType::NumberType())
  1562. .INPUT(lr_power, TensorType::NumberType())
  1563. .OUTPUT(var, TensorType::NumberType())
  1564. .OUTPUT(accum, TensorType::NumberType())
  1565. .OUTPUT(linear, TensorType::NumberType())
  1566. .ATTR(use_locking, Bool, false)
  1567. .OP_END_FACTORY_REG(ApplyFtrlD)
  1568. /**
  1569. *@brief Update "var" according to the Ftrl-proximal scheme.
  1570. *@par Inputs:
  1571. *Nine inputs, including:
  1572. * @li var: A mutable Tensor. Must be of type TensorType::NumberType().
  1573. * Should be a Variable Tensor.
  1574. * @li accum: A mutable Tensor of the same type as "var".
  1575. * Should be a Variable Tensor.
  1576. * @li linear: A mutable Tensor of the same type as "var".
  1577. * Should be a Variable Tensor.
  1578. * @li grad: A Tensor of the same type as "var", for the gradient.
  1579. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1580. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1581. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1582. * @li l2_shrinkage: A Tensor of the same type as "var".
  1583. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1584. *@par Attributes:
  1585. *use_locking: An optional bool. Defaults to "False".
  1586. * If "True", updating of the "var" and "accum" tensors will be
  1587. * protected by a lock; otherwise the behavior is undefined,
  1588. * but may exhibit less contention.
  1589. *@par Outputs:
  1590. *var: A mutable Tensor. Has the same type as "var".
  1591. *@par Third-party framework compatibility
  1592. *Compatible with the TensorFlow operator ApplyFtrlV2.
  1593. */
  1594. REG_OP(ApplyFtrlV2)
  1595. .INPUT(var, TensorType::NumberType())
  1596. .INPUT(accum, TensorType::NumberType())
  1597. .INPUT(linear, TensorType::NumberType())
  1598. .INPUT(grad, TensorType::NumberType())
  1599. .INPUT(lr, TensorType::NumberType())
  1600. .INPUT(l1, TensorType::NumberType())
  1601. .INPUT(l2, TensorType::NumberType())
  1602. .INPUT(l2_shrinkage, TensorType::NumberType())
  1603. .INPUT(lr_power, TensorType::NumberType())
  1604. .OUTPUT(var, TensorType::NumberType())
  1605. .ATTR(use_locking, Bool, false)
  1606. .OP_END_FACTORY_REG(ApplyFtrlV2)
  1607. /**
  1608. *@brief Update "var" according to the Ftrl-proximal scheme.
  1609. *@par Inputs:
  1610. *Nine inputs, including:
  1611. * @li var: A mutable Tensor. Must be of type TensorType::NumberType().
  1612. * Should be a Variable Tensor.
  1613. * @li accum: A mutable Tensor of the same type as "var".
  1614. * Should be a Variable Tensor.
  1615. * @li linear: A mutable Tensor of the same type as "var".
  1616. * Should be a Variable Tensor.
  1617. * @li grad: A Tensor of the same type as "var", for the gradient.
  1618. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1619. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  1620. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  1621. * @li l2_shrinkage: A Tensor of the same type as "var".
  1622. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  1623. *@par Attributes:
  1624. *use_locking: An optional bool. Defaults to "False".
  1625. * If "True", updating of the "var" and "accum" tensors will be
  1626. * protected by a lock; otherwise the behavior is undefined,
  1627. * but may exhibit less contention.
  1628. *@par Outputs:
  1629. *@li var: A mutable Tensor. Has the same type as "var".
  1630. *@li accum: A mutable Tensor. Has the same type as "accum".
  1631. *@li linear: A mutable Tensor. Has the same type as "linear".
  1632. *@par Third-party framework compatibility
  1633. *Compatible with the TensorFlow operator ApplyFtrlV2.
  1634. */
  1635. REG_OP(ApplyFtrlV2D)
  1636. .INPUT(var, TensorType::NumberType())
  1637. .INPUT(accum, TensorType::NumberType())
  1638. .INPUT(linear, TensorType::NumberType())
  1639. .INPUT(grad, TensorType::NumberType())
  1640. .INPUT(lr, TensorType::NumberType())
  1641. .INPUT(l1, TensorType::NumberType())
  1642. .INPUT(l2, TensorType::NumberType())
  1643. .INPUT(l2_shrinkage, TensorType::NumberType())
  1644. .INPUT(lr_power, TensorType::NumberType())
  1645. .OUTPUT(var, TensorType::NumberType())
  1646. .OUTPUT(accum, TensorType::NumberType())
  1647. .OUTPUT(linear, TensorType::NumberType())
  1648. .ATTR(use_locking, Bool, false)
  1649. .OP_END_FACTORY_REG(ApplyFtrlV2D)
  1650. /**
  1651. *@brief Updates "var" according to the Adam algorithm.
  1652. * lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\n
  1653. * m_t <- beta1 * m_{t-1} + (1 - beta1) * g\n
  1654. * v_t <- beta2 * v_{t-1} + (1 - beta2) * g * g\n
  1655. * variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
  1656. *
  1657. *@attention Constraints:
  1658. * *The input tensors must have the same shape.*
  1659. *
  1660. *@par Inputs:
  1661. *@li var: A mutable Tensor of the type TensorType::NumberType().
  1662. * Should be from a Variable().
  1663. *@li m: A mutable Tensor of the same type as "var".
  1664. * Should be from a Variable().
  1665. *@li v: A mutable Tensor of the same type as "var".
  1666. * Should be from a Variable().
  1667. *@li beta1_power: A scalar of the same type as "var".
  1668. *@li beta2_power: A scalar of the same type as "var".
  1669. *@li lr: learning_rate. A scalar of the same type as "var".
  1670. *@li beta1: A scalar of the same type as "var".
  1671. *@li beta2: A scalar of the same type as "var".
  1672. *@li epsilon: A scalar of the same type as "var".
  1673. *@li grad: A Tensor of the same type as "var", for the gradient.
  1674. *
  1675. *@par Attributes:
  1676. *@li use_locking: An optional bool. Defaults to "False".
  1677. * If "True", updating of the "var", "m", and "v" tensors will be protected
  1678. * by a lock; otherwise the behavior is undefined, but may exhibit less
  1679. * contention.
  1680. *@li use_nesterov: An optional bool. Defaults to "False".
  1681. * If "True", uses the nesterov update.
  1682. *
  1683. *@par Outputs:
  1684. * var: A mutable Tensor. Has the same type as input "var".
  1685. *@par Third-party framework compatibility
  1686. *Compatible with the TensorFlow operator ApplyAdam.
  1687. */
  1688. REG_OP(ApplyAdam)
  1689. .INPUT(var, TensorType::NumberType())
  1690. .INPUT(m, TensorType::NumberType())
  1691. .INPUT(v, TensorType::NumberType())
  1692. .INPUT(beta1_power, TensorType::NumberType())
  1693. .INPUT(beta2_power, TensorType::NumberType())
  1694. .INPUT(lr, TensorType::NumberType())
  1695. .INPUT(beta1, TensorType::NumberType())
  1696. .INPUT(beta2, TensorType::NumberType())
  1697. .INPUT(epsilon, TensorType::NumberType())
  1698. .INPUT(grad, TensorType::NumberType())
  1699. .OUTPUT(var, TensorType::NumberType())
  1700. .ATTR(use_locking, Bool, false)
  1701. .ATTR(use_nesterov, Bool, false)
  1702. .OP_END_FACTORY_REG(ApplyAdam)
  1703. /**
  1704. *@brief Updates "var" according to the Adam algorithm.
  1705. * lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\n
  1706. * m_t <- beta1 * m_{t-1} + (1 - beta1) * g\n
  1707. * v_t <- beta2 * v_{t-1} + (1 - beta2) * g * g\n
  1708. * variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
  1709. *
  1710. *@attention Constraints:
  1711. * *The input tensors must have the same shape.*
  1712. *
  1713. *@par Inputs:
  1714. *@li var: A mutable Tensor of the type TensorType::NumberType().
  1715. * Should be from a Variable().
  1716. *@li m: A mutable Tensor of the same type as "var".
  1717. * Should be from a Variable().
  1718. *@li v: A mutable Tensor of the same type as "var".
  1719. * Should be from a Variable().
  1720. *@li beta1_power: A scalar of the same type as "var".
  1721. *@li beta2_power: A scalar of the same type as "var".
  1722. *@li lr: learning_rate. A scalar of the same type as "var".
  1723. *@li beta1: A scalar of the same type as "var".
  1724. *@li beta2: A scalar of the same type as "var".
  1725. *@li epsilon: A scalar of the same type as "var".
  1726. *@li grad: A Tensor of the same type as "var", for the gradient.
  1727. *
  1728. *@par Attributes:
  1729. *@li use_locking: An optional bool. Defaults to "False".
  1730. * If "True", updating of the "var", "m", and "v" tensors will be protected
  1731. * by a lock; otherwise the behavior is undefined, but may exhibit less
  1732. * contention.
  1733. *@li use_nesterov: An optional bool. Defaults to "False".
  1734. * If "True", uses the nesterov update.
  1735. *
  1736. *@par Outputs:
  1737. *@li var: A mutable tensor. Has the same type as input "var".
  1738. *@li m: A mutable tensor. Has the same type as input "m".
  1739. *@li v: A mutable tensor. Has the same type as input "v".
  1740. *@par Third-party framework compatibility
  1741. *Compatible with the TensorFlow operator ApplyAdam.
  1742. */
  1743. REG_OP(ApplyAdamD)
  1744. .INPUT(var, TensorType::NumberType())
  1745. .INPUT(m, TensorType::NumberType())
  1746. .INPUT(v, TensorType::NumberType())
  1747. .INPUT(beta1_power, TensorType::NumberType())
  1748. .INPUT(beta2_power, TensorType::NumberType())
  1749. .INPUT(lr, TensorType::NumberType())
  1750. .INPUT(beta1, TensorType::NumberType())
  1751. .INPUT(beta2, TensorType::NumberType())
  1752. .INPUT(epsilon, TensorType::NumberType())
  1753. .INPUT(grad, TensorType::NumberType())
  1754. .OUTPUT(var, TensorType::NumberType())
  1755. .OUTPUT(m, TensorType::NumberType())
  1756. .OUTPUT(v, TensorType::NumberType())
  1757. .ATTR(use_locking, Bool, false)
  1758. .ATTR(use_nesterov, Bool, false)
  1759. .OP_END_FACTORY_REG(ApplyAdamD)
  1760. /**
  1761. *@brief Updates "var" according to the adadelta scheme.
  1762. *@par Inputs:
  1763. *Seven inputs, including:
  1764. * @li var: A mutable Tensor of type TensorType::NumberType().
  1765. * Should be a Variable Tensor.
  1766. * @li accum: A mutable Tensor of the same type as "var".
  1767. * Should be a Variable Tensor.
  1768. * @li accum_update: A mutable Tensor of the same type as "var".
  1769. * Should be a Variable Tensor.
  1770. * @li lr: A scalar of the same type as "var", for the scaling factor.
  1771. * @li rho: A scalar of the same type as "var", for the decay factor.
  1772. * @li epsilon: A scalar of the same type as "var", for the constant factor.
  1773. * @li grad: A Tensor of the same type as "var", for the gradient.
  1774. *@par Attributes:
  1775. *use_locking: An optional bool. Defaults to "False".
  1776. * If "True", updating of the "var", "accum" and "accum_update" tensors will be
  1777. * protected by a lock; otherwise the behavior is undefined,
  1778. * but may exhibit less contention.
  1779. *@par Outputs:
  1780. *var: A mutable Tensor. Has the same type as "var".
  1781. *@par Third-party framework compatibility
  1782. * Compatible with the TensorFlow operator ApplyAdadelta.
  1783. */
  1784. REG_OP(ApplyAdadelta)
  1785. .INPUT(var, TensorType::NumberType())
  1786. .INPUT(accum, TensorType::NumberType())
  1787. .INPUT(accum_update, TensorType::NumberType())
  1788. .INPUT(lr, TensorType::NumberType())
  1789. .INPUT(rho, TensorType::NumberType())
  1790. .INPUT(epsilon, TensorType::NumberType())
  1791. .INPUT(grad, TensorType::NumberType())
  1792. .OUTPUT(var, TensorType::NumberType())
  1793. .ATTR(use_locking, Bool, false)
  1794. .OP_END_FACTORY_REG(ApplyAdadelta)
  1795. /**
  1796. *@brief Updates "var" according to the adadelta scheme.
  1797. *@par Inputs:
  1798. *Seven inputs, including:
  1799. * @li var: A mutable Tensor of type TensorType::NumberType().
  1800. * Should be a Variable Tensor.
  1801. * @li accum: A mutable Tensor of the same type as "var".
  1802. * Should be a Variable Tensor.
  1803. * @li accum_update: A mutable Tensor of the same type as "var".
  1804. * Should be a Variable Tensor.
  1805. * @li lr: A scalar of the same type as "var", for the scaling factor.
  1806. * @li rho: A scalar of the same type as "var", for the decay factor.
  1807. * @li epsilon: A scalar of the same type as "var", for the constant factor.
  1808. * @li grad: A Tensor of the same type as "var", for the gradient.
  1809. *@par Attributes:
  1810. *use_locking: An optional bool. Defaults to "False".
  1811. * If "True", updating of the "var", "accum" and "accum_update" tensors will be
  1812. * protected by a lock; otherwise the behavior is undefined,
  1813. * but may exhibit less contention.
  1814. *@par Outputs:
  1815. *@li var: A mutable Tensor. Has the same type as "var".
  1816. *@li accum: A mutable Tensor. Has the same type as "var".
  1817. *@li accum_update: A mutable Tensor. Has the same type as "var".
  1818. *@par Third-party framework compatibility
  1819. * Compatible with the TensorFlow operator ApplyAdadelta.
  1820. */
  1821. REG_OP(ApplyAdadeltaD)
  1822. .INPUT(var, TensorType::NumberType())
  1823. .INPUT(accum, TensorType::NumberType())
  1824. .INPUT(accum_update, TensorType::NumberType())
  1825. .INPUT(lr, TensorType::NumberType())
  1826. .INPUT(rho, TensorType::NumberType())
  1827. .INPUT(epsilon, TensorType::NumberType())
  1828. .INPUT(grad, TensorType::NumberType())
  1829. .OUTPUT(var, TensorType::NumberType())
  1830. .OUTPUT(accum, TensorType::NumberType())
  1831. .OUTPUT(accum_update, TensorType::NumberType())
  1832. .ATTR(use_locking, Bool, false)
  1833. .OP_END_FACTORY_REG(ApplyAdadeltaD)
  1834. /**
  1835. * @brief Updates "var" according to the ApplyMomentum algorithm. \n
  1836. * accum = accum * momentum + x1 * x2 \n
  1837. * if use_nesterov is True: \n
  1838. * var -= x1 * x2 * lr + accum * momentum * lr \n
  1839. * else: \n
  1840. * var -= accum * lr
  1841. *
  1842. * @par Inputs:
  1843. * Six inputs, including:
  1844. * @li var: A mutable Tensor of type TensorType::NumberType().
  1845. * Should be a Variable Tensor.
  1846. * @li accum: A mutable Tensor of the same type as "var".
  1847. * Should be a Variable Tensor.
  1848. * @li lr: A scalar of the same type as "var", for the scaling factor.
  1849. * @li x1: A Tensor of type TensorType::NumberType().
  1850. * @li momentum: A scalar of the same type as "var".
  1851. * @li x2: A scalar of the same type as "var".
  1852. *
  1853. * @par Attributes:
  1854. * Two attributes, including:
  1855. * @li use_nesterov: An optional bool. Defaults to "False". \n
  1856. * If True, the tensor passed to compute grad will be var - lr * momentum * accum, \n
  1857. * so in the end, the var you get is actually var - lr * momentum * accum.
  1858. * @li use_locking: An optional bool. Defaults to "False". \n
  1859. * If "True", updating of the "var" and "accum" tensors will be protected \n
  1860. * by a lock; otherwise the behavior is undefined, but may exhibit less contention.
  1861. *
  1862. * @par Outputs:
  1863. * Two outputs, including:
  1864. * @li var: A mutable Tensor of the same type as input "var".
  1865. * @li accum: A mutable Tensor of the same type as input "var".
  1866. */
  1867. REG_OP(FusedMulApplyMomentum)
  1868. .INPUT(var, TensorType::NumberType())
  1869. .INPUT(accum, TensorType::NumberType())
  1870. .INPUT(lr, TensorType::NumberType())
  1871. .INPUT(x1, TensorType::NumberType())
  1872. .INPUT(momentum, TensorType::NumberType())
  1873. .INPUT(x2, TensorType::NumberType())
  1874. .OUTPUT(var, TensorType::NumberType())
  1875. .OUTPUT(accum, TensorType::NumberType())
  1876. .ATTR(use_nesterov, Bool, false)
  1877. .ATTR(use_locking, Bool, false)
  1878. .OP_END_FACTORY_REG(FusedMulApplyMomentum)
  1879. /**
  1880. * @brief Updates "var" according to the ApplyMomentum algorithm. \n
  1881. * accum = accum * momentum + x1 * x2 \n
  1882. * if use_nesterov is True: \n
  1883. * var -= x1 * x2 * lr + accum * momentum * lr \n
  1884. * else: \n
  1885. * var -= accum * lr
  1886. *
  1887. * @par Inputs:
  1888. * Seven inputs, including:
  1889. * @li var: A mutable Tensor of type float32.
  1890. * Should be a Variable Tensor.
  1891. * @li accum: A mutable Tensor of type TensorType::NumberType().
  1892. * Should be a Variable Tensor.
  1893. * @li lr: A scalar of the same type as "accum", for the scaling factor.
  1894. * @li x1: A Tensor of the same type as "accum".
  1895. * @li momentum: A scalar of the same type as "accum".
  1896. * @li x2: A scalar of the same type as "accum".
  1897. * @li var_copy: A Tensor of type float16.
  1898. *
  1899. * @par Attributes:
  1900. * Two Attributes, including:
  1901. * @li use_nesterov: An optional bool. Defaults to "False". \n
  1902. * If True, the tensor passed to compute grad will be var - lr * momentum * accum, \n
  1903. * so in the end, the var you get is actually var - lr * momentum * accum.
  1904. * @li use_locking: An optional bool. Defaults to "False". \n
  1905. * If "True", updating of the "var" and "accum" tensors will be protected \n
  1906. * by a lock; otherwise the behavior is undefined, but may exhibit less contention.
  1907. *
  1908. * @par Outputs:
  1909. * Three outputs, including:
  1910. * @li var: A Tensor of type float32.
  1911. * @li var_copy: A Tensor of type float16.
  1912. * @li accum: A Tensor of the same type as input "accum".
  1913. */
  1914. REG_OP(FusedMulApplyMomentumExtern)
  1915. .INPUT(var, TensorType(DT_FLOAT))
  1916. .INPUT(accum, TensorType::NumberType())
  1917. .INPUT(lr, TensorType::NumberType())
  1918. .INPUT(x1, TensorType::NumberType())
  1919. .INPUT(momentum, TensorType::NumberType())
  1920. .INPUT(x2, TensorType::NumberType())
  1921. .INPUT(var_copy, TensorType(DT_FLOAT16))
  1922. .OUTPUT(var, TensorType(DT_FLOAT))
  1923. .OUTPUT(var_copy, TensorType(DT_FLOAT16))
  1924. .OUTPUT(accum, TensorType::NumberType())
  1925. .ATTR(use_nesterov, Bool, false)
  1926. .ATTR(use_locking, Bool, false)
  1927. .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern)
  1928. /**
  1929. *@brief Update "g" according to the LARS algorithm.
  1930. *@par Inputs:
  1931. *Four inputs, including:
  1932. * @li w: A Tensor. Must be of type DT_FLOAT.
  1933. * @li g: A Tensor of the same type and shape as "w".
  1934. * @li weight_decay: A Tensor of the same type as "w". Must be a scalar.
  1935. * @li learning_rate: A Tensor of the same type as "w". Must be a scalar.
  1936. *@par Attributes:
  1937. *Three Attributes, including:
  1938. * @li hyperpara: An optional float. Default value is 0.001.
  1939. * @li epsilon: An optional float. Default value is 1e-5. Used to avoid a zero denominator.
  1940. * @li use_clip: An optional bool. Defaults to "False".\n
  1941. * If "True", updating learning rate.
  1942. *@par Outputs:
  1943. *g_new: Tensor of the same type as "w".
  1944. */
  1945. REG_OP(LarsV2)
  1946. .INPUT(w, TensorType(DT_FLOAT))
  1947. .INPUT(g, TensorType(DT_FLOAT))
  1948. .INPUT(weight_decay, TensorType(DT_FLOAT))
  1949. .INPUT(learning_rate, TensorType(DT_FLOAT))
  1950. .OUTPUT(g_new, TensorType(DT_FLOAT))
  1951. .ATTR(hyperpara, Float, 0.001)
  1952. .ATTR(epsilon, Float, 0.00001)
  1953. .ATTR(use_clip, Bool, false)
  1954. .OP_END_FACTORY_REG(LarsV2)
  1955. /**
  1956. *@brief Update "g" according to the LARS algorithm.
  1957. *@par Inputs:
  1958. *Six inputs, including:
  1959. * @li w: A Tensor. Must be of type DT_FLOAT.
  1960. * @li g: A Tensor of the same type and shape as "w".
  1961. * @li w_square_sum: A Tensor of square_sum(w), has the same type as "w". Must be a scalar.
  1962. * @li g_square_sum: A Tensor of square_sum(g), has the same type as "w". Must be a scalar.
  1963. * @li weight_decay: A Tensor of the same type as "w". Must be a scalar.
  1964. * @li learning_rate: A Tensor of the same type as "w". Must be a scalar.
  1965. *@par Attributes:
  1966. *Three Attributes, including:
  1967. * @li hyperpara: An optional float. Default value is 0.001.
  1968. * @li epsilon: An optional float. Default value is 1e-5. Used to avoid a zero denominator.
  1969. * @li use_clip: An optional bool. Defaults to "False".\n
  1970. * If "True", updating learning rate.
  1971. *@par Outputs:
  1972. *g_new: Tensor of the same type as "w".
  1973. */
  1974. REG_OP(LarsV2Update)
  1975. .INPUT(w, TensorType(DT_FLOAT))
  1976. .INPUT(g, TensorType(DT_FLOAT))
  1977. .INPUT(w_square_sum, TensorType(DT_FLOAT))
  1978. .INPUT(g_square_sum, TensorType(DT_FLOAT))
  1979. .INPUT(weight_decay, TensorType(DT_FLOAT))
  1980. .INPUT(learning_rate, TensorType(DT_FLOAT))
  1981. .OUTPUT(g_new, TensorType(DT_FLOAT))
  1982. .ATTR(hyperpara, Float, 0.001)
  1983. .ATTR(epsilon, Float, 0.00001)
  1984. .ATTR(use_clip, Bool, false)
  1985. .OP_END_FACTORY_REG(LarsV2Update)
  1986. /**
  1987. * @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme.
  1988. * @par Inputs:
  1989. * Nine inputs, including:
  1990. * @li var: A mutable Tensor. Must be of type DT_FLOAT.
  1991. * Should be a Variable Tensor.
  1992. * @li accum: A mutable Tensor of the same type as "var".
  1993. * Should be a Variable Tensor. The value of accum must be greater than 0.
  1994. * @li linear: A mutable Tensor of the same type as "var".
  1995. * Should be a Variable Tensor.
  1996. * @li grad: A Tensor of the same type as "var", for the gradient.
  1997. * @li indices: A vector of type DT_INT32 holding indices into the first dimension of "var" and "accum".
  1998. * The value of indices must be unique. Otherwise, the result is unpredictable.
  1999. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  2000. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  2001. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  2002. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  2003. * @par Attributes:
  2004. * use_locking: An optional bool. Defaults to "False".
  2005. * If "True", updating of the "var" and "accum" tensors will be
  2006. * protected by a lock; otherwise the behavior is undefined,
  2007. * but may exhibit less contention.
  2008. * @par Outputs:
  2009. * var: A Tensor. Has the same type and format as input "var".
  2010. * @par Third-party framework compatibility
  2011. * Compatible with the TensorFlow operator SparseApplyFtrl.
  2012. */
  2013. REG_OP(SparseApplyFtrl)
  2014. .INPUT(var, TensorType({DT_FLOAT}))
  2015. .INPUT(accum, TensorType({DT_FLOAT}))
  2016. .INPUT(linear, TensorType({DT_FLOAT}))
  2017. .INPUT(grad, TensorType({DT_FLOAT}))
  2018. .INPUT(indices, TensorType({DT_INT32}))
  2019. .INPUT(lr, TensorType({DT_FLOAT}))
  2020. .INPUT(l1, TensorType({DT_FLOAT}))
  2021. .INPUT(l2, TensorType({DT_FLOAT}))
  2022. .INPUT(lr_power, TensorType({DT_FLOAT}))
  2023. .OUTPUT(var, TensorType({DT_FLOAT}))
  2024. .ATTR(use_locking, Bool, false)
  2025. .OP_END_FACTORY_REG(SparseApplyFtrl)
  2026. /**
  2027. * @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme.
  2028. * @par Inputs:
  2029. * Five inputs, including:
  2030. * @li var: A mutable Tensor. Must be of type DT_FLOAT.
  2031. * Should be a Variable Tensor.
  2032. * @li accum: A mutable Tensor of the same type as "var".
  2033. * Should be a Variable Tensor. The value of accum must be greater than 0.
  2034. * @li linear: A mutable Tensor of the same type as "var".
  2035. * Should be a Variable Tensor.
  2036. * @li grad: A Tensor of the same type as "var", for the gradient.
  2037. * @li indices: A vector of type DT_INT32 holding indices into the first dimension of "var" and "accum".
  2038. * The value of indices must be unique. Otherwise, the result is unpredictable.
  2039. * @par Attributes:
  2040. * @li lr: A required float, for the scaling factor.
  2041. * @li l1: A required float, for L1 regularization.
  2042. * @li l2: A required float, for L2 regularization.
  2043. * @li lr_power: A required float, for the scaling factor.
  2044. * @li use_locking: An optional bool. Defaults to "False".
  2045. * If "True", updating of the "var" and "accum" tensors will be
  2046. * protected by a lock; otherwise the behavior is undefined,
  2047. * but may exhibit less contention.
  2048. * @par Outputs:
  2049. * @li var: A Tensor. Has the same type and format as input "var".
  2050. * @li accum: A Tensor. Has the same type and format as input "accum".
  2051. * @li linear: A Tensor. Has the same type and format as input "linear".
  2052. * @par Third-party framework compatibility
  2053. * Compatible with the TensorFlow operator SparseApplyFtrl.
  2054. *
  2055. *@par Restrictions:
  2056. *Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyFtrl instead.
  2057. */
  2058. REG_OP(SparseApplyFtrlD)
  2059. .INPUT(var, TensorType({DT_FLOAT}))
  2060. .INPUT(accum, TensorType({DT_FLOAT}))
  2061. .INPUT(linear, TensorType({DT_FLOAT}))
  2062. .INPUT(grad, TensorType({DT_FLOAT}))
  2063. .INPUT(indices, TensorType({DT_INT32}))
  2064. .OUTPUT(var, TensorType({DT_FLOAT}))
  2065. .OUTPUT(accum, TensorType({DT_FLOAT}))
  2066. .OUTPUT(linear, TensorType({DT_FLOAT}))
  2067. .REQUIRED_ATTR(lr, Float)
  2068. .REQUIRED_ATTR(l1, Float)
  2069. .REQUIRED_ATTR(l2, Float)
  2070. .REQUIRED_ATTR(lr_power, Float)
  2071. .ATTR(use_locking, Bool, false)
  2072. .OP_END_FACTORY_REG(SparseApplyFtrlD)
  2073. /**
  2074. * @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme.
  2075. * That is for rows we have grad for, "var", "accum" and "linear" are updated.
  2076. * @par Inputs:
  2077. * Ten inputs, including:
  2078. * @li var: A mutable Tensor. Must be of type DT_FLOAT.
  2079. * Should be a Variable Tensor.
  2080. * @li accum: A mutable Tensor of the same type as "var".
  2081. * Should be a Variable Tensor.
  2082. * @li linear: A mutable Tensor of the same type as "var".
  2083. * Should be a Variable Tensor.
  2084. * @li grad: A Tensor of the same type as "var", for the gradient.
  2085. * @li indices: A vector of type DT_INT32 holding indices into the first dimension of "var" and "accum".
  2086. * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  2087. * @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar.
  2088. * @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar.
  2089. * @li l2_shrinkage: A Tensor of the same type as "var", for L2 shrinkage regularization. Must be a scalar.
  2090. * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
  2091. * @par Attributes:
  2092. * use_locking: An optional bool. Defaults to "False".
  2093. * If "True", updating of the "var" and "accum" tensors will be
  2094. * protected by a lock; otherwise the behavior is undefined,
  2095. * but may exhibit less contention.
  2096. * @par Outputs:
  2097. * var: A Tensor. Has the same type and format as input "var".
  2098. * @par Third-party framework compatibility
  2099. * Compatible with the TensorFlow operator SparseApplyFtrlV2.
  2100. */
  2101. REG_OP(SparseApplyFtrlV2)
  2102. .INPUT(var, TensorType({DT_FLOAT}))
  2103. .INPUT(accum, TensorType({DT_FLOAT}))
  2104. .INPUT(linear, TensorType({DT_FLOAT}))
  2105. .INPUT(grad, TensorType({DT_FLOAT}))
  2106. .INPUT(indices, TensorType({DT_INT32}))
  2107. .INPUT(lr, TensorType({DT_FLOAT}))
  2108. .INPUT(l1, TensorType({DT_FLOAT}))
  2109. .INPUT(l2, TensorType({DT_FLOAT}))
  2110. .INPUT(l2_shrinkage, TensorType({DT_FLOAT}))
  2111. .INPUT(lr_power, TensorType({DT_FLOAT}))
  2112. .OUTPUT(var, TensorType({DT_FLOAT}))
  2113. .ATTR(use_locking, Bool, false)
  2114. .OP_END_FACTORY_REG(SparseApplyFtrlV2)
  2115. /**
  2116. * @brief Updates relevant entries in '*var' according to the Ftrl-proximal scheme.
  2117. * That is for rows we have grad for, "var", "accum" and "linear" are updated.
  2118. * @par Inputs:
  2119. * Five inputs, including:
  2120. * @li var: A mutable Tensor. Must be of type DT_FLOAT.
  2121. * Should be a Variable Tensor.
  2122. * @li accum: A mutable Tensor of the same type as "var".
  2123. * Should be a Variable Tensor.
  2124. * @li linear: A mutable Tensor of the same type as "var".
  2125. * Should be a Variable Tensor.
  2126. * @li grad: A Tensor of the same type as "var", for the gradient.
  2127. * @li indices: A vector of type DT_INT32 holding indices into the first dimension of "var" and "accum".
  2128. * @par Attributes:
  2129. * @li lr: A required float, for the scaling factor.
  2130. * @li l1: A required float, for L1 regularization.
  2131. * @li l2: A required float, for L2 regularization.
  2132. * @li l2_shrinkage: A required float, for L2 shrinkage regularization.
  2133. * @li lr_power: A required float, for the scaling factor.
  2134. * @li use_locking: An optional bool. Defaults to "False".
  2135. * If "True", updating of the "var" and "accum" tensors will be
  2136. * protected by a lock; otherwise the behavior is undefined,
  2137. * but may exhibit less contention.
  2138. * @par Outputs:
  2139. * @li var: A Tensor. Has the same type and format as input "var".
  2140. * @li accum: A Tensor. Has the same type and format as input "accum".
  2141. * @li linear: A Tensor. Has the same type and format as input "linear".
  2142. * @par Third-party framework compatibility
  2143. * Compatible with the TensorFlow operator SparseApplyFtrlV2.
  2144. *
  2145. * @par Restrictions:
  2146. * Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyFtrlV2 instead.
  2147. */
  2148. REG_OP(SparseApplyFtrlV2D)
  2149. .INPUT(var, TensorType({DT_FLOAT}))
  2150. .INPUT(accum, TensorType({DT_FLOAT}))
  2151. .INPUT(linear, TensorType({DT_FLOAT}))
  2152. .INPUT(grad, TensorType({DT_FLOAT}))
  2153. .INPUT(indices, TensorType({DT_INT32}))
  2154. .OUTPUT(var, TensorType({DT_FLOAT}))
  2155. .OUTPUT(accum, TensorType({DT_FLOAT}))
  2156. .OUTPUT(linear, TensorType({DT_FLOAT}))
  2157. .REQUIRED_ATTR(lr, Float)
  2158. .REQUIRED_ATTR(l1, Float)
  2159. .REQUIRED_ATTR(l2, Float)
  2160. .REQUIRED_ATTR(l2_shrinkage, Float)
  2161. .REQUIRED_ATTR(lr_power, Float)
  2162. .ATTR(use_locking, Bool, false)
  2163. .OP_END_FACTORY_REG(SparseApplyFtrlV2D)
  2164. /**
  2165. * @brief Updates "var" in specified index according to the RMSProp algorithm.
  2166. * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
  2167. * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
  2168. * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
  2169. * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
  2170. * var <- var - mom\n
  2171. *
  2172. * @par Inputs:
  2173. * Nine inputs, including:
  2174. * @li var: A mutable tensor. Must be one of the data types defined in
  2175. * TensorType::NumberType(). Should be from a Variable().
  2176. * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
  2177. * Variable().
  2178. * @li mom: A mutable tensor. Must have the same type as "var". Should be from a
  2179. * Variable().
  2180. * @li lr: A scalar. Must have the same type as "var".
  2181. * @li rho: A scalar. Must have the same type as "var".
  2182. * @li momentum: A scalar. Must have the same type as "var".
  2183. * @li epsilon: A scalar. Must have the same type as "var".
  2184. * @li grad: A tensor, specifying the gradient.
  2185. * @li indices: A vector of indices into the first dimension of "var", "mom" and "ms". Must be one of the data types defined in TensorType::IndexNumberType().
  2186. *
  2187. * @par Attributes:
  2188. * use_locking: An optional "bool". Defaults to "False". If "True", updating of
  2189. * the "var", "ms", and "mom" tensors will be protected by a lock; otherwise the
  2190. * behavior is undefined, but may exhibit less contention.
  2191. *
  2192. * @par Outputs:
  2193. * var: A mutable tensor. Has the same type as input "var".
  2194. *
  2195. * @attention Constraints:
  2196. * @li Note that in this sparse implementation, "ms" and "mom" will not update
  2197. * in iterations during which "grad" is 0.
  2198. * @li The input tensors "var", "ms", and "mom" must have the same shape.
  2199. *
  2200. * @par Third-party framework compatibility
  2201. * Compatible with the TensorFlow operator SparseApplyRMSProp.
  2202. */
  2203. REG_OP(SparseApplyRMSProp)
  2204. .INPUT(var, TensorType::NumberType())
  2205. .INPUT(ms, TensorType::NumberType())
  2206. .INPUT(mom, TensorType::NumberType())
  2207. .INPUT(lr, TensorType::NumberType())
  2208. .INPUT(rho, TensorType::NumberType())
  2209. .INPUT(momentum, TensorType::NumberType())
  2210. .INPUT(epsilon, TensorType::NumberType())
  2211. .INPUT(grad, TensorType::NumberType())
  2212. .INPUT(indices, TensorType::IndexNumberType())
  2213. .OUTPUT(var, TensorType::NumberType())
  2214. .ATTR(use_locking, Bool, false)
  2215. .OP_END_FACTORY_REG(SparseApplyRMSProp)
  2216. /**
  2217. * @brief Updates "var" in specified index according to the RMSProp algorithm.
  2218. * A const input will be considered as an attribute.\n
  2219. * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n
  2220. * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n
  2221. * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n
  2222. * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n
  2223. * var <- var - mom
  2224. *
  2225. * @par Inputs:
  2226. * Six inputs, including:
  2227. * @li var: A mutable tensor. Must be one of the data types defined in
  2228. * TensorType::NumberType(). Should be from a Variable().
  2229. * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
  2230. * Variable().
  2231. * @li mom: A mutable tensor. Must have the same type as "var". Should be from a
  2232. * Variable().
  2233. * @li lr: A scalar. Must have the same type as "var".
  2234. * @li grad: A tensor, specifying the gradient.
  2235. * @li indices: A vector of indices into the first dimension of "var", "mom" and "ms".
  2236. * @par Attributes:
  2237. * @li use_locking: An optional "bool". Defaults to "False". If "True",
  2238. * updating of the "var", "ms", and "mom" tensors will be protected by a lock;
  2239. * otherwise the behavior is undefined, but may exhibit less contention.
  2240. * @li rho: A required float.
  2241. * @li momentum: A required float.
  2242. * @li epsilon: A required float.
  2243. *
  2244. * @par Outputs:
  2245. * @li var: A mutable tensor. Must have the same type as input "var".
  2246. * @li ms: A mutable tensor. Must have the same type as input "ms".
  2247. * @li mom: A mutable tensor. Must have the same type as input "mom".
  2248. *
  2249. * @attention Constraints:
  2250. * @li Note that in this sparse implementation, "ms" and "mom" will not update
  2251. * in iterations during which "grad" is 0.
  2252. * @li The input tensors "var", "ms" and "mom" must have the same shape.
  2253. *
  2254. * @par Restrictions:
  2255. * Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyRMSProp instead.
  2256. */
  2257. REG_OP(SparseApplyRMSPropD)
  2258. .INPUT(var, TensorType::NumberType())
  2259. .INPUT(ms, TensorType::NumberType())
  2260. .INPUT(mom, TensorType::NumberType())
  2261. .INPUT(lr, TensorType::NumberType())
  2262. .INPUT(grad, TensorType::NumberType())
  2263. .INPUT(indices, TensorType::IndexNumberType())
  2264. .OUTPUT(var, TensorType::NumberType())
  2265. .OUTPUT(ms, TensorType::NumberType())
  2266. .OUTPUT(mom, TensorType::NumberType())
  2267. .REQUIRED_ATTR(rho, Float)
  2268. .REQUIRED_ATTR(momentum, Float)
  2269. .REQUIRED_ATTR(epsilon, Float)
  2270. .ATTR(use_locking, Bool, false)
  2271. .OP_END_FACTORY_REG(SparseApplyRMSPropD)
  2272. /**
  2273. * @brief Updates "var" in specified index according to the Adadelta algorithm.
  2274. * accum <- rho * accum + (1 - rho) * grad.square()\n
  2275. * update <- (accum_update + epsilon).sqrt() * (accum + epsilon).rsqrt() * grad\n
  2276. * var <- var - update * lr\n
  2277. * accum_update <- rho * accum_update + (1 - rho) * update.square()\n
  2278. *
  2279. * @par Inputs:
  2280. * Eight inputs, including:
  2281. * @li var: A mutable tensor. Must be one of the data types defined in
  2282. * TensorType::NumberType(). Should be from a Variable().
  2283. * @li accum: A mutable tensor. Must have the same type as "var". Should be from a
  2284. * Variable().
  2285. * @li accum_update: A mutable tensor. Must have the same type as "var". Should be from a
  2286. * Variable().
  2287. * @li lr: A scalar. Must have the same type as "var".
  2288. * @li rho: A scalar. Must have the same type as "var".
  2289. * @li epsilon: A scalar. Must have the same type as "var".
  2290. * @li grad: A tensor, specifying the gradient.
  2291. * @li indices: A vector of indices into the first dimension of "var", "accum" and "accum_update". Must be one of the data types defined in TensorType::IndexNumberType().
  2292. *
  2293. * @par Attributes:
  2294. * use_locking: An optional "bool". Defaults to "False". If "True", updating of
  2295. * the "var", "accum", and "accum_update" tensors will be protected by a lock; otherwise the
  2296. * behavior is undefined, but may exhibit less contention.
  2297. *
  2298. * @par Outputs:
  2299. * var: A mutable tensor. Has the same type as input "var".
  2300. *
  2301. * @attention Constraints:
  2302. * @li Note that in this sparse implementation, "accum" and "accum_update" will not update
  2303. * in iterations during which "grad" is 0.
  2304. * @li The input tensors "var", "accum", and "accum_update" must have the same shape.
  2305. *
  2306. * @par Third-party framework compatibility
  2307. * Compatible with the TensorFlow operator SparseApplyAdadelta.
  2308. */
  2309. REG_OP(SparseApplyAdadelta)
  2310. .INPUT(var, TensorType::NumberType())
  2311. .INPUT(accum, TensorType::NumberType())
  2312. .INPUT(accum_update, TensorType::NumberType())
  2313. .INPUT(lr, TensorType::NumberType())
  2314. .INPUT(rho, TensorType::NumberType())
  2315. .INPUT(epsilon, TensorType::NumberType())
  2316. .INPUT(grad, TensorType::NumberType())
  2317. .INPUT(indices, TensorType::IndexNumberType())
  2318. .OUTPUT(var, TensorType::NumberType())
  2319. .ATTR(use_locking, Bool, false)
  2320. .OP_END_FACTORY_REG(SparseApplyAdadelta)
  2321. /**
  2322. * @brief Updates "var" in specified index according to the Adadelta algorithm.
  2323. * A const input will be considered as an attribute.\n
  2324. * accum <- rho * accum + (1 - rho) * grad.square()\n
  2325. * update <- (accum_update + epsilon).sqrt() * (accum + epsilon).rsqrt() * grad\n
  2326. * var <- var - update * lr\n
  2327. * accum_update <- rho * accum_update + (1 - rho) * update.square()\n
  2328. *
  2329. * @par Inputs:
  2330. * Seven inputs, including:
  2331. * @li var: A mutable tensor. Must be one of the data types defined in
  2332. * TensorType::NumberType(). Should be from a Variable().
  2333. * @li accum: A mutable tensor. Must have the same type as "var". Should be from a
  2334. * Variable().
  2335. * @li accum_update: A mutable tensor. Must have the same type as "var". Should be from a
  2336. * Variable().
  2337. * @li lr: A scalar. Must have the same type as "var".
  2338. * @li rho: A scalar. Must have the same type as "var".
  2339. * @li grad: A tensor, specifying the gradient.
  2340. * @li indices: A vector of indices into the first dimension of "var", "accum" and "accum_update".
  2341. *
  2342. * @par Attributes:
  2343. * @li use_locking: An optional "bool". Defaults to "False". If "True",
  2344. * updating of the "var", "accum", and "accum_update" tensors will be protected by a lock;
  2345. * otherwise the behavior is undefined, but may exhibit less contention.
  2346. * @li epsilon: A required float.
  2347. *
  2348. * @par Outputs:
  2349. * @li var: A mutable tensor. Must have the same type as input "var".
  2350. * @li accum: A mutable tensor. Must have the same type as input "accum".
  2351. * @li accum_update: A mutable tensor. Must have the same type as input "accum_update".
  2352. *
  2353. * @attention Constraints:
  2354. * @li Note that in this sparse implementation, "accum" and "accum_update" will not update
  2355. * in iterations during which "grad" is 0.
  2356. * @li The input tensors "var", "accum" and "accum_update" must have the same shape.
  2357. *
  2358. * @par Restrictions:
  2359. * Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyAdadelta instead.
  2360. */
  2361. REG_OP(SparseApplyAdadeltaD)
  2362. .INPUT(var, TensorType::NumberType())
  2363. .INPUT(accum, TensorType::NumberType())
  2364. .INPUT(accum_update, TensorType::NumberType())
  2365. .INPUT(lr, TensorType::NumberType())
  2366. .INPUT(rho, TensorType::NumberType())
  2367. .INPUT(grad, TensorType::NumberType())
  2368. .INPUT(indices, TensorType::IndexNumberType())
  2369. .OUTPUT(var, TensorType::NumberType())
  2370. .OUTPUT(accum, TensorType::NumberType())
  2371. .OUTPUT(accum_update, TensorType::NumberType())
  2372. .REQUIRED_ATTR(epsilon, Float)
  2373. .ATTR(use_locking, Bool, false)
  2374. .OP_END_FACTORY_REG(SparseApplyAdadeltaD)
  2375. /**
  2376. *@brief Clean memory of workspace list.
  2377. *@par Attributes:
  2378. * @li automic_add_mem_size: An optional list of ints giving the sizes of workspaces. Defaults to an empty list.
  2379. */
  2380. REG_OP(AtomicAddrClean)
  2381. .ATTR(automic_add_mem_size, ListInt, {})
  2382. .OP_END_FACTORY_REG(AtomicAddrClean)
  2383. } // namespace ge
  2384. #endif // GE_OP_TRAINING_OPS_H

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示