#! /usr/bin/python
# -*- coding: utf-8 -*-

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import moving_averages
from math import floor, ceil

# loss function
sparse_softmax_cross_entropy_with_logits = tf.nn.sparse_softmax_cross_entropy_with_logits
sigmoid_cross_entropy_with_logits = tf.nn.sigmoid_cross_entropy_with_logits

def padding_format(padding):
    """
    Checks that the padding format is valid and normalizes it.
    Parameters
    ----------
    padding : str
        Must be one of the following: "same", "SAME", "VALID", "valid", or None.
    Returns
    -------
    str
        "SAME" or "VALID".
    """
    if padding in ["SAME", "same"]:
        padding = "SAME"
    elif padding in ["VALID", "valid"]:
        padding = "VALID"
    elif padding is None:
        padding = None
    else:
        raise Exception("Unsupported padding: " + str(padding))
    return padding

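# Usage sketch (illustrative, not part of the original file): normalize a
# user-facing padding string before handing it to a tf.nn op.
#
#   padding_format("same")     # -> "SAME"
#   padding_format("VALID")    # -> "VALID"
#   padding_format("full")     # raises Exception("Unsupported padding: full")
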
def preprocess_1d_format(data_format, padding):
    """
    Checks that the 1-D data format and padding are valid and normalizes them.
    Parameters
    ----------
    data_format : str
        Must be one of the following: "channels_last", "NWC", "NCW", "channels_first".
    padding : str
        Must be one of the following: "same", "valid", "SAME", "VALID".
    Returns
    -------
    tuple of str
        ("NWC" or "NCW", "SAME" or "VALID").
    """
    if data_format in ["channels_last", "NWC"]:
        data_format = "NWC"
    elif data_format in ["channels_first", "NCW"]:
        data_format = "NCW"
    elif data_format is None:
        data_format = None
    else:
        raise Exception("Unsupported data format: " + str(data_format))
    padding = padding_format(padding)
    return data_format, padding

def preprocess_2d_format(data_format, padding):
    """
    Checks that the 2-D data format and padding are valid and normalizes them.
    Parameters
    ----------
    data_format : str
        Must be one of the following: "channels_last", "NHWC", "NCHW", "channels_first".
    padding : str
        Must be one of the following: "same", "valid", "SAME", "VALID".
    Returns
    -------
    tuple of str
        ("NHWC" or "NCHW", "SAME" or "VALID").
    """
    if data_format in ["channels_last", "NHWC"]:
        data_format = "NHWC"
    elif data_format in ["channels_first", "NCHW"]:
        data_format = "NCHW"
    elif data_format is None:
        data_format = None
    else:
        raise Exception("Unsupported data format: " + str(data_format))
    padding = padding_format(padding)
    return data_format, padding

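# Usage sketch (illustrative): both spellings of the layout collapse to the
# canonical TF strings, so downstream ops only ever see "NHWC"/"NCHW".
#
#   preprocess_2d_format("channels_first", "valid")   # -> ("NCHW", "VALID")
#   preprocess_2d_format("NHWC", "same")              # -> ("NHWC", "SAME")
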
def preprocess_3d_format(data_format, padding):
    """
    Checks that the 3-D data format and padding are valid and normalizes them.
    Parameters
    ----------
    data_format : str
        Must be one of the following: "channels_last", "NDHWC", "NCDHW", "channels_first".
    padding : str
        Must be one of the following: "same", "valid", "SAME", "VALID".
    Returns
    -------
    tuple of str
        ("NDHWC" or "NCDHW", "SAME" or "VALID").
    """
    if data_format in ['channels_last', 'NDHWC']:
        data_format = 'NDHWC'
    elif data_format in ['channels_first', 'NCDHW']:
        data_format = 'NCDHW'
    elif data_format is None:
        data_format = None
    else:
        raise Exception("Unsupported data format: " + str(data_format))
    padding = padding_format(padding)
    return data_format, padding

def nchw_to_nhwc(x):
    """
    Channels first to channels last
    Parameters
    ----------
    x : tensor
        channels first tensor data
    Returns
    -------
    channels last tensor data
    """
    if len(x.shape) == 3:
        x = tf.transpose(x, (0, 2, 1))
    elif len(x.shape) == 4:
        x = tf.transpose(x, (0, 2, 3, 1))
    elif len(x.shape) == 5:
        x = tf.transpose(x, (0, 2, 3, 4, 1))
    else:
        raise Exception("Unsupported dimensions")
    return x

def nhwc_to_nchw(x):
    """
    Channels last to channels first
    Parameters
    ----------
    x : tensor
        channels last tensor data
    Returns
    -------
    channels first tensor data
    """
    if len(x.shape) == 3:
        x = tf.transpose(x, (0, 2, 1))
    elif len(x.shape) == 4:
        x = tf.transpose(x, (0, 3, 1, 2))
    elif len(x.shape) == 5:
        x = tf.transpose(x, (0, 4, 1, 2, 3))
    else:
        raise Exception("Unsupported dimensions")
    return x

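# Usage sketch (illustrative shapes): the two helpers are inverses of each
# other, so a round trip restores the original layout.
#
#   x = tf.zeros([2, 8, 8, 3])      # NHWC
#   y = nhwc_to_nchw(x)             # shape [2, 3, 8, 8]
#   z = nchw_to_nhwc(y)             # shape [2, 8, 8, 3] again
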
class ReLU(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.relu(x)

def relu(x):
    """
    Computes rectified linear: max(features, 0).
    Parameters
    ----------
    x : tensor
        Must be one of the following types: float32, float64, int32, uint8, int16,
        int8, int64, bfloat16, uint16, half, uint32, uint64, qint8.
    Returns
    -------
    A Tensor. Has the same type as features.
    """
    return tf.nn.relu(x)

class ReLU6(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.relu6(x)

def relu6(x):
    """
    Computes Rectified Linear 6: min(max(features, 0), 6).
    Parameters
    ----------
    x : tensor
        Must be one of the following types: float32, float64, int32, uint8, int16,
        int8, int64, bfloat16, uint16, half, uint32, uint64, qint8.
    Returns
    -------
    A Tensor with the same type as features.
    """
    return tf.nn.relu6(x)

class LeakyReLU(object):

    def __init__(self, alpha=0.2):
        self.alpha = alpha

    def __call__(self, x):
        return tf.nn.leaky_relu(x, alpha=self.alpha)

def leaky_relu(x, alpha=0.2):
    """
    Compute the Leaky ReLU activation function.
    Parameters
    ----------
    x : tensor
        representing preactivation values. Must be one of the following types:
        float16, float32, float64, int32, int64.
    alpha : float
        Slope of the activation function at x < 0.
    Returns
    -------
    The activation value.
    """
    return tf.nn.leaky_relu(x, alpha=alpha)

class Softplus(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.softplus(x)

def softplus(x):
    """
    Computes softplus: log(exp(features) + 1).
    Parameters
    ----------
    x : tensor
        Must be one of the following types: half, bfloat16, float32, float64.
    Returns
    -------
    A Tensor. Has the same type as features.
    """
    return tf.nn.softplus(x)

class Tanh(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.tanh(x)

def tanh(x):
    """
    Computes hyperbolic tangent of x element-wise.
    Parameters
    ----------
    x : tensor
        Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128.
    Returns
    -------
    A Tensor. Has the same type as x.
    """
    return tf.nn.tanh(x)

class Sigmoid(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.sigmoid(x)

def sigmoid(x):
    """
    Computes sigmoid of x element-wise.
    Parameters
    ----------
    x : tensor
        A Tensor with type float16, float32, float64, complex64, or complex128.
    Returns
    -------
    A Tensor with the same type as x.
    """
    return tf.nn.sigmoid(x)

class Softmax(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.softmax(x)

def softmax(logits, axis=None):
    """
    Computes softmax activations.
    Parameters
    ----------
    logits : tensor
        Must be one of the following types: half, float32, float64.
    axis : int
        The dimension softmax would be performed on. The default is -1 which indicates the last dimension.
    Returns
    -------
    A Tensor. Has the same type and shape as logits.
    """
    return tf.nn.softmax(logits, axis)

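# Usage sketch (illustrative): softmax is applied over the last axis by
# default, so each row of probabilities sums to 1.
#
#   logits = tf.constant([[1.0, 2.0, 3.0]])
#   probs = softmax(logits)         # tf.reduce_sum(probs, axis=-1) == 1.0
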
class Dropout(object):

    def __init__(self, keep, seed=0):
        self.keep = keep
        self.seed = seed

    def __call__(self, inputs, *args, **kwargs):
        outputs = tf.nn.dropout(inputs, rate=1 - (self.keep), seed=self.seed)
        return outputs

class BiasAdd(object):
    """
    Adds bias to value.
    Parameters
    ----------
    x : tensor
        A Tensor with type float, double, int64, int32, uint8, int16, int8, complex64, or complex128.
    bias : tensor
        Must be the same type as x unless x is a quantized type,
        in which case a different quantized type may be used.
    Returns
    -------
    A Tensor with the same type as x.
    """

    def __init__(self, data_format=None):
        self.data_format = data_format

    def __call__(self, x, bias):
        return tf.nn.bias_add(x, bias, data_format=self.data_format)

def bias_add(x, bias, data_format=None, name=None):
    """
    Adds bias to value.
    Parameters
    ----------
    x : tensor
        A Tensor with type float, double, int64, int32, uint8, int16, int8, complex64, or complex128.
    bias : tensor
        Must be the same type as x unless x is a quantized type,
        in which case a different quantized type may be used.
    data_format : str
        'N...C' and 'NC...' are supported.
    name : str
        A name for the operation (optional).
    Returns
    -------
    A Tensor with the same type as x.
    """
    x = tf.nn.bias_add(x, bias, data_format=data_format, name=name)
    return x

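# Usage sketch (illustrative shapes): the bias is broadcast over the trailing
# channel axis for the default 'N...C' data format.
#
#   x = tf.zeros([2, 4, 4, 3])
#   b = tf.constant([0.1, 0.2, 0.3])
#   y = bias_add(x, b)              # shape [2, 4, 4, 3]
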
class Conv1D(object):

    def __init__(self, stride, padding, data_format='NWC', dilations=None, out_channel=None, k_size=None):
        self.stride = stride
        self.dilations = dilations
        self.data_format, self.padding = preprocess_1d_format(data_format, padding)

    def __call__(self, input, filters):
        outputs = tf.nn.conv1d(
            input=input,
            filters=filters,
            stride=self.stride,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
            # name=name
        )
        return outputs

def conv1d(input, filters, stride, padding, data_format='NWC', dilations=None):
    """
    Computes a 1-D convolution given 3-D input and filter tensors.
    Parameters
    ----------
    input : tensor
        A 3D Tensor. Must be of type float16, float32, or float64.
    filters : tensor
        A 3D Tensor. Must have the same type as input.
    stride : int or list
        An int or list of ints that has length 1 or 3. The number of entries by which the filter is moved right at each step.
    padding : string
        'SAME' or 'VALID'
    data_format : string
        An optional string from "NWC", "NCW". Defaults to "NWC", the data is stored in the order of
        [batch, in_width, in_channels]. The "NCW" format stores data as [batch, in_channels, in_width].
    dilations : int or list
        An int or list of ints that has length 1 or 3 which defaults to 1.
        The dilation factor for each dimension of input. If set to k > 1,
        there will be k-1 skipped cells between each filter element on that dimension.
        Dilations in the batch and depth dimensions must be 1.
    Returns
    -------
    A Tensor. Has the same type as input.
    """
    data_format, padding = preprocess_1d_format(data_format, padding)
    outputs = tf.nn.conv1d(
        input=input,
        filters=filters,
        stride=stride,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        # name=name
    )
    return outputs

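# Usage sketch (illustrative shapes, assuming TF2 eager mode):
#
#   x = tf.random.normal([4, 100, 8])    # NWC: batch of length-100 sequences
#   w = tf.random.normal([5, 8, 16])     # [filter_width, in_channels, out_channels]
#   y = conv1d(x, w, stride=1, padding='SAME')   # shape [4, 100, 16]
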
class Conv2D(object):

    def __init__(self, strides, padding, data_format='NHWC', dilations=None, out_channel=None, k_size=None):
        self.strides = strides
        self.dilations = dilations
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)

    def __call__(self, input, filters):
        outputs = tf.nn.conv2d(
            input=input,
            filters=filters,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )
        return outputs

def conv2d(input, filters, strides, padding, data_format='NHWC', dilations=None):
    """
    Computes a 2-D convolution given 4-D input and filters tensors.
    Parameters
    ----------
    input : tensor
        Must be one of the following types: half, bfloat16, float32, float64. A 4-D tensor.
        The dimension order is interpreted according to the value of data_format, see below for details.
    filters : tensor
        Must have the same type as input. A 4-D tensor of shape [filter_height, filter_width, in_channels, out_channels].
    strides : int or list
        The stride of the sliding window for each dimension of input. If a single value is given it is replicated in the H and W dimension.
        By default the N and C dimensions are set to 1. The dimension order is determined by the value of data_format, see below for details.
    padding : string
        "SAME" or "VALID"
    data_format : string
        "NHWC", "NCHW". Defaults to "NHWC".
    dilations : list of ints
        A list of ints that has length 1, 2 or 4, defaults to 1. The dilation factor for each dimension of input.
    Returns
    -------
    A Tensor. Has the same type as input.
    """
    data_format, padding = preprocess_2d_format(data_format, padding)
    outputs = tf.nn.conv2d(
        input=input,
        filters=filters,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
    )
    return outputs

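# Usage sketch (illustrative shapes, assuming TF2 eager mode): with 'SAME'
# padding and stride 1 the spatial dimensions are preserved.
#
#   x = tf.random.normal([8, 32, 32, 3])     # NHWC batch of 32x32 RGB images
#   w = tf.random.normal([3, 3, 3, 16])      # 3x3 kernel, 3 -> 16 channels
#   y = conv2d(x, w, strides=[1, 1, 1, 1], padding='same')   # shape [8, 32, 32, 16]
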
class Conv3D(object):

    def __init__(self, strides, padding, data_format='NDHWC', dilations=None, out_channel=None, k_size=None):
        self.strides = strides
        self.dilations = dilations
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)

    def __call__(self, input, filters):
        outputs = tf.nn.conv3d(
            input=input,
            filters=filters,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )
        return outputs

def conv3d(input, filters, strides, padding, data_format='NDHWC', dilations=None):
    """
    Computes a 3-D convolution given 5-D input and filters tensors.
    Parameters
    ----------
    input : tensor
        Must be one of the following types: half, bfloat16, float32, float64.
        Shape [batch, in_depth, in_height, in_width, in_channels].
    filters : tensor
        Must have the same type as input. Shape [filter_depth, filter_height, filter_width, in_channels, out_channels].
        in_channels must match between input and filters.
    strides : list of ints
        A list of ints that has length 5. 1-D tensor of length 5.
        The stride of the sliding window for each dimension of input.
        Must have strides[0] = strides[4] = 1.
    padding : string
        A string from: "SAME", "VALID". The type of padding algorithm to use.
    data_format : string
        An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". The data format of the input and output data.
        With the default format "NDHWC", the data is stored in the order of: [batch, in_depth, in_height, in_width, in_channels].
        Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width].
    dilations : list of ints
        Defaults to [1, 1, 1, 1, 1]. 1-D tensor of length 5. The dilation factor for each dimension of input.
        If set to k > 1, there will be k-1 skipped cells between each filter element on that dimension.
        The dimension order is determined by the value of data_format, see above for details.
        Dilations in the batch and depth dimensions must be 1.
    Returns
    -------
    A Tensor. Has the same type as input.
    """
    data_format, padding = preprocess_3d_format(data_format, padding)
    outputs = tf.nn.conv3d(
        input=input,
        filters=filters,
        strides=strides,
        padding=padding,
        data_format=data_format,  # 'NDHWC',
        dilations=dilations,  # [1, 1, 1, 1, 1],
        # name=name,
    )
    return outputs

def lrn(inputs, depth_radius, bias, alpha, beta):
    """
    Local Response Normalization.
    Parameters
    ----------
    inputs : tensor
        Must be one of the following types: half, bfloat16, float32. 4-D.
    depth_radius : int
        Defaults to 5. 0-D. Half-width of the 1-D normalization window.
    bias : float
        Defaults to 1. An offset (usually positive to avoid dividing by 0).
    alpha : float
        Defaults to 1. A scale factor, usually positive.
    beta : float
        Defaults to 0.5. An exponent.
    Returns
    -------
    A Tensor. Has the same type as input.
    """
    outputs = tf.nn.lrn(inputs, depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta)
    return outputs

def moments(x, axes, shift=None, keepdims=False):
    """
    Calculates the mean and variance of x.
    Parameters
    ----------
    x : tensor
        A Tensor.
    axes : list of ints
        Axes along which to compute mean and variance.
    shift : int
        Not used in the current implementation.
    keepdims : bool
        Produce moments with the same dimensionality as the input.
    Returns
    -------
    Two Tensor objects: mean and variance.
    """
    outputs = tf.nn.moments(x, axes, shift, keepdims)
    return outputs

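# Usage sketch (illustrative): reducing over batch and spatial axes yields
# per-channel statistics, the form batch normalization consumes.
#
#   x = tf.random.normal([8, 32, 32, 3])
#   mean, var = moments(x, axes=[0, 1, 2])   # mean.shape == var.shape == [3]
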
class MaxPool1d(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_1d_format(data_format=data_format, padding=padding)
        self.ksize = ksize
        self.strides = strides

    def __call__(self, inputs):
        outputs = tf.nn.max_pool(
            input=inputs, ksize=self.ksize, strides=self.strides, padding=self.padding, data_format=self.data_format
        )
        return outputs

class MaxPool(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.ksize = ksize
        self.strides = strides
        self.data_format = data_format
        self.padding = padding

    def __call__(self, inputs):
        if inputs.ndim == 3:
            self.data_format, self.padding = preprocess_1d_format(data_format=self.data_format, padding=self.padding)
        elif inputs.ndim == 4:
            self.data_format, self.padding = preprocess_2d_format(data_format=self.data_format, padding=self.padding)
        elif inputs.ndim == 5:
            self.data_format, self.padding = preprocess_3d_format(data_format=self.data_format, padding=self.padding)
        outputs = tf.nn.max_pool(
            input=inputs, ksize=self.ksize, strides=self.strides, padding=self.padding, data_format=self.data_format
        )
        return outputs

def max_pool(input, ksize, strides, padding, data_format=None):
    """
    Performs the max pooling on the input.
    Parameters
    ----------
    input : tensor
        Tensor of rank N+2, of shape [batch_size] + input_spatial_shape + [num_channels] if data_format does not start
        with "NC" (default), or [batch_size, num_channels] + input_spatial_shape if data_format starts with "NC".
        Pooling happens over the spatial dimensions only.
    ksize : int or list of ints
        An int or list of ints that has length 1, N or N+2.
        The size of the window for each dimension of the input tensor.
    strides : int or list of ints
        An int or list of ints that has length 1, N or N+2.
        The stride of the sliding window for each dimension of the input tensor.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    Returns
    -------
    A Tensor of format specified by data_format. The max pooled output tensor.
    """
    if input.ndim == 3:
        data_format, padding = preprocess_1d_format(data_format=data_format, padding=padding)
    elif input.ndim == 4:
        data_format, padding = preprocess_2d_format(data_format=data_format, padding=padding)
    elif input.ndim == 5:
        data_format, padding = preprocess_3d_format(data_format=data_format, padding=padding)
    outputs = tf.nn.max_pool(input=input, ksize=ksize, strides=strides, padding=padding, data_format=data_format)
    return outputs

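# Usage sketch (illustrative shapes): the wrapper infers the pooling rank from
# the input's ndim, so a 4-D tensor is routed through the 2-D format check.
#
#   x = tf.random.normal([1, 28, 28, 3])
#   y = max_pool(x, ksize=2, strides=2, padding='SAME')   # shape [1, 14, 14, 3]
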
class AvgPool1d(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_1d_format(data_format=data_format, padding=padding)
        self.ksize = ksize
        self.strides = strides

    def __call__(self, inputs):
        outputs = tf.nn.pool(
            input=inputs,
            window_shape=self.ksize,
            pooling_type="AVG",
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
        )
        return outputs

class AvgPool(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.ksize = ksize
        self.strides = strides
        self.data_format = data_format
        self.padding = padding_format(padding)

    def __call__(self, inputs):
        outputs = tf.nn.avg_pool(
            input=inputs, ksize=self.ksize, strides=self.strides, padding=self.padding, data_format=self.data_format
        )
        return outputs

def avg_pool(input, ksize, strides, padding):
    """
    Performs the avg pooling on the input.
    Parameters
    ----------
    input : tensor
        Tensor of rank N+2, of shape [batch_size] + input_spatial_shape + [num_channels]
        if data_format does not start with "NC" (default), or [batch_size, num_channels] + input_spatial_shape
        if data_format starts with "NC". Pooling happens over the spatial dimensions only.
    ksize : int or list of ints
        An int or list of ints that has length 1, N or N+2.
        The size of the window for each dimension of the input tensor.
    strides : int or list of ints
        An int or list of ints that has length 1, N or N+2.
        The stride of the sliding window for each dimension of the input tensor.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    Returns
    -------
    A Tensor of format specified by data_format. The average pooled output tensor.
    """
    padding = padding_format(padding)
    outputs = tf.nn.avg_pool(
        input=input,
        ksize=ksize,
        strides=strides,
        padding=padding,
    )
    return outputs

class MaxPool3d(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)
        self.ksize = ksize
        self.strides = strides

    def __call__(self, inputs):
        outputs = tf.nn.max_pool3d(
            input=inputs,
            ksize=self.ksize,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
        )
        return outputs

def max_pool3d(input, ksize, strides, padding, data_format=None):
    """
    Performs the max pooling on the input.
    Parameters
    ----------
    input : tensor
        A 5-D Tensor of the format specified by data_format.
    ksize : int or list of ints
        An int or list of ints that has length 1, 3 or 5.
        The size of the window for each dimension of the input tensor.
    strides : int or list of ints
        An int or list of ints that has length 1, 3 or 5.
        The stride of the sliding window for each dimension of the input tensor.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        "NDHWC", "NCDHW". Defaults to "NDHWC". The data format of the input and output data.
        With the default format "NDHWC", the data is stored in the order of: [batch, in_depth, in_height, in_width, in_channels].
        Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width].
    Returns
    -------
    A Tensor of format specified by data_format. The max pooled output tensor.
    """
    data_format, padding = preprocess_3d_format(data_format, padding)
    outputs = tf.nn.max_pool3d(
        input=input,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
    )
    return outputs

class AvgPool3d(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)
        self.ksize = ksize
        self.strides = strides

    def __call__(self, inputs):
        outputs = tf.nn.avg_pool3d(
            input=inputs,
            ksize=self.ksize,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
        )
        return outputs

def avg_pool3d(input, ksize, strides, padding, data_format=None):
    """
    Performs the average pooling on the input.
    Parameters
    ----------
    input : tensor
        A 5-D Tensor of shape [batch, depth, height, width, channels] and type float32, float64, qint8, quint8, or qint32.
    ksize : int or list of ints
        An int or list of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor.
    strides : int or list of ints
        An int or list of ints that has length 1, 3 or 5.
        The stride of the sliding window for each dimension of the input tensor.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NDHWC' and 'NCDHW' are supported.
    Returns
    -------
    A Tensor with the same type as value. The average pooled output tensor.
    """
    data_format, padding = preprocess_3d_format(data_format, padding)
    outputs = tf.nn.avg_pool3d(
        input=input,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
    )
    return outputs

def pool(input, window_shape, pooling_type, strides=None, padding='VALID', data_format=None, dilations=None, name=None):
    """
    Performs an N-D pooling operation.
    Parameters
    ----------
    input : tensor
        Tensor of rank N+2, of shape [batch_size] + input_spatial_shape + [num_channels]
        if data_format does not start with "NC" (default), or [batch_size, num_channels] + input_spatial_shape
        if data_format starts with "NC". Pooling happens over the spatial dimensions only.
    window_shape : list of ints
        Sequence of N ints >= 1.
    pooling_type : string
        Specifies pooling operation, must be "AVG" or "MAX".
    strides : list of ints
        Sequence of N ints >= 1. Defaults to [1]*N. If any value of strides is > 1, then all values of dilation_rate must be 1.
    padding : string
        The padding algorithm, must be "SAME" or "VALID". Defaults to "VALID".
        See the "returns" section of tf.ops.convolution for details.
    data_format : string
        Specifies whether the channel dimension of the input and output is the last dimension (default, or if data_format does not start with "NC"),
        or the second dimension (if data_format starts with "NC").
        For N=1, the valid values are "NWC" (default) and "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW".
        For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    dilations : list of ints
        Dilation rate. List of N ints >= 1. Defaults to [1]*N. If any value of dilation_rate is > 1, then all values of strides must be 1.
    name : string
        Optional. Name of the op.
    Returns
    -------
    Tensor of rank N+2, of shape [batch_size] + output_spatial_shape + [num_channels].
    """
    if pooling_type in ["MAX", "max"]:
        pooling_type = "MAX"
    elif pooling_type in ["AVG", "avg"]:
        pooling_type = "AVG"
    else:
        raise ValueError('Unsupported pool_mode: ' + str(pooling_type))
    padding = padding_format(padding)
    outputs = tf.nn.pool(
        input=input,
        window_shape=window_shape,
        pooling_type=pooling_type,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name,
    )
    return outputs

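# Usage sketch (illustrative): the lowercase pooling_type and padding are
# normalized before being forwarded to tf.nn.pool.
#
#   x = tf.random.normal([1, 28, 28, 3])
#   y = pool(x, window_shape=[2, 2], pooling_type='avg', strides=[2, 2], padding='valid')
#   # y.shape == [1, 14, 14, 3]
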
class DepthwiseConv2d(object):

    def __init__(self, strides, padding, data_format=None, dilations=None, ksize=None, channel_multiplier=1):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.strides = strides
        self.dilations = dilations

    def __call__(self, input, filter):
        outputs = tf.nn.depthwise_conv2d(
            input=input,
            filter=filter,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )
        return outputs

def depthwise_conv2d(input, filter, strides, padding, data_format=None, dilations=None, name=None):
    """
    Depthwise 2-D convolution.
    Parameters
    ----------
    input : tensor
        4-D with shape according to data_format.
    filter : tensor
        4-D with shape [filter_height, filter_width, in_channels, channel_multiplier].
    strides : list
        1-D of size 4. The stride of the sliding window for each dimension of input.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        The data format for input. Either "NHWC" (default) or "NCHW".
    dilations : list
        1-D of size 2. The dilation rate in which we sample input values across the height and width dimensions in atrous convolution.
        If it is greater than 1, then all values of strides must be 1.
    name : string
        A name for this operation (optional).
    Returns
    -------
    A 4-D Tensor with shape according to data_format.
    E.g., for "NHWC" format, shape is [batch, out_height, out_width, in_channels * channel_multiplier].
    """
    data_format, padding = preprocess_2d_format(data_format, padding)
    outputs = tf.nn.depthwise_conv2d(
        input=input,
        filter=filter,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name,
    )
    return outputs

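# Usage sketch (illustrative shapes): each input channel is convolved with its
# own channel_multiplier filters, so channels multiply rather than mix.
#
#   x = tf.random.normal([1, 32, 32, 3])
#   f = tf.random.normal([3, 3, 3, 2])       # channel_multiplier = 2
#   y = depthwise_conv2d(x, f, strides=[1, 1, 1, 1], padding='SAME')
#   # y.shape == [1, 32, 32, 6]              # in_channels * channel_multiplier
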
class Conv1d_transpose(object):

    def __init__(
        self, stride, padding, data_format='NWC', dilations=None, out_channel=None, k_size=None, in_channels=None
    ):
        self.stride = stride
        self.dilations = dilations
        self.data_format, self.padding = preprocess_1d_format(data_format, padding)

    def __call__(self, input, filters):
        batch_size = input.shape[0]
        if self.data_format == 'NWC':
            w_axis, c_axis = 1, 2
        else:
            w_axis, c_axis = 2, 1
        input_shape = input.shape.as_list()
        filters_shape = filters.shape.as_list()
        input_w = input_shape[w_axis]
        filters_w = filters_shape[0]
        output_channels = filters_shape[1]
        dilations_w = 1
        if isinstance(self.stride, int):
            strides_w = self.stride
        else:
            strides_list = list(self.stride)
            strides_w = strides_list[w_axis]
        if self.dilations is not None:
            if isinstance(self.dilations, int):
                dilations_w = self.dilations
            else:
                dilations_list = list(self.dilations)
                dilations_w = dilations_list[w_axis]
        filters_w = filters_w + (filters_w - 1) * (dilations_w - 1)
        assert self.padding in {'SAME', 'VALID'}
        if self.padding == 'VALID':
            output_w = input_w * strides_w + max(filters_w - strides_w, 0)
        elif self.padding == 'SAME':
            output_w = input_w * strides_w
        if self.data_format == 'NCW':
            output_shape = (batch_size, output_channels, output_w)
        else:
            output_shape = (batch_size, output_w, output_channels)
        output_shape = tf.stack(output_shape)
        outputs = tf.nn.conv1d_transpose(
            input=input,
            filters=filters,
            output_shape=output_shape,
            strides=self.stride,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )
        return outputs

def conv1d_transpose(
    input, filters, output_shape, strides, padding='SAME', data_format='NWC', dilations=None, name=None
):
    """
    The transpose of conv1d.
    Parameters
    ----------
    input : tensor
        A 3-D Tensor of type float and shape [batch, in_width, in_channels]
        for NWC data format or [batch, in_channels, in_width] for NCW data format.
    filters : tensor
        A 3-D Tensor with the same type as input and shape [filter_width, output_channels, in_channels].
        filter's in_channels dimension must match that of input.
    output_shape : tensor
        A 1-D Tensor, containing three elements, representing the output shape of the deconvolution op.
    strides : list
        An int or list of ints that has length 1 or 3. The number of entries by which the filter is moved right at each step.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NWC' and 'NCW' are supported.
    dilations : list
        An int or list of ints that has length 1 or 3 which defaults to 1.
        The dilation factor for each dimension of input. If set to k > 1,
        there will be k-1 skipped cells between each filter element on that dimension.
        Dilations in the batch and depth dimensions must be 1.
    name : string
        Optional name for the returned tensor.
    Returns
    -------
    A Tensor with the same type as input.
    """
    data_format, padding = preprocess_1d_format(data_format, padding)
    outputs = tf.nn.conv1d_transpose(
        input=input,
        filters=filters,
        output_shape=output_shape,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name,
    )
    return outputs

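# Usage sketch (illustrative shapes): with 'SAME' padding the output width is
# input width * stride, matching the output_shape computed in Conv1d_transpose.
#
#   x = tf.random.normal([2, 50, 16])        # NWC
#   w = tf.random.normal([5, 8, 16])         # [filter_width, output_channels, in_channels]
#   y = conv1d_transpose(x, w, output_shape=[2, 100, 8], strides=2)   # shape [2, 100, 8]
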
class Conv2d_transpose(object):

    def __init__(
        self, strides, padding, data_format='NHWC', dilations=None, name=None, out_channel=None, k_size=None,
        in_channels=None
    ):
        self.strides = strides
        self.dilations = dilations
        self.name = name
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)

    def __call__(self, input, filters):
        if self.data_format == 'NHWC':
            h_axis, w_axis = 1, 2
        else:
            h_axis, w_axis = 2, 3
        input_shape = input.shape.as_list()
        filters_shape = filters.shape.as_list()
        batch_size = input.shape[0]
        input_h, input_w = input_shape[h_axis], input_shape[w_axis]
        kernel_h, kernel_w = filters_shape[0], filters_shape[1]
        output_channels = filters_shape[2]
        dilations_h, dilations_w = 1, 1
        if isinstance(self.strides, int):
            strides_h = self.strides
            strides_w = self.strides
        else:
            strides_list = list(self.strides)
            if len(strides_list) == 2:
                strides_h = strides_list[0]
                strides_w = strides_list[1]
            elif len(strides_list) == 4:
                strides_h = strides_list[h_axis]
                strides_w = strides_list[w_axis]
        if self.dilations is not None:
            if isinstance(self.dilations, int):
                dilations_h = self.dilations
                dilations_w = self.dilations
            else:
                dilations_list = list(self.dilations)
                if len(dilations_list) == 2:
                    dilations_h = dilations_list[0]
                    dilations_w = dilations_list[1]
                elif len(dilations_list) == 4:
                    dilations_h = dilations_list[h_axis]
                    dilations_w = dilations_list[w_axis]
        kernel_h = kernel_h + (kernel_h - 1) * (dilations_h - 1)
        kernel_w = kernel_w + (kernel_w - 1) * (dilations_w - 1)
        assert self.padding in {'SAME', 'VALID'}
        if self.padding == 'VALID':
            output_h = input_h * strides_h + max(kernel_h - strides_h, 0)
            output_w = input_w * strides_w + max(kernel_w - strides_w, 0)
        elif self.padding == 'SAME':
            output_h = input_h * strides_h
            output_w = input_w * strides_w
        if self.data_format == 'NCHW':
            out_shape = (batch_size, output_channels, output_h, output_w)
        else:
            out_shape = (batch_size, output_h, output_w, output_channels)
        output_shape = tf.stack(out_shape)
        outputs = tf.nn.conv2d_transpose(
            input=input, filters=filters, output_shape=output_shape, strides=self.strides, padding=self.padding,
            data_format=self.data_format, dilations=self.dilations, name=self.name
        )
        return outputs

def conv2d_transpose(
    input, filters, output_shape, strides, padding='SAME', data_format='NHWC', dilations=None, name=None
):
    """
    The transpose of conv2d.
    Parameters
    ----------
    input : tensor
        A 4-D Tensor of type float and shape [batch, height, width, in_channels]
        for NHWC data format or [batch, in_channels, height, width] for NCHW data format.
    filters : tensor
        A 4-D Tensor with the same type as input and shape [height, width,
        output_channels, in_channels]. filter's in_channels dimension must match that of input.
    output_shape : tensor
        A 1-D Tensor representing the output shape of the deconvolution op.
    strides : list
        An int or list of ints that has length 1, 2 or 4. The stride of the sliding window for each dimension of input.
        If a single value is given it is replicated in the H and W dimension.
        By default the N and C dimensions are set to 1.
        The dimension order is determined by the value of data_format, see below for details.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NHWC' and 'NCHW' are supported.
    dilations : list
        An int or list of ints that has length 1, 2 or 4, defaults to 1.
    name : string
        Optional name for the returned tensor.
    Returns
    -------
    A Tensor with the same type as input.
    """
    data_format, padding = preprocess_2d_format(data_format, padding)
    outputs = tf.nn.conv2d_transpose(
        input=input,
        filters=filters,
        output_shape=output_shape,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name,
    )
    return outputs

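# Usage sketch (illustrative shapes): a stride-2 transpose convolution doubles
# the spatial dimensions under 'SAME' padding.
#
#   x = tf.random.normal([1, 8, 8, 16])      # NHWC
#   w = tf.random.normal([3, 3, 8, 16])      # [height, width, output_channels, in_channels]
#   y = conv2d_transpose(x, w, output_shape=[1, 16, 16, 8], strides=[1, 2, 2, 1])
#   # y.shape == [1, 16, 16, 8]
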
class Conv3d_transpose(object):

    def __init__(
        self, strides, padding, data_format='NDHWC', dilations=None, name=None, out_channel=None, k_size=None,
        in_channels=None
    ):
        self.strides = strides
        self.dilations = dilations
        self.name = name
        self.out_channel = out_channel
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)

    def __call__(self, input, filters):
        if self.data_format == 'NDHWC':
            d_axis, h_axis, w_axis = 1, 2, 3
        else:
            d_axis, h_axis, w_axis = 2, 3, 4
        input_shape = input.shape.as_list()
        filters_shape = filters.shape.as_list()
        batch_size = input_shape[0]
        input_d, input_h, input_w = input_shape[d_axis], input_shape[h_axis], input_shape[w_axis]
        kernel_d, kernel_h, kernel_w = filters_shape[0], filters_shape[1], filters_shape[2]
        dilations_d, dilations_h, dilations_w = 1, 1, 1
        if isinstance(self.strides, int):
            # a scalar stride applies to all three spatial dimensions
            strides_d = strides_h = strides_w = self.strides
        else:
            strides_list = list(self.strides)
            if len(strides_list) == 3:
                strides_d, strides_h, strides_w = \
                    strides_list[0], \
                    strides_list[1], \
                    strides_list[2]
            elif len(strides_list) == 5:
                strides_d, strides_h, strides_w = \
                    strides_list[d_axis], \
                    strides_list[h_axis], \
                    strides_list[w_axis]
        if self.dilations is not None:
            if isinstance(self.dilations, int):
                # a scalar dilation applies to all three spatial dimensions
                dilations_d = dilations_h = dilations_w = self.dilations
            else:
                dilations_list = list(self.dilations)
                if len(dilations_list) == 3:
                    dilations_d, dilations_h, dilations_w = \
                        dilations_list[0], \
                        dilations_list[1], \
                        dilations_list[2]
                elif len(dilations_list) == 5:
                    dilations_d, dilations_h, dilations_w = \
                        dilations_list[d_axis], \
                        dilations_list[h_axis], \
                        dilations_list[w_axis]
        assert self.padding in {'VALID', 'SAME'}
        kernel_d = kernel_d + (kernel_d - 1) * (dilations_d - 1)
        kernel_h = kernel_h + (kernel_h - 1) * (dilations_h - 1)
        kernel_w = kernel_w + (kernel_w - 1) * (dilations_w - 1)
        if self.padding == 'VALID':
            output_d = input_d * strides_d + max(kernel_d - strides_d, 0)
            output_h = input_h * strides_h + max(kernel_h - strides_h, 0)
            output_w = input_w * strides_w + max(kernel_w - strides_w, 0)
        elif self.padding == 'SAME':
            output_d = input_d * strides_d
            output_h = input_h * strides_h
            output_w = input_w * strides_w
        if self.data_format == 'NDHWC':
            output_shape = (batch_size, output_d, output_h, output_w, self.out_channel)
        else:
            output_shape = (batch_size, self.out_channel, output_d, output_h, output_w)
        output_shape = tf.stack(output_shape)
        outputs = tf.nn.conv3d_transpose(
            input=input, filters=filters, output_shape=output_shape, strides=self.strides, padding=self.padding,
            data_format=self.data_format, dilations=self.dilations, name=self.name
        )
        return outputs

def conv3d_transpose(
    input, filters, output_shape, strides, padding='SAME', data_format='NDHWC', dilations=None, name=None
):
    """
    The transpose of conv3d.
    Parameters
    ----------
    input : tensor
        A 5-D Tensor of type float and shape [batch, depth, height, width, in_channels] for
        NDHWC data format or [batch, in_channels, depth, height, width] for NCDHW data format.
    filters : tensor
        A 5-D Tensor with the same type as input and shape [depth, height, width, output_channels, in_channels].
        filter's in_channels dimension must match that of input.
    output_shape : tensor
        A 1-D Tensor representing the output shape of the deconvolution op.
    strides : list
        An int or list of ints that has length 1, 3 or 5.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NDHWC' and 'NCDHW' are supported.
    dilations : list of ints
        An int or list of ints that has length 1, 3 or 5, defaults to 1.
    name : string
        Optional name for the returned tensor.
    Returns
    -------
    A Tensor with the same type as input.
    """
    data_format, padding = preprocess_3d_format(data_format, padding)
    outputs = tf.nn.conv3d_transpose(
        input=input, filters=filters, output_shape=output_shape, strides=strides, padding=padding,
        data_format=data_format, dilations=dilations, name=name
    )
    return outputs

# NOTE: this redefinition shadows the depthwise_conv2d defined earlier in this
# module; it differs only in the parameter name ('filters' vs 'filter') and its defaults.
def depthwise_conv2d(input, filters, strides, padding='SAME', data_format='NHWC', dilations=None, name=None):
    """
    Depthwise 2-D convolution.
    Parameters
    ----------
    input : tensor
        4-D with shape according to data_format.
    filters : tensor
        4-D with shape [filter_height, filter_width, in_channels, channel_multiplier].
    strides : tuple
        1-D of size 4. The stride of the sliding window for each dimension of input.
    padding : string
        'VALID' or 'SAME'
    data_format : string
        "NHWC" (default) or "NCHW".
    dilations : tuple
        The dilation rate in which we sample input values across the height and width dimensions in atrous convolution.
        If it is greater than 1, then all values of strides must be 1.
    name : string
        A name for this operation (optional).
    Returns
    -------
    A 4-D Tensor with shape according to data_format.
    """
    data_format, padding = preprocess_2d_format(data_format, padding)
    outputs = tf.nn.depthwise_conv2d(
        input=input,
        filter=filters,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name,
    )
    return outputs

def _to_channel_first_bias(b):
    """Reshape [c] to [c, 1, 1]."""
    channel_size = int(b.shape[0])
    new_shape = (channel_size, 1, 1)
    return tf.reshape(b, new_shape)

def _bias_scale(x, b, data_format):
    """The multiplication counterpart of tf.nn.bias_add."""
    if data_format == 'NHWC':
        return x * b
    elif data_format == 'NCHW':
        return x * _to_channel_first_bias(b)
    else:
        raise ValueError('invalid data_format: %s' % data_format)

def _bias_add(x, b, data_format):
    """Alternative implementation of tf.nn.bias_add which is compatible with TensorRT."""
    if data_format == 'NHWC':
        return tf.add(x, b)
    elif data_format == 'NCHW':
        return tf.add(x, _to_channel_first_bias(b))
    else:
        raise ValueError('invalid data_format: %s' % data_format)

def batch_normalization(x, mean, variance, offset, scale, variance_epsilon, data_format, name=None):
    """Data-format-aware version of tf.nn.batch_normalization."""
    if data_format == 'channels_last':
        mean = tf.reshape(mean, [1] * (len(x.shape) - 1) + [-1])
        variance = tf.reshape(variance, [1] * (len(x.shape) - 1) + [-1])
        offset = tf.reshape(offset, [1] * (len(x.shape) - 1) + [-1])
        scale = tf.reshape(scale, [1] * (len(x.shape) - 1) + [-1])
    elif data_format == 'channels_first':
        mean = tf.reshape(mean, [1] + [-1] + [1] * (len(x.shape) - 2))
        variance = tf.reshape(variance, [1] + [-1] + [1] * (len(x.shape) - 2))
        offset = tf.reshape(offset, [1] + [-1] + [1] * (len(x.shape) - 2))
        scale = tf.reshape(scale, [1] + [-1] + [1] * (len(x.shape) - 2))
    else:
        raise ValueError('invalid data_format: %s' % data_format)
    with ops.name_scope(name, 'batchnorm', [x, mean, variance, scale, offset]):
        inv = math_ops.rsqrt(variance + variance_epsilon)
        if scale is not None:
            inv *= scale
        a = math_ops.cast(inv, x.dtype)
        b = math_ops.cast(offset - mean * inv if offset is not None else -mean * inv, x.dtype)
        # Return a * x + b with customized data_format.
        # Currently TF doesn't have bias_scale, and TensorRT has a bug in converting tf.nn.bias_add,
        # so we reimplemented them to make the model work with TensorRT.
        # See https://github.com/tensorlayer/openpose-plus/issues/75 for more details.
        # df = {'channels_first': 'NCHW', 'channels_last': 'NHWC'}
        # return _bias_add(_bias_scale(x, a, df[data_format]), b, df[data_format])
        return a * x + b

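# Usage sketch (illustrative shapes, assuming TF2 eager mode): normalize an
# NHWC batch with freshly computed per-channel statistics, as the BatchNorm
# class below does in training mode.
#
#   x = tf.random.normal([4, 8, 8, 3])
#   mean, var = tf.nn.moments(x, axes=[0, 1, 2])
#   y = batch_normalization(x, mean, var, offset=tf.zeros([3]), scale=tf.ones([3]),
#                           variance_epsilon=1e-5, data_format='channels_last')
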
class BatchNorm(object):
    """
    The :class:`BatchNorm` is a batch normalization layer for both fully-connected and convolution outputs.
    See ``tf.nn.batch_normalization`` and ``tf.nn.moments``.
    Parameters
    ----------
    decay : float
        A decay factor for `ExponentialMovingAverage`.
        A larger value is suggested for large datasets.
    epsilon : float
        Epsilon.
    act : activation function
        The activation function of this layer.
    is_train : boolean
        Is being used for training or inference.
    beta_init : initializer or None
        The initializer for initializing beta, if None, skip beta.
        Usually you should not skip beta unless you know what happened.
    gamma_init : initializer or None
        The initializer for initializing gamma, if None, skip gamma.
        When the batch normalization layer is used instead of 'biases', or the next layer is linear, this can be
        disabled since the scaling can be done by the next layer. See `Inception-ResNet-v2 <https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_resnet_v2.py>`__.
    moving_mean_init : initializer or None
        The initializer for initializing moving mean, if None, skip moving mean.
    moving_var_init : initializer or None
        The initializer for initializing moving var, if None, skip moving var.
    num_features : int
        Number of features for input tensor. Useful to build layer if using BatchNorm1d, BatchNorm2d or BatchNorm3d,
        but should be left as None if using BatchNorm. Default None.
    data_format : str
        'channels_last' (default) or 'channels_first'.
    name : None or str
        A unique layer name.
    Examples
    ---------
    With TensorLayer
    >>> net = tl.layers.Input([None, 50, 50, 32], name='input')
    >>> net = tl.layers.BatchNorm()(net)
    Notes
    -----
    The :class:`BatchNorm` is universally suitable for 3D/4D/5D input in static models, but should not be used
    in dynamic models where the layer is built upon class initialization. So the argument 'num_features' should only be
    used for the subclasses :class:`BatchNorm1d`, :class:`BatchNorm2d` and :class:`BatchNorm3d`. All three subclasses
    are suitable under all kinds of conditions.
    References
    ----------
    - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`__
    - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`__
    """

    def __init__(
        self, decay=0.9, epsilon=0.00001, beta=None, gamma=None, moving_mean=None, moving_var=None, num_features=None,
        data_format='channels_last', is_train=False
    ):
        self.decay = decay
        self.epsilon = epsilon
        self.data_format = data_format
        self.beta = beta
        self.gamma = gamma
        self.moving_mean = moving_mean
        self.moving_var = moving_var
        self.num_features = num_features
        self.is_train = is_train
        self.axes = None

        if self.decay < 0.0 or 1.0 < self.decay:
            raise ValueError("decay should be between 0 and 1")

    def _get_param_shape(self, inputs_shape):
        if self.data_format == 'channels_last':
            axis = -1
        elif self.data_format == 'channels_first':
            axis = 1
        else:
            raise ValueError('data_format should be either %s or %s' % ('channels_last', 'channels_first'))

        channels = inputs_shape[axis]
        params_shape = [channels]
        return params_shape

    def _check_input_shape(self, inputs):
        if inputs.ndim <= 1:
            raise ValueError('expected input at least 2D, but got {}D input'.format(inputs.ndim))

    def __call__(self, inputs):
        self._check_input_shape(inputs)
        self.channel_axis = len(inputs.shape) - 1 if self.data_format == 'channels_last' else 1
        if self.axes is None:
            self.axes = [i for i in range(len(inputs.shape)) if i != self.channel_axis]

        mean, var = tf.nn.moments(inputs, self.axes, keepdims=False)
        if self.is_train:
            # update moving_mean and moving_var
            self.moving_mean = moving_averages.assign_moving_average(
                self.moving_mean, mean, self.decay, zero_debias=False
            )
            self.moving_var = moving_averages.assign_moving_average(self.moving_var, var, self.decay, zero_debias=False)
            outputs = batch_normalization(inputs, mean, var, self.beta, self.gamma, self.epsilon, self.data_format)
        else:
            outputs = batch_normalization(
                inputs, self.moving_mean, self.moving_var, self.beta, self.gamma, self.epsilon, self.data_format
            )
        return outputs
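
# Hedged usage sketch (not part of the original API; names are hypothetical):
# running BatchNorm with is_train=True normalizes with the batch statistics
# and updates the moving statistics; is_train=False reuses the stored ones.
def _demo_batch_norm():
    x = tf.random.normal([4, 8, 8, 16])  # NHWC batch, 16 channels
    bn = BatchNorm(
        decay=0.9, epsilon=1e-5,
        beta=tf.Variable(tf.zeros([16])), gamma=tf.Variable(tf.ones([16])),
        moving_mean=tf.Variable(tf.zeros([16]), trainable=False),
        moving_var=tf.Variable(tf.ones([16]), trainable=False),
        data_format='channels_last', is_train=True,
    )
    y = bn(x)  # normalized output; moving statistics updated as a side effect
    return y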

class GroupConv2D(object):

    def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, groups):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.strides = strides
        self.dilations = dilations
        self.groups = groups
        if self.data_format == 'NHWC':
            self.channels_axis = 3
        else:
            self.channels_axis = 1

    def __call__(self, input, filters):
        if self.groups == 1:
            outputs = tf.nn.conv2d(
                input=input,
                filters=filters,
                strides=self.strides,
                padding=self.padding,
                data_format=self.data_format,
                dilations=self.dilations,
            )
        else:
            # Split the input along its channel axis, convolve each group with
            # its own slice of the filters, then concatenate the results.
            inputgroups = tf.split(input, num_or_size_splits=self.groups, axis=self.channels_axis)
            # conv2d filters are HWIO regardless of data_format, so the split
            # is along the output-channel axis (axis 3), not self.channels_axis.
            weightsgroups = tf.split(filters, num_or_size_splits=self.groups, axis=3)
            convgroups = []
            for i, k in zip(inputgroups, weightsgroups):
                convgroups.append(
                    tf.nn.conv2d(
                        input=i,
                        filters=k,
                        strides=self.strides,
                        padding=self.padding,
                        data_format=self.data_format,
                        dilations=self.dilations,
                    )
                )
            outputs = tf.concat(axis=self.channels_axis, values=convgroups)
        return outputs
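
# Hedged usage sketch (not part of the original API; names are hypothetical):
# with groups=2, each half of the input channels is convolved with its own
# half of the filters, so a [3, 3, 2, 6] HWIO kernel over 4 input channels
# yields 6 output channels.
def _demo_group_conv2d():
    x = tf.random.normal([1, 8, 8, 4])  # NHWC, 4 input channels
    w = tf.random.normal([3, 3, 2, 6])  # in_channels/groups = 2, out_channels = 6
    conv = GroupConv2D(
        strides=[1, 1, 1, 1], padding='SAME', data_format='NHWC',
        dilations=[1, 1, 1, 1], out_channel=6, k_size=3, groups=2,
    )
    y = conv(x, w)  # shape [1, 8, 8, 6]
    return y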

class SeparableConv1D(object):

    def __init__(self, stride, padding, data_format, dilations, out_channel, k_size, in_channel, depth_multiplier):
        self.data_format, self.padding = preprocess_1d_format(data_format, padding)
        if self.data_format == 'NWC':
            self.spatial_start_dim = 1
            self.strides = (1, stride, stride, 1)
            self.data_format = 'NHWC'
        else:
            self.spatial_start_dim = 2
            self.strides = (1, 1, stride, stride)
            self.data_format = 'NCHW'
        self.dilation_rate = (1, dilations)

    def __call__(self, inputs, depthwise_filters, pointwise_filters):
        # Lift the 1D case to 2D by inserting a dummy spatial dimension, run
        # separable_conv2d, then squeeze the dummy dimension back out.
        inputs = tf.expand_dims(inputs, axis=self.spatial_start_dim)
        depthwise_filters = tf.expand_dims(depthwise_filters, 0)
        pointwise_filters = tf.expand_dims(pointwise_filters, 0)
        outputs = tf.nn.separable_conv2d(
            inputs, depthwise_filters, pointwise_filters, strides=self.strides, padding=self.padding,
            dilations=self.dilation_rate, data_format=self.data_format
        )
        outputs = tf.squeeze(outputs, axis=self.spatial_start_dim)
        return outputs

class SeparableConv2D(object):

    def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, in_channel, depth_multiplier):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.strides = strides
        self.dilations = (dilations[2], dilations[2])

    def __call__(self, inputs, depthwise_filters, pointwise_filters):
        outputs = tf.nn.separable_conv2d(
            inputs, depthwise_filters, pointwise_filters, strides=self.strides, padding=self.padding,
            dilations=self.dilations, data_format=self.data_format
        )
        return outputs
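
# Hedged usage sketch (not part of the original API; names are hypothetical):
# a separable convolution factors into a per-channel depthwise pass followed
# by a 1x1 pointwise pass, so the pointwise filter's input depth must equal
# in_channels * depth_multiplier.
def _demo_separable_conv2d():
    x = tf.random.normal([1, 8, 8, 4])           # NHWC, 4 input channels
    depthwise = tf.random.normal([3, 3, 4, 2])   # depth_multiplier = 2
    pointwise = tf.random.normal([1, 1, 8, 16])  # 4 * 2 = 8 -> 16 channels
    conv = SeparableConv2D(
        strides=[1, 1, 1, 1], padding='SAME', data_format='NHWC',
        dilations=[1, 1, 1, 1], out_channel=16, k_size=3, in_channel=4,
        depth_multiplier=2,
    )
    y = conv(x, depthwise, pointwise)  # shape [1, 8, 8, 16]
    return y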

class AdaptiveMeanPool1D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_1d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, input):
        if self.data_format == 'NWC':
            n, w, c = input.shape
        else:
            n, c, w = input.shape

        stride = floor(w / self.output_size)
        kernel = w - (self.output_size - 1) * stride
        output = tf.nn.avg_pool1d(input, ksize=kernel, strides=stride, data_format=self.data_format, padding='VALID')
        return output

class AdaptiveMeanPool2D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_2d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        if self.data_format == 'NHWC':
            n, h, w, c = inputs.shape
        else:
            n, c, h, w = inputs.shape

        out_h, out_w = self.output_size
        stride_h = floor(h / out_h)
        kernel_h = h - (out_h - 1) * stride_h
        stride_w = floor(w / out_w)
        kernel_w = w - (out_w - 1) * stride_w
        outputs = tf.nn.avg_pool2d(
            inputs, ksize=(kernel_h, kernel_w), strides=(stride_h, stride_w), data_format=self.data_format,
            padding='VALID'
        )
        return outputs
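
# Hedged sketch (illustrative only; the demo name is hypothetical): the
# adaptive pools derive a fixed kernel and stride so that exactly
# `output_size` windows fit. For w = 10 and output_size = 4:
# stride = floor(10 / 4) = 2 and kernel = 10 - (4 - 1) * 2 = 4, so the
# windows start at 0, 2, 4, 6 and the last one ends at index 9.
def _demo_adaptive_mean_pool2d():
    x = tf.random.normal([1, 10, 10, 3])  # NHWC
    pool = AdaptiveMeanPool2D(output_size=(4, 4), data_format='NHWC')
    y = pool(x)  # shape [1, 4, 4, 3]
    return y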

class AdaptiveMeanPool3D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_3d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        if self.data_format == 'NDHWC':
            n, d, h, w, c = inputs.shape
        else:
            n, c, d, h, w = inputs.shape

        out_d, out_h, out_w = self.output_size
        stride_d = floor(d / out_d)
        kernel_d = d - (out_d - 1) * stride_d
        stride_h = floor(h / out_h)
        kernel_h = h - (out_h - 1) * stride_h
        stride_w = floor(w / out_w)
        kernel_w = w - (out_w - 1) * stride_w
        outputs = tf.nn.avg_pool3d(
            inputs, ksize=(kernel_d, kernel_h, kernel_w), strides=(stride_d, stride_h, stride_w),
            data_format=self.data_format, padding='VALID'
        )
        return outputs

class AdaptiveMaxPool1D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_1d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, input):
        if self.data_format == 'NWC':
            n, w, c = input.shape
        else:
            n, c, w = input.shape

        stride = floor(w / self.output_size)
        kernel = w - (self.output_size - 1) * stride
        output = tf.nn.max_pool1d(input, ksize=kernel, strides=stride, data_format=self.data_format, padding='VALID')
        return output

class AdaptiveMaxPool2D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_2d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        if self.data_format == 'NHWC':
            n, h, w, c = inputs.shape
        else:
            n, c, h, w = inputs.shape

        out_h, out_w = self.output_size
        stride_h = floor(h / out_h)
        kernel_h = h - (out_h - 1) * stride_h
        stride_w = floor(w / out_w)
        kernel_w = w - (out_w - 1) * stride_w
        outputs = tf.nn.max_pool2d(
            inputs, ksize=(kernel_h, kernel_w), strides=(stride_h, stride_w), data_format=self.data_format,
            padding='VALID'
        )
        return outputs

class AdaptiveMaxPool3D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_3d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        if self.data_format == 'NDHWC':
            n, d, h, w, c = inputs.shape
        else:
            n, c, d, h, w = inputs.shape

        out_d, out_h, out_w = self.output_size
        stride_d = floor(d / out_d)
        kernel_d = d - (out_d - 1) * stride_d
        stride_h = floor(h / out_h)
        kernel_h = h - (out_h - 1) * stride_h
        stride_w = floor(w / out_w)
        kernel_w = w - (out_w - 1) * stride_w
        outputs = tf.nn.max_pool3d(
            inputs, ksize=(kernel_d, kernel_h, kernel_w), strides=(stride_d, stride_h, stride_w),
            data_format=self.data_format, padding='VALID'
        )
        return outputs

class BinaryConv2D(object):

    def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, in_channel):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.strides = strides
        self.dilations = dilations

    # @tf.RegisterGradient("TL_Sign_QuantizeGrad")
    # def _quantize_grad(op, grad):
    #     """Clip and binarize tensor using the straight through estimator (STE) for the gradient."""
    #     return tf.clip_by_value(grad, -1, 1)

    def quantize(self, x):
        # ref: https://github.com/AngusG/tensorflow-xnor-bnn/blob/master/models/binary_net.py#L70
        #      https://github.com/itayhubara/BinaryNet.tf/blob/master/nnUtils.py
        with tf.compat.v1.get_default_graph().gradient_override_map({"Sign": "TL_Sign_QuantizeGrad"}):
            return tf.sign(x)

    def __call__(self, inputs, filters):
        filters = self.quantize(filters)
        outputs = tf.nn.conv2d(
            input=inputs, filters=filters, strides=self.strides, padding=self.padding, data_format=self.data_format,
            dilations=self.dilations
        )
        return outputs
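
# Hedged sketch (illustrative only, separate from the class above): the same
# straight-through estimator can be written in TF2 eager style with
# tf.custom_gradient -- the forward pass binarizes with sign(x), while the
# backward pass forwards the incoming gradient clipped to [-1, 1], matching
# the commented-out TL_Sign_QuantizeGrad above.
@tf.custom_gradient
def _sign_ste(x):

    def grad(dy):
        return tf.clip_by_value(dy, -1.0, 1.0)

    return tf.sign(x), grad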

class DorefaConv2D(object):

    def __init__(self, bitW, bitA, strides, padding, data_format, dilations, out_channel, k_size, in_channel):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.strides = strides
        self.dilations = dilations
        self.bitW = bitW
        self.bitA = bitA

    def _quantize_dorefa(self, x, k):
        G = tf.compat.v1.get_default_graph()
        n = float(2**k - 1)
        with G.gradient_override_map({"Round": "Identity"}):
            return tf.round(x * n) / n

    def cabs(self, x):
        # Clip activations into [0, 1] before quantization.
        return tf.minimum(1.0, tf.abs(x), name='cabs')

    def quantize_active(self, x, bitA):
        if bitA == 32:
            return x
        return self._quantize_dorefa(x, bitA)

    def quantize_weight(self, x, bitW, force_quantization=False):
        G = tf.compat.v1.get_default_graph()
        if bitW == 32 and not force_quantization:
            return x
        if bitW == 1:  # BWN
            with G.gradient_override_map({"Sign": "Identity"}):
                E = tf.stop_gradient(tf.reduce_mean(input_tensor=tf.abs(x)))
                return tf.sign(x / E) * E
        x = tf.clip_by_value(
            x * 0.5 + 0.5, 0.0, 1.0
        )  # it seems as though most weights are within the -1 to 1 region anyway
        return 2 * self._quantize_dorefa(x, bitW) - 1

    def __call__(self, inputs, filters):
        inputs = self.quantize_active(self.cabs(inputs), self.bitA)
        filters = self.quantize_weight(filters, self.bitW)
        outputs = tf.nn.conv2d(
            input=inputs,
            filters=filters,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )
        return outputs
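
# Hedged sketch (illustrative only; the demo name is hypothetical):
# _quantize_dorefa snaps a value in [0, 1] onto a grid of 2**k levels, i.e.
# multiples of 1/(2**k - 1). For k = 2 bits, n = 3 and x = 0.4 becomes
# round(0.4 * 3) / 3 = 1/3.
def _demo_dorefa_levels():
    conv = DorefaConv2D(
        bitW=2, bitA=2, strides=[1, 1, 1, 1], padding='SAME',
        data_format='NHWC', dilations=[1, 1, 1, 1],
        out_channel=8, k_size=3, in_channel=3,
    )
    x = tf.constant([0.0, 0.4, 0.9, 1.0])
    return conv._quantize_dorefa(x, 2)  # -> [0., 1/3, 1., 1.]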

TensorLayer3.0 is a deep learning library that supports multiple deep learning frameworks as computational backends, with planned support for TensorFlow, PyTorch, MindSpore, and Paddle.