You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

prepro.py 139 kB

4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114
  1. #! /usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import copy
  4. import math
  5. import random
  6. import threading
  7. import time
  8. import numpy as np
  9. import PIL
  10. import scipy
  11. import scipy.ndimage as ndi
  12. import skimage
  13. from scipy import linalg
  14. from scipy.ndimage.filters import gaussian_filter
  15. from scipy.ndimage.interpolation import map_coordinates
  16. from six.moves import range
  17. from skimage import exposure, transform
  18. from skimage.morphology import binary_dilation as _binary_dilation
  19. from skimage.morphology import binary_erosion as _binary_erosion
  20. from skimage.morphology import disk
  21. from skimage.morphology import erosion as _erosion
  22. from skimage.transform import resize
  23. import tensorlayer as tl
  24. from tensorlayer.lazy_imports import LazyImport
  25. cv2 = LazyImport("cv2")
  26. # linalg https://docs.scipy.org/doc/scipy/reference/linalg.html
  27. # ndimage https://docs.scipy.org/doc/scipy/reference/ndimage.html
  28. __all__ = [
  29. 'threading_data',
  30. 'affine_rotation_matrix',
  31. 'affine_horizontal_flip_matrix',
  32. 'affine_shift_matrix',
  33. 'affine_shear_matrix',
  34. 'affine_zoom_matrix',
  35. 'affine_respective_zoom_matrix',
  36. 'transform_matrix_offset_center',
  37. 'affine_transform',
  38. 'affine_transform_cv2',
  39. 'affine_transform_keypoints',
  40. 'projective_transform_by_points',
  41. 'rotation',
  42. 'rotation_multi',
  43. 'crop',
  44. 'crop_multi',
  45. 'flip_axis',
  46. 'flip_axis_multi',
  47. 'shift',
  48. 'shift_multi',
  49. 'shear',
  50. 'shear_multi',
  51. 'shear2',
  52. 'shear_multi2',
  53. 'swirl',
  54. 'swirl_multi',
  55. 'elastic_transform',
  56. 'elastic_transform_multi',
  57. 'zoom',
  58. 'respective_zoom',
  59. 'zoom_multi',
  60. 'brightness',
  61. 'brightness_multi',
  62. 'illumination',
  63. 'rgb_to_hsv',
  64. 'hsv_to_rgb',
  65. 'adjust_hue',
  66. 'imresize',
  67. 'pixel_value_scale',
  68. 'samplewise_norm',
  69. 'featurewise_norm',
  70. 'get_zca_whitening_principal_components_img',
  71. 'zca_whitening',
  72. 'channel_shift',
  73. 'channel_shift_multi',
  74. 'drop',
  75. 'array_to_img',
  76. 'find_contours',
  77. 'pt2map',
  78. 'binary_dilation',
  79. 'dilation',
  80. 'binary_erosion',
  81. 'erosion',
  82. 'obj_box_coords_rescale',
  83. 'obj_box_coord_rescale',
  84. 'obj_box_coord_scale_to_pixelunit',
  85. 'obj_box_coord_centroid_to_upleft_butright',
  86. 'obj_box_coord_upleft_butright_to_centroid',
  87. 'obj_box_coord_centroid_to_upleft',
  88. 'obj_box_coord_upleft_to_centroid',
  89. 'parse_darknet_ann_str_to_list',
  90. 'parse_darknet_ann_list_to_cls_box',
  91. 'obj_box_left_right_flip',
  92. 'obj_box_imresize',
  93. 'obj_box_crop',
  94. 'obj_box_shift',
  95. 'obj_box_zoom',
  96. 'pad_sequences',
  97. 'remove_pad_sequences',
  98. 'process_sequences',
  99. 'sequences_add_start_id',
  100. 'sequences_add_end_id',
  101. 'sequences_add_end_id_after_pad',
  102. 'sequences_get_mask',
  103. 'keypoint_random_crop',
  104. 'keypoint_resize_random_crop',
  105. 'keypoint_random_rotate',
  106. 'keypoint_random_flip',
  107. 'keypoint_random_resize',
  108. 'keypoint_random_resize_shortestedge',
  109. ]
  110. def threading_data(data=None, fn=None, thread_count=None, **kwargs):
  111. """Process a batch of data by given function by threading.
  112. Usually be used for data augmentation.
  113. Parameters
  114. -----------
  115. data : numpy.array or others
  116. The data to be processed.
  117. thread_count : int
  118. The number of threads to use.
  119. fn : function
  120. The function for data processing.
  121. more args : the args for `fn`
  122. Ssee Examples below.
  123. Examples
  124. --------
  125. Process images.
  126. >>> images, _, _, _ = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3))
  127. >>> images = tl.prepro.threading_data(images[0:32], tl.prepro.zoom, zoom_range=[0.5, 1])
  128. Customized image preprocessing function.
  129. >>> def distort_img(x):
  130. >>> x = tl.prepro.flip_axis(x, axis=0, is_random=True)
  131. >>> x = tl.prepro.flip_axis(x, axis=1, is_random=True)
  132. >>> x = tl.prepro.crop(x, 100, 100, is_random=True)
  133. >>> return x
  134. >>> images = tl.prepro.threading_data(images, distort_img)
  135. Process images and masks together (Usually be used for image segmentation).
  136. >>> X, Y --> [batch_size, row, col, 1]
  137. >>> data = tl.prepro.threading_data([_ for _ in zip(X, Y)], tl.prepro.zoom_multi, zoom_range=[0.5, 1], is_random=True)
  138. data --> [batch_size, 2, row, col, 1]
  139. >>> X_, Y_ = data.transpose((1,0,2,3,4))
  140. X_, Y_ --> [batch_size, row, col, 1]
  141. >>> tl.vis.save_image(X_, 'images.png')
  142. >>> tl.vis.save_image(Y_, 'masks.png')
  143. Process images and masks together by using ``thread_count``.
  144. >>> X, Y --> [batch_size, row, col, 1]
  145. >>> data = tl.prepro.threading_data(X, tl.prepro.zoom_multi, 8, zoom_range=[0.5, 1], is_random=True)
  146. data --> [batch_size, 2, row, col, 1]
  147. >>> X_, Y_ = data.transpose((1,0,2,3,4))
  148. X_, Y_ --> [batch_size, row, col, 1]
  149. >>> tl.vis.save_image(X_, 'after.png')
  150. >>> tl.vis.save_image(Y_, 'before.png')
  151. Customized function for processing images and masks together.
  152. >>> def distort_img(data):
  153. >>> x, y = data
  154. >>> x, y = tl.prepro.flip_axis_multi([x, y], axis=0, is_random=True)
  155. >>> x, y = tl.prepro.flip_axis_multi([x, y], axis=1, is_random=True)
  156. >>> x, y = tl.prepro.crop_multi([x, y], 100, 100, is_random=True)
  157. >>> return x, y
  158. >>> X, Y --> [batch_size, row, col, channel]
  159. >>> data = tl.prepro.threading_data([_ for _ in zip(X, Y)], distort_img)
  160. >>> X_, Y_ = data.transpose((1,0,2,3,4))
  161. Returns
  162. -------
  163. list or numpyarray
  164. The processed results.
  165. References
  166. ----------
  167. - `python queue <https://pymotw.com/2/Queue/index.html#module-Queue>`__
  168. - `run with limited queue <http://effbot.org/librarybook/queue.htm>`__
  169. """
  170. def apply_fn(results, i, data, kwargs):
  171. results[i] = fn(data, **kwargs)
  172. if thread_count is None:
  173. results = [None] * len(data)
  174. threads = []
  175. # for i in range(len(data)):
  176. # t = threading.Thread(name='threading_and_return', target=apply_fn, args=(results, i, data[i], kwargs))
  177. for i, d in enumerate(data):
  178. t = threading.Thread(name='threading_and_return', target=apply_fn, args=(results, i, d, kwargs))
  179. t.start()
  180. threads.append(t)
  181. else:
  182. divs = np.linspace(0, len(data), thread_count + 1)
  183. divs = np.round(divs).astype(int)
  184. results = [None] * thread_count
  185. threads = []
  186. for i in range(thread_count):
  187. t = threading.Thread(
  188. name='threading_and_return', target=apply_fn, args=(results, i, data[divs[i]:divs[i + 1]], kwargs)
  189. )
  190. t.start()
  191. threads.append(t)
  192. for t in threads:
  193. t.join()
  194. if thread_count is None:
  195. try:
  196. return np.asarray(results)
  197. except Exception:
  198. return results
  199. else:
  200. return np.concatenate(results)
  201. def affine_rotation_matrix(angle=(-20, 20)):
  202. """Create an affine transform matrix for image rotation.
  203. NOTE: In OpenCV, x is width and y is height.
  204. Parameters
  205. -----------
  206. angle : int/float or tuple of two int/float
  207. Degree to rotate, usually -180 ~ 180.
  208. - int/float, a fixed angle.
  209. - tuple of 2 floats/ints, randomly sample a value as the angle between these 2 values.
  210. Returns
  211. -------
  212. numpy.array
  213. An affine transform matrix.
  214. """
  215. if isinstance(angle, tuple):
  216. theta = np.pi / 180 * np.random.uniform(angle[0], angle[1])
  217. else:
  218. theta = np.pi / 180 * angle
  219. rotation_matrix = np.array([[np.cos(theta), np.sin(theta), 0], \
  220. [-np.sin(theta), np.cos(theta), 0], \
  221. [0, 0, 1]])
  222. return rotation_matrix
  223. def affine_horizontal_flip_matrix(prob=0.5):
  224. """Create an affine transformation matrix for image horizontal flipping.
  225. NOTE: In OpenCV, x is width and y is height.
  226. Parameters
  227. ----------
  228. prob : float
  229. Probability to flip the image. 1.0 means always flip.
  230. Returns
  231. -------
  232. numpy.array
  233. An affine transform matrix.
  234. """
  235. factor = np.random.uniform(0, 1)
  236. if prob >= factor:
  237. filp_matrix = np.array([[ -1. , 0., 0. ], \
  238. [ 0., 1., 0. ], \
  239. [ 0., 0., 1. ]])
  240. return filp_matrix
  241. else:
  242. filp_matrix = np.array([[ 1. , 0., 0. ], \
  243. [ 0., 1., 0. ], \
  244. [ 0., 0., 1. ]])
  245. return filp_matrix
  246. def affine_vertical_flip_matrix(prob=0.5):
  247. """Create an affine transformation for image vertical flipping.
  248. NOTE: In OpenCV, x is width and y is height.
  249. Parameters
  250. ----------
  251. prob : float
  252. Probability to flip the image. 1.0 means always flip.
  253. Returns
  254. -------
  255. numpy.array
  256. An affine transform matrix.
  257. """
  258. factor = np.random.uniform(0, 1)
  259. if prob >= factor:
  260. filp_matrix = np.array([[ 1. , 0., 0. ], \
  261. [ 0., -1., 0. ], \
  262. [ 0., 0., 1. ]])
  263. return filp_matrix
  264. else:
  265. filp_matrix = np.array([[ 1. , 0., 0. ], \
  266. [ 0., 1., 0. ], \
  267. [ 0., 0., 1. ]])
  268. return filp_matrix
  269. def affine_shift_matrix(wrg=(-0.1, 0.1), hrg=(-0.1, 0.1), w=200, h=200):
  270. """Create an affine transform matrix for image shifting.
  271. NOTE: In OpenCV, x is width and y is height.
  272. Parameters
  273. -----------
  274. wrg : float or tuple of floats
  275. Range to shift on width axis, -1 ~ 1.
  276. - float, a fixed distance.
  277. - tuple of 2 floats, randomly sample a value as the distance between these 2 values.
  278. hrg : float or tuple of floats
  279. Range to shift on height axis, -1 ~ 1.
  280. - float, a fixed distance.
  281. - tuple of 2 floats, randomly sample a value as the distance between these 2 values.
  282. w, h : int
  283. The width and height of the image.
  284. Returns
  285. -------
  286. numpy.array
  287. An affine transform matrix.
  288. """
  289. if isinstance(wrg, tuple):
  290. tx = np.random.uniform(wrg[0], wrg[1]) * w
  291. else:
  292. tx = wrg * w
  293. if isinstance(hrg, tuple):
  294. ty = np.random.uniform(hrg[0], hrg[1]) * h
  295. else:
  296. ty = hrg * h
  297. shift_matrix = np.array([[1, 0, tx], \
  298. [0, 1, ty], \
  299. [0, 0, 1]])
  300. return shift_matrix
  301. def affine_shear_matrix(x_shear=(-0.1, 0.1), y_shear=(-0.1, 0.1)):
  302. """Create affine transform matrix for image shearing.
  303. NOTE: In OpenCV, x is width and y is height.
  304. Parameters
  305. -----------
  306. shear : tuple of two floats
  307. Percentage of shears for width and height directions.
  308. Returns
  309. -------
  310. numpy.array
  311. An affine transform matrix.
  312. """
  313. # if len(shear) != 2:
  314. # raise AssertionError(
  315. # "shear should be tuple of 2 floats, or you want to use tl.prepro.shear rather than tl.prepro.shear2 ?"
  316. # )
  317. # if isinstance(shear, tuple):
  318. # shear = list(shear)
  319. # if is_random:
  320. # shear[0] = np.random.uniform(-shear[0], shear[0])
  321. # shear[1] = np.random.uniform(-shear[1], shear[1])
  322. if isinstance(x_shear, tuple):
  323. x_shear = np.random.uniform(x_shear[0], x_shear[1])
  324. if isinstance(y_shear, tuple):
  325. y_shear = np.random.uniform(y_shear[0], y_shear[1])
  326. shear_matrix = np.array([[1, x_shear, 0], \
  327. [y_shear, 1, 0], \
  328. [0, 0, 1]])
  329. return shear_matrix
  330. def affine_zoom_matrix(zoom_range=(0.8, 1.1)):
  331. """Create an affine transform matrix for zooming/scaling an image's height and width.
  332. OpenCV format, x is width.
  333. Parameters
  334. -----------
  335. x : numpy.array
  336. An image with dimension of [row, col, channel] (default).
  337. zoom_range : float or tuple of 2 floats
  338. The zooming/scaling ratio, greater than 1 means larger.
  339. - float, a fixed ratio.
  340. - tuple of 2 floats, randomly sample a value as the ratio between these 2 values.
  341. Returns
  342. -------
  343. numpy.array
  344. An affine transform matrix.
  345. """
  346. if isinstance(zoom_range, (float, int)):
  347. scale = zoom_range
  348. elif isinstance(zoom_range, tuple):
  349. scale = np.random.uniform(zoom_range[0], zoom_range[1])
  350. else:
  351. raise Exception("zoom_range: float or tuple of 2 floats")
  352. zoom_matrix = np.array([[scale, 0, 0], \
  353. [0, scale, 0], \
  354. [0, 0, 1]])
  355. return zoom_matrix
  356. def affine_respective_zoom_matrix(w_range=0.8, h_range=1.1):
  357. """Get affine transform matrix for zooming/scaling that height and width are changed independently.
  358. OpenCV format, x is width.
  359. Parameters
  360. -----------
  361. w_range : float or tuple of 2 floats
  362. The zooming/scaling ratio of width, greater than 1 means larger.
  363. - float, a fixed ratio.
  364. - tuple of 2 floats, randomly sample a value as the ratio between 2 values.
  365. h_range : float or tuple of 2 floats
  366. The zooming/scaling ratio of height, greater than 1 means larger.
  367. - float, a fixed ratio.
  368. - tuple of 2 floats, randomly sample a value as the ratio between 2 values.
  369. Returns
  370. -------
  371. numpy.array
  372. An affine transform matrix.
  373. """
  374. if isinstance(h_range, (float, int)):
  375. zy = h_range
  376. elif isinstance(h_range, tuple):
  377. zy = np.random.uniform(h_range[0], h_range[1])
  378. else:
  379. raise Exception("h_range: float or tuple of 2 floats")
  380. if isinstance(w_range, (float, int)):
  381. zx = w_range
  382. elif isinstance(w_range, tuple):
  383. zx = np.random.uniform(w_range[0], w_range[1])
  384. else:
  385. raise Exception("w_range: float or tuple of 2 floats")
  386. zoom_matrix = np.array([[zx, 0, 0], \
  387. [0, zy, 0], \
  388. [0, 0, 1]])
  389. return zoom_matrix
  390. # affine transform
  391. def transform_matrix_offset_center(matrix, y, x):
  392. """Convert the matrix from Cartesian coordinates (the origin in the middle of image) to Image coordinates (the origin on the top-left of image).
  393. Parameters
  394. ----------
  395. matrix : numpy.array
  396. Transform matrix.
  397. x and y : 2 int
  398. Size of image.
  399. Returns
  400. -------
  401. numpy.array
  402. The transform matrix.
  403. Examples
  404. --------
  405. - See ``tl.prepro.rotation``, ``tl.prepro.shear``, ``tl.prepro.zoom``.
  406. """
  407. o_x = (x - 1) / 2.0
  408. o_y = (y - 1) / 2.0
  409. offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
  410. reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
  411. transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
  412. return transform_matrix
  413. def affine_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0., order=1):
  414. """Return transformed images by given an affine matrix in Scipy format (x is height).
  415. Parameters
  416. ----------
  417. x : numpy.array
  418. An image with dimension of [row, col, channel] (default).
  419. transform_matrix : numpy.array
  420. Transform matrix (offset center), can be generated by ``transform_matrix_offset_center``
  421. channel_index : int
  422. Index of channel, default 2.
  423. fill_mode : str
  424. Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
  425. cval : float
  426. Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0
  427. order : int
  428. The order of interpolation. The order has to be in the range 0-5:
  429. - 0 Nearest-neighbor
  430. - 1 Bi-linear (default)
  431. - 2 Bi-quadratic
  432. - 3 Bi-cubic
  433. - 4 Bi-quartic
  434. - 5 Bi-quintic
  435. - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
  436. Returns
  437. -------
  438. numpy.array
  439. A processed image.
  440. Examples
  441. --------
  442. >>> M_shear = tl.prepro.affine_shear_matrix(intensity=0.2, is_random=False)
  443. >>> M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=0.8)
  444. >>> M_combined = M_shear.dot(M_zoom)
  445. >>> transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, h, w)
  446. >>> result = tl.prepro.affine_transform(image, transform_matrix)
  447. """
  448. # transform_matrix = transform_matrix_offset_center()
  449. # asdihasid
  450. # asd
  451. x = np.rollaxis(x, channel_index, 0)
  452. final_affine_matrix = transform_matrix[:2, :2]
  453. final_offset = transform_matrix[:2, 2]
  454. channel_images = [
  455. ndi.interpolation.affine_transform(
  456. x_channel, final_affine_matrix, final_offset, order=order, mode=fill_mode, cval=cval
  457. ) for x_channel in x
  458. ]
  459. x = np.stack(channel_images, axis=0)
  460. x = np.rollaxis(x, 0, channel_index + 1)
  461. return x
  462. apply_transform = affine_transform
  463. def affine_transform_cv2(x, transform_matrix, flags=None, border_mode='constant'):
  464. """Return transformed images by given an affine matrix in OpenCV format (x is width). (Powered by OpenCV2, faster than ``tl.prepro.affine_transform``)
  465. Parameters
  466. ----------
  467. x : numpy.array
  468. An image with dimension of [row, col, channel] (default).
  469. transform_matrix : numpy.array
  470. A transform matrix, OpenCV format.
  471. border_mode : str
  472. - `constant`, pad the image with a constant value (i.e. black or 0)
  473. - `replicate`, the row or column at the very edge of the original is replicated to the extra border.
  474. Examples
  475. --------
  476. >>> M_shear = tl.prepro.affine_shear_matrix(intensity=0.2, is_random=False)
  477. >>> M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=0.8)
  478. >>> M_combined = M_shear.dot(M_zoom)
  479. >>> result = tl.prepro.affine_transform_cv2(image, M_combined)
  480. """
  481. rows, cols = x.shape[0], x.shape[1]
  482. if flags is None:
  483. flags = cv2.INTER_AREA
  484. if border_mode is 'constant':
  485. border_mode = cv2.BORDER_CONSTANT
  486. elif border_mode is 'replicate':
  487. border_mode = cv2.BORDER_REPLICATE
  488. else:
  489. raise Exception("unsupport border_mode, check cv.BORDER_ for more details.")
  490. return cv2.warpAffine(x, transform_matrix[0:2,:], \
  491. (cols,rows), flags=flags, borderMode=border_mode)
  492. def affine_transform_keypoints(coords_list, transform_matrix):
  493. """Transform keypoint coordinates according to a given affine transform matrix.
  494. OpenCV format, x is width.
  495. Note that, for pose estimation task, flipping requires maintaining the left and right body information.
  496. We should not flip the left and right body, so please use ``tl.prepro.keypoint_random_flip``.
  497. Parameters
  498. -----------
  499. coords_list : list of list of tuple/list
  500. The coordinates
  501. e.g., the keypoint coordinates of every person in an image.
  502. transform_matrix : numpy.array
  503. Transform matrix, OpenCV format.
  504. Examples
  505. ---------
  506. >>> # 1. get all affine transform matrices
  507. >>> M_rotate = tl.prepro.affine_rotation_matrix(angle=20)
  508. >>> M_flip = tl.prepro.affine_horizontal_flip_matrix(prob=1)
  509. >>> # 2. combine all affine transform matrices to one matrix
  510. >>> M_combined = dot(M_flip).dot(M_rotate)
  511. >>> # 3. transfrom the matrix from Cartesian coordinate (the origin in the middle of image)
  512. >>> # to Image coordinate (the origin on the top-left of image)
  513. >>> transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, x=w, y=h)
  514. >>> # 4. then we can transfrom the image once for all transformations
  515. >>> result = tl.prepro.affine_transform_cv2(image, transform_matrix) # 76 times faster
  516. >>> # 5. transform keypoint coordinates
  517. >>> coords = [[(50, 100), (100, 100), (100, 50), (200, 200)], [(250, 50), (200, 50), (200, 100)]]
  518. >>> coords_result = tl.prepro.affine_transform_keypoints(coords, transform_matrix)
  519. """
  520. coords_result_list = []
  521. for coords in coords_list:
  522. coords = np.asarray(coords)
  523. coords = coords.transpose([1, 0])
  524. coords = np.insert(coords, 2, 1, axis=0)
  525. # print(coords)
  526. # print(transform_matrix)
  527. coords_result = np.matmul(transform_matrix, coords)
  528. coords_result = coords_result[0:2, :].transpose([1, 0])
  529. coords_result_list.append(coords_result)
  530. return coords_result_list
  531. def projective_transform_by_points(
  532. x, src, dst, map_args=None, output_shape=None, order=1, mode='constant', cval=0.0, clip=True, preserve_range=False
  533. ):
  534. """Projective transform by given coordinates, usually 4 coordinates.
  535. see `scikit-image <http://scikit-image.org/docs/dev/auto_examples/applications/plot_geometric.html>`__.
  536. Parameters
  537. -----------
  538. x : numpy.array
  539. An image with dimension of [row, col, channel] (default).
  540. src : list or numpy
  541. The original coordinates, usually 4 coordinates of (width, height).
  542. dst : list or numpy
  543. The coordinates after transformation, the number of coordinates is the same with src.
  544. map_args : dictionary or None
  545. Keyword arguments passed to inverse map.
  546. output_shape : tuple of 2 int
  547. Shape of the output image generated. By default the shape of the input image is preserved. Note that, even for multi-band images, only rows and columns need to be specified.
  548. order : int
  549. The order of interpolation. The order has to be in the range 0-5:
  550. - 0 Nearest-neighbor
  551. - 1 Bi-linear (default)
  552. - 2 Bi-quadratic
  553. - 3 Bi-cubic
  554. - 4 Bi-quartic
  555. - 5 Bi-quintic
  556. mode : str
  557. One of `constant` (default), `edge`, `symmetric`, `reflect` or `wrap`.
  558. Points outside the boundaries of the input are filled according to the given mode. Modes match the behaviour of numpy.pad.
  559. cval : float
  560. Used in conjunction with mode `constant`, the value outside the image boundaries.
  561. clip : boolean
  562. Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range.
  563. preserve_range : boolean
  564. Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float.
  565. Returns
  566. -------
  567. numpy.array
  568. A processed image.
  569. Examples
  570. --------
  571. Assume X is an image from CIFAR-10, i.e. shape == (32, 32, 3)
  572. >>> src = [[0,0],[0,32],[32,0],[32,32]] # [w, h]
  573. >>> dst = [[10,10],[0,32],[32,0],[32,32]]
  574. >>> x = tl.prepro.projective_transform_by_points(X, src, dst)
  575. References
  576. -----------
  577. - `scikit-image : geometric transformations <http://scikit-image.org/docs/dev/auto_examples/applications/plot_geometric.html>`__
  578. - `scikit-image : examples <http://scikit-image.org/docs/dev/auto_examples/index.html>`__
  579. """
  580. if map_args is None:
  581. map_args = {}
  582. # if type(src) is list:
  583. if isinstance(src, list): # convert to numpy
  584. src = np.array(src)
  585. # if type(dst) is list:
  586. if isinstance(dst, list):
  587. dst = np.array(dst)
  588. if np.max(x) > 1: # convert to [0, 1]
  589. x = x / 255
  590. m = transform.ProjectiveTransform()
  591. m.estimate(dst, src)
  592. warped = transform.warp(
  593. x, m, map_args=map_args, output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip,
  594. preserve_range=preserve_range
  595. )
  596. return warped
  597. # rotate
  598. def rotation(
  599. x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1
  600. ):
  601. """Rotate an image randomly or non-randomly.
  602. Parameters
  603. -----------
  604. x : numpy.array
  605. An image with dimension of [row, col, channel] (default).
  606. rg : int or float
  607. Degree to rotate, usually 0 ~ 180.
  608. is_random : boolean
  609. If True, randomly rotate. Default is False
  610. row_index col_index and channel_index : int
  611. Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
  612. fill_mode : str
  613. Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
  614. cval : float
  615. Value used for points outside the boundaries of the input if mode=`constant`. Default is 0.0
  616. order : int
  617. The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.affine_transform`` and `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
  618. Returns
  619. -------
  620. numpy.array
  621. A processed image.
  622. Examples
  623. ---------
  624. >>> x --> [row, col, 1]
  625. >>> x = tl.prepro.rotation(x, rg=40, is_random=False)
  626. >>> tl.vis.save_image(x, 'im.png')
  627. """
  628. if is_random:
  629. theta = np.pi / 180 * np.random.uniform(-rg, rg)
  630. else:
  631. theta = np.pi / 180 * rg
  632. rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]])
  633. h, w = x.shape[row_index], x.shape[col_index]
  634. transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
  635. x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order)
  636. return x
  637. def rotation_multi(
  638. x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1
  639. ):
  640. """Rotate multiple images with the same arguments, randomly or non-randomly.
  641. Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
  642. Parameters
  643. -----------
  644. x : list of numpy.array
  645. List of images with dimension of [n_images, row, col, channel] (default).
  646. others : args
  647. See ``tl.prepro.rotation``.
  648. Returns
  649. -------
  650. numpy.array
  651. A list of processed images.
  652. Examples
  653. --------
  654. >>> x, y --> [row, col, 1] greyscale
  655. >>> x, y = tl.prepro.rotation_multi([x, y], rg=90, is_random=False)
  656. """
  657. if is_random:
  658. theta = np.pi / 180 * np.random.uniform(-rg, rg)
  659. else:
  660. theta = np.pi / 180 * rg
  661. rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]])
  662. h, w = x[0].shape[row_index], x[0].shape[col_index]
  663. transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
  664. results = []
  665. for data in x:
  666. results.append(affine_transform(data, transform_matrix, channel_index, fill_mode, cval, order))
  667. return np.asarray(results)
  668. # crop
  669. def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1):
  670. """Randomly or centrally crop an image.
  671. Parameters
  672. ----------
  673. x : numpy.array
  674. An image with dimension of [row, col, channel] (default).
  675. wrg : int
  676. Size of width.
  677. hrg : int
  678. Size of height.
  679. is_random : boolean,
  680. If True, randomly crop, else central crop. Default is False.
  681. row_index: int
  682. index of row.
  683. col_index: int
  684. index of column.
  685. Returns
  686. -------
  687. numpy.array
  688. A processed image.
  689. """
  690. h, w = x.shape[row_index], x.shape[col_index]
  691. if (h < hrg) or (w < wrg):
  692. raise AssertionError("The size of cropping should smaller than or equal to the original image")
  693. if is_random:
  694. h_offset = int(np.random.uniform(0, h - hrg))
  695. w_offset = int(np.random.uniform(0, w - wrg))
  696. # tl.logging.info(h_offset, w_offset, x[h_offset: hrg+h_offset ,w_offset: wrg+w_offset].shape)
  697. return x[h_offset:hrg + h_offset, w_offset:wrg + w_offset]
  698. else: # central crop
  699. h_offset = int(np.floor((h - hrg) / 2.))
  700. w_offset = int(np.floor((w - wrg) / 2.))
  701. h_end = h_offset + hrg
  702. w_end = w_offset + wrg
  703. return x[h_offset:h_end, w_offset:w_end]
  704. # old implementation
  705. # h_offset = (h - hrg)/2
  706. # w_offset = (w - wrg)/2
  707. # tl.logging.info(x[h_offset: h-h_offset ,w_offset: w-w_offset].shape)
  708. # return x[h_offset: h-h_offset ,w_offset: w-w_offset]
  709. # central crop
  710. def crop_multi(x, wrg, hrg, is_random=False, row_index=0, col_index=1):
  711. """Randomly or centrally crop multiple images.
  712. Parameters
  713. ----------
  714. x : list of numpy.array
  715. List of images with dimension of [n_images, row, col, channel] (default).
  716. others : args
  717. See ``tl.prepro.crop``.
  718. Returns
  719. -------
  720. numpy.array
  721. A list of processed images.
  722. """
  723. h, w = x[0].shape[row_index], x[0].shape[col_index]
  724. if (h < hrg) or (w < wrg):
  725. raise AssertionError("The size of cropping should smaller than or equal to the original image")
  726. if is_random:
  727. h_offset = int(np.random.uniform(0, h - hrg))
  728. w_offset = int(np.random.uniform(0, w - wrg))
  729. results = []
  730. for data in x:
  731. results.append(data[h_offset:hrg + h_offset, w_offset:wrg + w_offset])
  732. return np.asarray(results)
  733. else:
  734. # central crop
  735. h_offset = int(np.floor((h - hrg) / 2.))
  736. w_offset = int(np.floor((w - wrg) / 2.))
  737. results = []
  738. for data in x:
  739. results.append(data[h_offset:h - h_offset, w_offset:w - w_offset])
  740. return np.asarray(results)
  741. # flip
  742. def flip_axis(x, axis=1, is_random=False):
  743. """Flip the axis of an image, such as flip left and right, up and down, randomly or non-randomly,
  744. Parameters
  745. ----------
  746. x : numpy.array
  747. An image with dimension of [row, col, channel] (default).
  748. axis : int
  749. Which axis to flip.
  750. - 0, flip up and down
  751. - 1, flip left and right
  752. - 2, flip channel
  753. is_random : boolean
  754. If True, randomly flip. Default is False.
  755. Returns
  756. -------
  757. numpy.array
  758. A processed image.
  759. """
  760. if is_random:
  761. factor = np.random.uniform(-1, 1)
  762. if factor > 0:
  763. x = np.asarray(x).swapaxes(axis, 0)
  764. x = x[::-1, ...]
  765. x = x.swapaxes(0, axis)
  766. return x
  767. else:
  768. return x
  769. else:
  770. x = np.asarray(x).swapaxes(axis, 0)
  771. x = x[::-1, ...]
  772. x = x.swapaxes(0, axis)
  773. return x
  774. def flip_axis_multi(x, axis, is_random=False):
  775. """Flip the axises of multiple images together, such as flip left and right, up and down, randomly or non-randomly,
  776. Parameters
  777. -----------
  778. x : list of numpy.array
  779. List of images with dimension of [n_images, row, col, channel] (default).
  780. others : args
  781. See ``tl.prepro.flip_axis``.
  782. Returns
  783. -------
  784. numpy.array
  785. A list of processed images.
  786. """
  787. if is_random:
  788. factor = np.random.uniform(-1, 1)
  789. if factor > 0:
  790. # x = np.asarray(x).swapaxes(axis, 0)
  791. # x = x[::-1, ...]
  792. # x = x.swapaxes(0, axis)
  793. # return x
  794. results = []
  795. for data in x:
  796. data = np.asarray(data).swapaxes(axis, 0)
  797. data = data[::-1, ...]
  798. data = data.swapaxes(0, axis)
  799. results.append(data)
  800. return np.asarray(results)
  801. else:
  802. return np.asarray(x)
  803. else:
  804. # x = np.asarray(x).swapaxes(axis, 0)
  805. # x = x[::-1, ...]
  806. # x = x.swapaxes(0, axis)
  807. # return x
  808. results = []
  809. for data in x:
  810. data = np.asarray(data).swapaxes(axis, 0)
  811. data = data[::-1, ...]
  812. data = data.swapaxes(0, axis)
  813. results.append(data)
  814. return np.asarray(results)
  815. # shift
  816. def shift(
  817. x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0.,
  818. order=1
  819. ):
  820. """Shift an image randomly or non-randomly.
  821. Parameters
  822. -----------
  823. x : numpy.array
  824. An image with dimension of [row, col, channel] (default).
  825. wrg : float
  826. Percentage of shift in axis x, usually -0.25 ~ 0.25.
  827. hrg : float
  828. Percentage of shift in axis y, usually -0.25 ~ 0.25.
  829. is_random : boolean
  830. If True, randomly shift. Default is False.
  831. row_index col_index and channel_index : int
  832. Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
  833. fill_mode : str
  834. Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
  835. cval : float
  836. Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
  837. order : int
  838. The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.affine_transform`` and `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
  839. Returns
  840. -------
  841. numpy.array
  842. A processed image.
  843. """
  844. h, w = x.shape[row_index], x.shape[col_index]
  845. if is_random:
  846. tx = np.random.uniform(-hrg, hrg) * h
  847. ty = np.random.uniform(-wrg, wrg) * w
  848. else:
  849. tx, ty = hrg * h, wrg * w
  850. translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
  851. transform_matrix = translation_matrix # no need to do offset
  852. x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order)
  853. return x
  854. def shift_multi(
  855. x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0.,
  856. order=1
  857. ):
  858. """Shift images with the same arguments, randomly or non-randomly.
  859. Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
  860. Parameters
  861. -----------
  862. x : list of numpy.array
  863. List of images with dimension of [n_images, row, col, channel] (default).
  864. others : args
  865. See ``tl.prepro.shift``.
  866. Returns
  867. -------
  868. numpy.array
  869. A list of processed images.
  870. """
  871. h, w = x[0].shape[row_index], x[0].shape[col_index]
  872. if is_random:
  873. tx = np.random.uniform(-hrg, hrg) * h
  874. ty = np.random.uniform(-wrg, wrg) * w
  875. else:
  876. tx, ty = hrg * h, wrg * w
  877. translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
  878. transform_matrix = translation_matrix # no need to do offset
  879. results = []
  880. for data in x:
  881. results.append(affine_transform(data, transform_matrix, channel_index, fill_mode, cval, order))
  882. return np.asarray(results)
  883. # shear
  884. def shear(
  885. x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1
  886. ):
  887. """Shear an image randomly or non-randomly.
  888. Parameters
  889. -----------
  890. x : numpy.array
  891. An image with dimension of [row, col, channel] (default).
  892. intensity : float
  893. Percentage of shear, usually -0.5 ~ 0.5 (is_random==True), 0 ~ 0.5 (is_random==False),
  894. you can have a quick try by shear(X, 1).
  895. is_random : boolean
  896. If True, randomly shear. Default is False.
  897. row_index col_index and channel_index : int
  898. Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
  899. fill_mode : str
  900. Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see and `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
  901. cval : float
  902. Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
  903. order : int
  904. The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.affine_transform`` and `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
  905. Returns
  906. -------
  907. numpy.array
  908. A processed image.
  909. References
  910. -----------
  911. - `Affine transformation <https://uk.mathworks.com/discovery/affine-transformation.html>`__
  912. """
  913. if is_random:
  914. shear = np.random.uniform(-intensity, intensity)
  915. else:
  916. shear = intensity
  917. shear_matrix = np.array([[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]])
  918. h, w = x.shape[row_index], x.shape[col_index]
  919. transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
  920. x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order)
  921. return x
  922. def shear_multi(
  923. x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1
  924. ):
  925. """Shear images with the same arguments, randomly or non-randomly.
  926. Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
  927. Parameters
  928. -----------
  929. x : list of numpy.array
  930. List of images with dimension of [n_images, row, col, channel] (default).
  931. others : args
  932. See ``tl.prepro.shear``.
  933. Returns
  934. -------
  935. numpy.array
  936. A list of processed images.
  937. """
  938. if is_random:
  939. shear = np.random.uniform(-intensity, intensity)
  940. else:
  941. shear = intensity
  942. shear_matrix = np.array([[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]])
  943. h, w = x[0].shape[row_index], x[0].shape[col_index]
  944. transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
  945. results = []
  946. for data in x:
  947. results.append(affine_transform(data, transform_matrix, channel_index, fill_mode, cval, order))
  948. return np.asarray(results)
  949. def shear2(
  950. x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0.,
  951. order=1
  952. ):
  953. """Shear an image randomly or non-randomly.
  954. Parameters
  955. -----------
  956. x : numpy.array
  957. An image with dimension of [row, col, channel] (default).
  958. shear : tuple of two floats
  959. Percentage of shear for height and width direction (0, 1).
  960. is_random : boolean
  961. If True, randomly shear. Default is False.
  962. row_index col_index and channel_index : int
  963. Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
  964. fill_mode : str
  965. Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
  966. cval : float
  967. Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
  968. order : int
  969. The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.affine_transform`` and `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
  970. Returns
  971. -------
  972. numpy.array
  973. A processed image.
  974. References
  975. -----------
  976. - `Affine transformation <https://uk.mathworks.com/discovery/affine-transformation.html>`__
  977. """
  978. if len(shear) != 2:
  979. raise AssertionError(
  980. "shear should be tuple of 2 floats, or you want to use tl.prepro.shear rather than tl.prepro.shear2 ?"
  981. )
  982. if isinstance(shear, tuple):
  983. shear = list(shear)
  984. if is_random:
  985. shear[0] = np.random.uniform(-shear[0], shear[0])
  986. shear[1] = np.random.uniform(-shear[1], shear[1])
  987. shear_matrix = np.array([[1, shear[0], 0], \
  988. [shear[1], 1, 0], \
  989. [0, 0, 1]])
  990. h, w = x.shape[row_index], x.shape[col_index]
  991. transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
  992. x = affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order)
  993. return x
  994. def shear_multi2(
  995. x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0.,
  996. order=1
  997. ):
  998. """Shear images with the same arguments, randomly or non-randomly.
  999. Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
  1000. Parameters
  1001. -----------
  1002. x : list of numpy.array
  1003. List of images with dimension of [n_images, row, col, channel] (default).
  1004. others : args
  1005. See ``tl.prepro.shear2``.
  1006. Returns
  1007. -------
  1008. numpy.array
  1009. A list of processed images.
  1010. """
  1011. if len(shear) != 2:
  1012. raise AssertionError(
  1013. "shear should be tuple of 2 floats, or you want to use tl.prepro.shear_multi rather than tl.prepro.shear_multi2 ?"
  1014. )
  1015. if isinstance(shear, tuple):
  1016. shear = list(shear)
  1017. if is_random:
  1018. shear[0] = np.random.uniform(-shear[0], shear[0])
  1019. shear[1] = np.random.uniform(-shear[1], shear[1])
  1020. shear_matrix = np.array([[1, shear[0], 0], [shear[1], 1, 0], [0, 0, 1]])
  1021. h, w = x[0].shape[row_index], x[0].shape[col_index]
  1022. transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
  1023. results = []
  1024. for data in x:
  1025. results.append(affine_transform(data, transform_matrix, channel_index, fill_mode, cval, order))
  1026. return np.asarray(results)
  1027. # swirl
  1028. def swirl(
  1029. x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, clip=True,
  1030. preserve_range=False, is_random=False
  1031. ):
  1032. """Swirl an image randomly or non-randomly, see `scikit-image swirl API <http://scikit-image.org/docs/dev/api/skimage.transform.html#skimage.transform.swirl>`__
  1033. and `example <http://scikit-image.org/docs/dev/auto_examples/plot_swirl.html>`__.
  1034. Parameters
  1035. -----------
  1036. x : numpy.array
  1037. An image with dimension of [row, col, channel] (default).
  1038. center : tuple or 2 int or None
  1039. Center coordinate of transformation (optional).
  1040. strength : float
  1041. The amount of swirling applied.
  1042. radius : float
  1043. The extent of the swirl in pixels. The effect dies out rapidly beyond radius.
  1044. rotation : float
  1045. Additional rotation applied to the image, usually [0, 360], relates to center.
  1046. output_shape : tuple of 2 int or None
  1047. Shape of the output image generated (height, width). By default the shape of the input image is preserved.
  1048. order : int, optional
  1049. The order of the spline interpolation, default is 1. The order has to be in the range 0-5. See skimage.transform.warp for detail.
  1050. mode : str
  1051. One of `constant` (default), `edge`, `symmetric` `reflect` and `wrap`.
  1052. Points outside the boundaries of the input are filled according to the given mode, with `constant` used as the default. Modes match the behaviour of numpy.pad.
  1053. cval : float
  1054. Used in conjunction with mode `constant`, the value outside the image boundaries.
  1055. clip : boolean
  1056. Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range.
  1057. preserve_range : boolean
  1058. Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float.
  1059. is_random : boolean,
  1060. If True, random swirl. Default is False.
  1061. - random center = [(0 ~ x.shape[0]), (0 ~ x.shape[1])]
  1062. - random strength = [0, strength]
  1063. - random radius = [1e-10, radius]
  1064. - random rotation = [-rotation, rotation]
  1065. Returns
  1066. -------
  1067. numpy.array
  1068. A processed image.
  1069. Examples
  1070. ---------
  1071. >>> x --> [row, col, 1] greyscale
  1072. >>> x = tl.prepro.swirl(x, strength=4, radius=100)
  1073. """
  1074. if radius == 0:
  1075. raise AssertionError("Invalid radius value")
  1076. rotation = np.pi / 180 * rotation
  1077. if is_random:
  1078. center_h = int(np.random.uniform(0, x.shape[0]))
  1079. center_w = int(np.random.uniform(0, x.shape[1]))
  1080. center = (center_h, center_w)
  1081. strength = np.random.uniform(0, strength)
  1082. radius = np.random.uniform(1e-10, radius)
  1083. rotation = np.random.uniform(-rotation, rotation)
  1084. max_v = np.max(x)
  1085. if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required.
  1086. x = x / max_v
  1087. swirled = skimage.transform.swirl(
  1088. x, center=center, strength=strength, radius=radius, rotation=rotation, output_shape=output_shape, order=order,
  1089. mode=mode, cval=cval, clip=clip, preserve_range=preserve_range
  1090. )
  1091. if max_v > 1:
  1092. swirled = swirled * max_v
  1093. return swirled
  1094. def swirl_multi(
  1095. x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, clip=True,
  1096. preserve_range=False, is_random=False
  1097. ):
  1098. """Swirl multiple images with the same arguments, randomly or non-randomly.
  1099. Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
  1100. Parameters
  1101. -----------
  1102. x : list of numpy.array
  1103. List of images with dimension of [n_images, row, col, channel] (default).
  1104. others : args
  1105. See ``tl.prepro.swirl``.
  1106. Returns
  1107. -------
  1108. numpy.array
  1109. A list of processed images.
  1110. """
  1111. if radius == 0:
  1112. raise AssertionError("Invalid radius value")
  1113. rotation = np.pi / 180 * rotation
  1114. if is_random:
  1115. center_h = int(np.random.uniform(0, x[0].shape[0]))
  1116. center_w = int(np.random.uniform(0, x[0].shape[1]))
  1117. center = (center_h, center_w)
  1118. strength = np.random.uniform(0, strength)
  1119. radius = np.random.uniform(1e-10, radius)
  1120. rotation = np.random.uniform(-rotation, rotation)
  1121. results = []
  1122. for data in x:
  1123. max_v = np.max(data)
  1124. if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required.
  1125. data = data / max_v
  1126. swirled = skimage.transform.swirl(
  1127. data, center=center, strength=strength, radius=radius, rotation=rotation, output_shape=output_shape,
  1128. order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range
  1129. )
  1130. if max_v > 1:
  1131. swirled = swirled * max_v
  1132. results.append(swirled)
  1133. return np.asarray(results)
  1134. # elastic_transform
def elastic_transform(x, alpha, sigma, mode="constant", cval=0, is_random=False):
    """Elastic transformation for image as described in `[Simard2003] <http://deeplearning.cs.cmu.edu/pdfs/Simard.pdf>`__.

    Parameters
    -----------
    x : numpy.array
        A greyscale image, shape (h, w) or (h, w, 1).
    alpha : float
        Alpha value for elastic transformation; scales the smoothed displacement field.
    sigma : float or sequence of float
        The smaller the sigma, the more transformation. Standard deviation for Gaussian kernel. The standard deviations of the Gaussian filter are given for each axis as a sequence, or as a single number, in which case it is equal for all axes.
    mode : str
        See `scipy.ndimage.filters.gaussian_filter <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.filters.gaussian_filter.html>`__. Default is `constant`.
    cval : float,
        Used in conjunction with `mode` of `constant`, the value outside the image boundaries.
    is_random : boolean
        Default is False.
        NOTE(review): both branches build a randomly seeded RandomState
        (``RandomState(None)`` seeds from the OS), so the output is random either
        way; the flag only changes the seeding source — confirm this is intended.

    Returns
    -------
    numpy.array
        A processed image with the same shape as the input.

    Examples
    ---------
    >>> x = tl.prepro.elastic_transform(x, alpha=x.shape[1]*3, sigma=x.shape[1]*0.07)

    References
    ------------
    - `Github <https://gist.github.com/chsasank/4d8f68caf01f041a6453e67fb30f8f5a>`__.
    - `Kaggle <https://www.kaggle.com/pscion/ultrasound-nerve-segmentation/elastic-transform-for-data-augmentation-0878921a>`__
    """
    if is_random is False:
        random_state = np.random.RandomState(None)
    else:
        random_state = np.random.RandomState(int(time.time()))
    #
    # Accept (h, w, 1) input: drop the channel axis, remember to restore it on return.
    is_3d = False
    if len(x.shape) == 3 and x.shape[-1] == 1:
        x = x[:, :, 0]
        is_3d = True
    elif len(x.shape) == 3 and x.shape[-1] != 1:
        raise Exception("Only support greyscale image")
    if len(x.shape) != 2:
        raise AssertionError("input should be grey-scale image")
    shape = x.shape
    # Per-pixel random displacements in [-1, 1], smoothed by a Gaussian (sigma)
    # and scaled by alpha, as in Simard 2003.
    dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode=mode, cval=cval) * alpha
    dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode=mode, cval=cval) * alpha
    # Build the absolute sampling coordinates: identity grid + displacement field.
    x_, y_ = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij')
    indices = np.reshape(x_ + dx, (-1, 1)), np.reshape(y_ + dy, (-1, 1))
    if is_3d:
        return map_coordinates(x, indices, order=1).reshape((shape[0], shape[1], 1))
    else:
        return map_coordinates(x, indices, order=1).reshape(shape)
def elastic_transform_multi(x, alpha, sigma, mode="constant", cval=0, is_random=False):
    """Elastic transformation for images as described in `[Simard2003] <http://deeplearning.cs.cmu.edu/pdfs/Simard.pdf>`__.

    Applies the SAME displacement field (drawn once from the first image's shape)
    to every image in `x`, so paired inputs such as image/segmentation-mask stay
    aligned.

    Parameters
    -----------
    x : list of numpy.array
        List of greyscale images, each shape (h, w) or (h, w, 1); all must share
        the spatial shape of the first image.
    others : args
        See ``tl.prepro.elastic_transform``.

    Returns
    -------
    numpy.array
        A list of processed images.
    """
    if is_random is False:
        random_state = np.random.RandomState(None)
    else:
        random_state = np.random.RandomState(int(time.time()))
    shape = x[0].shape
    if len(shape) == 3:
        shape = (shape[0], shape[1])
    # One shared random field for all images (keeps X and Y transformations matched).
    new_shape = random_state.rand(*shape)
    results = []
    for data in x:
        is_3d = False
        if len(data.shape) == 3 and data.shape[-1] == 1:
            data = data[:, :, 0]
            is_3d = True
        elif len(data.shape) == 3 and data.shape[-1] != 1:
            raise Exception("Only support greyscale image")
        if len(data.shape) != 2:
            raise AssertionError("input should be grey-scale image")
        # Smooth and scale the shared random field into displacement maps.
        dx = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha
        dy = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha
        x_, y_ = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij')
        indices = np.reshape(x_ + dx, (-1, 1)), np.reshape(y_ + dy, (-1, 1))
        # tl.logging.info(data.shape)
        if is_3d:
            results.append(map_coordinates(data, indices, order=1).reshape((shape[0], shape[1], 1)))
        else:
            results.append(map_coordinates(data, indices, order=1).reshape(shape))
    return np.asarray(results)
  1226. # zoom
  1227. def zoom(x, zoom_range=(0.9, 1.1), flags=None, border_mode='constant'):
  1228. """Zooming/Scaling a single image that height and width are changed together.
  1229. Parameters
  1230. -----------
  1231. x : numpy.array
  1232. An image with dimension of [row, col, channel] (default).
  1233. zoom_range : float or tuple of 2 floats
  1234. The zooming/scaling ratio, greater than 1 means larger.
  1235. - float, a fixed ratio.
  1236. - tuple of 2 floats, randomly sample a value as the ratio between 2 values.
  1237. border_mode : str
  1238. - `constant`, pad the image with a constant value (i.e. black or 0)
  1239. - `replicate`, the row or column at the very edge of the original is replicated to the extra border.
  1240. Returns
  1241. -------
  1242. numpy.array
  1243. A processed image.
  1244. """
  1245. zoom_matrix = affine_zoom_matrix(zoom_range=zoom_range)
  1246. h, w = x.shape[0], x.shape[1]
  1247. transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
  1248. x = affine_transform_cv2(x, transform_matrix, flags=flags, border_mode=border_mode)
  1249. return x
  1250. def respective_zoom(x, h_range=(0.9, 1.1), w_range=(0.9, 1.1), flags=None, border_mode='constant'):
  1251. """Zooming/Scaling a single image that height and width are changed independently.
  1252. Parameters
  1253. -----------
  1254. x : numpy.array
  1255. An image with dimension of [row, col, channel] (default).
  1256. h_range : float or tuple of 2 floats
  1257. The zooming/scaling ratio of height, greater than 1 means larger.
  1258. - float, a fixed ratio.
  1259. - tuple of 2 floats, randomly sample a value as the ratio between 2 values.
  1260. w_range : float or tuple of 2 floats
  1261. The zooming/scaling ratio of width, greater than 1 means larger.
  1262. - float, a fixed ratio.
  1263. - tuple of 2 floats, randomly sample a value as the ratio between 2 values.
  1264. border_mode : str
  1265. - `constant`, pad the image with a constant value (i.e. black or 0)
  1266. - `replicate`, the row or column at the very edge of the original is replicated to the extra border.
  1267. Returns
  1268. -------
  1269. numpy.array
  1270. A processed image.
  1271. """
  1272. zoom_matrix = affine_respective_zoom_matrix(h_range=h_range, w_range=w_range)
  1273. h, w = x.shape[0], x.shape[1]
  1274. transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
  1275. x = affine_transform_cv2(
  1276. x, transform_matrix, flags=flags, border_mode=border_mode
  1277. ) #affine_transform(x, transform_matrix, channel_index, fill_mode, cval, order)
  1278. return x
  1279. def zoom_multi(x, zoom_range=(0.9, 1.1), flags=None, border_mode='constant'):
  1280. """Zoom in and out of images with the same arguments, randomly or non-randomly.
  1281. Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
  1282. Parameters
  1283. -----------
  1284. x : list of numpy.array
  1285. List of images with dimension of [n_images, row, col, channel] (default).
  1286. others : args
  1287. See ``tl.prepro.zoom``.
  1288. Returns
  1289. -------
  1290. numpy.array
  1291. A list of processed images.
  1292. """
  1293. zoom_matrix = affine_zoom_matrix(zoom_range=zoom_range)
  1294. results = []
  1295. for img in x:
  1296. h, w = x.shape[0], x.shape[1]
  1297. transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
  1298. results.append(affine_transform_cv2(x, transform_matrix, flags=flags, border_mode=border_mode))
  1299. return results
  1300. # image = tf.image.random_brightness(image, max_delta=32. / 255.)
  1301. # image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
  1302. # image = tf.image.random_hue(image, max_delta=0.032)
  1303. # image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
  1304. def brightness(x, gamma=1, gain=1, is_random=False):
  1305. """Change the brightness of a single image, randomly or non-randomly.
  1306. Parameters
  1307. -----------
  1308. x : numpy.array
  1309. An image with dimension of [row, col, channel] (default).
  1310. gamma : float
  1311. Non negative real number. Default value is 1.
  1312. - Small than 1 means brighter.
  1313. - If `is_random` is True, gamma in a range of (1-gamma, 1+gamma).
  1314. gain : float
  1315. The constant multiplier. Default value is 1.
  1316. is_random : boolean
  1317. If True, randomly change brightness. Default is False.
  1318. Returns
  1319. -------
  1320. numpy.array
  1321. A processed image.
  1322. References
  1323. -----------
  1324. - `skimage.exposure.adjust_gamma <http://scikit-image.org/docs/dev/api/skimage.exposure.html>`__
  1325. - `chinese blog <http://www.cnblogs.com/denny402/p/5124402.html>`__
  1326. """
  1327. if is_random:
  1328. gamma = np.random.uniform(1 - gamma, 1 + gamma)
  1329. x = exposure.adjust_gamma(x, gamma, gain)
  1330. return x
  1331. def brightness_multi(x, gamma=1, gain=1, is_random=False):
  1332. """Change the brightness of multiply images, randomly or non-randomly.
  1333. Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
  1334. Parameters
  1335. -----------
  1336. x : list of numpyarray
  1337. List of images with dimension of [n_images, row, col, channel] (default).
  1338. others : args
  1339. See ``tl.prepro.brightness``.
  1340. Returns
  1341. -------
  1342. numpy.array
  1343. A list of processed images.
  1344. """
  1345. if is_random:
  1346. gamma = np.random.uniform(1 - gamma, 1 + gamma)
  1347. results = []
  1348. for data in x:
  1349. results.append(exposure.adjust_gamma(data, gamma, gain))
  1350. return np.asarray(results)
def illumination(x, gamma=1., contrast=1., saturation=1., is_random=False):
    """Perform illumination augmentation for a single image, randomly or non-randomly.

    Applies brightness (gamma), then contrast, then saturation adjustments in order.
    NOTE(review): the array is round-tripped through ``PIL.Image.fromarray``, which
    requires a PIL-compatible dtype (typically uint8 RGB) — confirm against callers.

    Parameters
    -----------
    x : numpy.array
        An image with dimension of [row, col, channel] (default).
    gamma : float
        Change brightness (the same with ``tl.prepro.brightness``)
        - if is_random=False, one float number, small than one means brighter, greater than one means darker.
        - if is_random=True, tuple of two float numbers, (min, max).
    contrast : float
        Change contrast.
        - if is_random=False, one float number, small than one means blur.
        - if is_random=True, tuple of two float numbers, (min, max).
    saturation : float
        Change saturation.
        - if is_random=False, one float number, small than one means unsaturation.
        - if is_random=True, tuple of two float numbers, (min, max).
    is_random : boolean
        If True, randomly change illumination. Default is False.

    Returns
    -------
    numpy.array
        A processed image.

    Examples
    ---------
    Random

    >>> x = tl.prepro.illumination(x, gamma=(0.5, 5.0), contrast=(0.3, 1.0), saturation=(0.7, 1.0), is_random=True)

    Non-random

    >>> x = tl.prepro.illumination(x, 0.5, 0.6, 0.8, is_random=False)
    """
    if is_random:
        # In random mode every argument must be a (min, max) pair.
        if not (len(gamma) == len(contrast) == len(saturation) == 2):
            raise AssertionError("if is_random = True, the arguments are (min, max)")
        ## random change brightness # small --> brighter
        # Pick one of three brightness regimes so "no change" stays likely.
        illum_settings = np.random.randint(0, 3)  # 0-brighter, 1-darker, 2 keep normal
        if illum_settings == 0:  # brighter
            gamma = np.random.uniform(gamma[0], 1.0)  # (.5, 1.0)
        elif illum_settings == 1:  # darker
            gamma = np.random.uniform(1.0, gamma[1])  # (1.0, 5.0)
        else:
            gamma = 1
        im_ = brightness(x, gamma=gamma, gain=1, is_random=False)
        # tl.logging.info("using contrast and saturation")
        image = PIL.Image.fromarray(im_)  # array -> PIL
        contrast_adjust = PIL.ImageEnhance.Contrast(image)
        image = contrast_adjust.enhance(np.random.uniform(contrast[0], contrast[1]))  #0.3,0.9))
        saturation_adjust = PIL.ImageEnhance.Color(image)
        image = saturation_adjust.enhance(np.random.uniform(saturation[0], saturation[1]))  # (0.7,1.0))
        im_ = np.array(image)  # PIL -> array
    else:
        # Deterministic path: apply the fixed gamma/contrast/saturation values.
        im_ = brightness(x, gamma=gamma, gain=1, is_random=False)
        image = PIL.Image.fromarray(im_)  # array -> PIL
        contrast_adjust = PIL.ImageEnhance.Contrast(image)
        image = contrast_adjust.enhance(contrast)
        saturation_adjust = PIL.ImageEnhance.Color(image)
        image = saturation_adjust.enhance(saturation)
        im_ = np.array(image)  # PIL -> array
    return np.asarray(im_)
  1410. def rgb_to_hsv(rgb):
  1411. """Input RGB image [0~255] return HSV image [0~1].
  1412. Parameters
  1413. ------------
  1414. rgb : numpy.array
  1415. An image with values between 0 and 255.
  1416. Returns
  1417. -------
  1418. numpy.array
  1419. A processed image.
  1420. """
  1421. # Translated from source of colorsys.rgb_to_hsv
  1422. # r,g,b should be a numpy arrays with values between 0 and 255
  1423. # rgb_to_hsv returns an array of floats between 0.0 and 1.0.
  1424. rgb = rgb.astype('float')
  1425. hsv = np.zeros_like(rgb)
  1426. # in case an RGBA array was passed, just copy the A channel
  1427. hsv[..., 3:] = rgb[..., 3:]
  1428. r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2]
  1429. maxc = np.max(rgb[..., :3], axis=-1)
  1430. minc = np.min(rgb[..., :3], axis=-1)
  1431. hsv[..., 2] = maxc
  1432. mask = maxc != minc
  1433. hsv[mask, 1] = (maxc - minc)[mask] / maxc[mask]
  1434. rc = np.zeros_like(r)
  1435. gc = np.zeros_like(g)
  1436. bc = np.zeros_like(b)
  1437. rc[mask] = (maxc - r)[mask] / (maxc - minc)[mask]
  1438. gc[mask] = (maxc - g)[mask] / (maxc - minc)[mask]
  1439. bc[mask] = (maxc - b)[mask] / (maxc - minc)[mask]
  1440. hsv[..., 0] = np.select([r == maxc, g == maxc], [bc - gc, 2.0 + rc - bc], default=4.0 + gc - rc)
  1441. hsv[..., 0] = (hsv[..., 0] / 6.0) % 1.0
  1442. return hsv
  1443. def hsv_to_rgb(hsv):
  1444. """Input HSV image [0~1] return RGB image [0~255].
  1445. Parameters
  1446. -------------
  1447. hsv : numpy.array
  1448. An image with values between 0.0 and 1.0
  1449. Returns
  1450. -------
  1451. numpy.array
  1452. A processed image.
  1453. """
  1454. # Translated from source of colorsys.hsv_to_rgb
  1455. # h,s should be a numpy arrays with values between 0.0 and 1.0
  1456. # v should be a numpy array with values between 0.0 and 255.0
  1457. # hsv_to_rgb returns an array of uints between 0 and 255.
  1458. rgb = np.empty_like(hsv)
  1459. rgb[..., 3:] = hsv[..., 3:]
  1460. h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2]
  1461. i = (h * 6.0).astype('uint8')
  1462. f = (h * 6.0) - i
  1463. p = v * (1.0 - s)
  1464. q = v * (1.0 - s * f)
  1465. t = v * (1.0 - s * (1.0 - f))
  1466. i = i % 6
  1467. conditions = [s == 0.0, i == 1, i == 2, i == 3, i == 4, i == 5]
  1468. rgb[..., 0] = np.select(conditions, [v, q, p, p, t, v], default=v)
  1469. rgb[..., 1] = np.select(conditions, [v, v, v, q, p, p], default=t)
  1470. rgb[..., 2] = np.select(conditions, [v, p, t, v, v, q], default=p)
  1471. return rgb.astype('uint8')
  1472. def adjust_hue(im, hout=0.66, is_offset=True, is_clip=True, is_random=False):
  1473. """Adjust hue of an RGB image.
  1474. This is a convenience method that converts an RGB image to float representation, converts it to HSV, add an offset to the hue channel, converts back to RGB and then back to the original data type.
  1475. For TF, see `tf.image.adjust_hue <https://www.tensorflow.org/api_docs/python/tf/image/adjust_hue>`__.and `tf.image.random_hue <https://www.tensorflow.org/api_docs/python/tf/image/random_hue>`__.
  1476. Parameters
  1477. -----------
  1478. im : numpy.array
  1479. An image with values between 0 and 255.
  1480. hout : float
  1481. The scale value for adjusting hue.
  1482. - If is_offset is False, set all hue values to this value. 0 is red; 0.33 is green; 0.66 is blue.
  1483. - If is_offset is True, add this value as the offset to the hue channel.
  1484. is_offset : boolean
  1485. Whether `hout` is added on HSV as offset or not. Default is True.
  1486. is_clip : boolean
  1487. If HSV value smaller than 0, set to 0. Default is True.
  1488. is_random : boolean
  1489. If True, randomly change hue. Default is False.
  1490. Returns
  1491. -------
  1492. numpy.array
  1493. A processed image.
  1494. Examples
  1495. ---------
  1496. Random, add a random value between -0.2 and 0.2 as the offset to every hue values.
  1497. >>> im_hue = tl.prepro.adjust_hue(image, hout=0.2, is_offset=True, is_random=False)
  1498. Non-random, make all hue to green.
  1499. >>> im_green = tl.prepro.adjust_hue(image, hout=0.66, is_offset=False, is_random=False)
  1500. References
  1501. -----------
  1502. - `tf.image.random_hue <https://www.tensorflow.org/api_docs/python/tf/image/random_hue>`__.
  1503. - `tf.image.adjust_hue <https://www.tensorflow.org/api_docs/python/tf/image/adjust_hue>`__.
  1504. - `StackOverflow: Changing image hue with python PIL <https://stackoverflow.com/questions/7274221/changing-image-hue-with-python-pil>`__.
  1505. """
  1506. hsv = rgb_to_hsv(im)
  1507. if is_random:
  1508. hout = np.random.uniform(-hout, hout)
  1509. if is_offset:
  1510. hsv[..., 0] += hout
  1511. else:
  1512. hsv[..., 0] = hout
  1513. if is_clip:
  1514. hsv[..., 0] = np.clip(hsv[..., 0], 0, np.inf) # Hao : can remove green dots
  1515. rgb = hsv_to_rgb(hsv)
  1516. return rgb
  1517. # # contrast
  1518. # def constant(x, cutoff=0.5, gain=10, inv=False, is_random=False):
  1519. # # TODO
  1520. # x = exposure.adjust_sigmoid(x, cutoff=cutoff, gain=gain, inv=inv)
  1521. # return x
  1522. #
  1523. # def constant_multi():
  1524. # #TODO
  1525. # pass
  1526. def imresize(x, size=None, interp='bicubic', mode=None):
  1527. """Resize an image by given output size and method.
  1528. Warning, this function will rescale the value to [0, 255].
  1529. Parameters
  1530. -----------
  1531. x : numpy.array
  1532. An image with dimension of [row, col, channel] (default).
  1533. size : list of 2 int or None
  1534. For height and width.
  1535. interp : str
  1536. Interpolation method for re-sizing (`nearest`, `lanczos`, `bilinear`, `bicubic` (default) or `cubic`).
  1537. mode : str
  1538. The PIL image mode (`P`, `L`, etc.) to convert image before resizing.
  1539. Returns
  1540. -------
  1541. numpy.array
  1542. A processed image.
  1543. References
  1544. ------------
  1545. - `scipy.misc.imresize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.misc.imresize.html>`__
  1546. """
  1547. if size is None:
  1548. size = [100, 100]
  1549. if x.shape[-1] == 1:
  1550. # greyscale
  1551. # x = scipy.misc.imresize(x[:, :, 0], size, interp=interp, mode=mode)
  1552. x = resize(x[:, :, 0], size)
  1553. return x[:, :, np.newaxis]
  1554. else:
  1555. # rgb, bgr, rgba
  1556. return resize(x, output_shape=size)
  1557. # return scipy.misc.imresize(x, size, interp=interp, mode=mode)
  1558. # value scale
  1559. def pixel_value_scale(im, val=0.9, clip=None, is_random=False):
  1560. """Scales each value in the pixels of the image.
  1561. Parameters
  1562. -----------
  1563. im : numpy.array
  1564. An image.
  1565. val : float
  1566. The scale value for changing pixel value.
  1567. - If is_random=False, multiply this value with all pixels.
  1568. - If is_random=True, multiply a value between [1-val, 1+val] with all pixels.
  1569. clip : tuple of 2 numbers
  1570. The minimum and maximum value.
  1571. is_random : boolean
  1572. If True, see ``val``.
  1573. Returns
  1574. -------
  1575. numpy.array
  1576. A processed image.
  1577. Examples
  1578. ----------
  1579. Random
  1580. >>> im = pixel_value_scale(im, 0.1, [0, 255], is_random=True)
  1581. Non-random
  1582. >>> im = pixel_value_scale(im, 0.9, [0, 255], is_random=False)
  1583. """
  1584. clip = clip if clip is not None else (-np.inf, np.inf)
  1585. if is_random:
  1586. scale = 1 + np.random.uniform(-val, val)
  1587. im = im * scale
  1588. else:
  1589. im = im * val
  1590. if len(clip) == 2:
  1591. im = np.clip(im, clip[0], clip[1])
  1592. else:
  1593. raise Exception("clip : tuple of 2 numbers")
  1594. return im
  1595. # normailization
  1596. def samplewise_norm(
  1597. x, rescale=None, samplewise_center=False, samplewise_std_normalization=False, channel_index=2, epsilon=1e-7
  1598. ):
  1599. """Normalize an image by rescale, samplewise centering and samplewise centering in order.
  1600. Parameters
  1601. -----------
  1602. x : numpy.array
  1603. An image with dimension of [row, col, channel] (default).
  1604. rescale : float
  1605. Rescaling factor. If None or 0, no rescaling is applied, otherwise we multiply the data by the value provided (before applying any other transformation)
  1606. samplewise_center : boolean
  1607. If True, set each sample mean to 0.
  1608. samplewise_std_normalization : boolean
  1609. If True, divide each input by its std.
  1610. epsilon : float
  1611. A small position value for dividing standard deviation.
  1612. Returns
  1613. -------
  1614. numpy.array
  1615. A processed image.
  1616. Examples
  1617. --------
  1618. >>> x = samplewise_norm(x, samplewise_center=True, samplewise_std_normalization=True)
  1619. >>> print(x.shape, np.mean(x), np.std(x))
  1620. (160, 176, 1), 0.0, 1.0
  1621. Notes
  1622. ------
  1623. When samplewise_center and samplewise_std_normalization are True.
  1624. - For greyscale image, every pixels are subtracted and divided by the mean and std of whole image.
  1625. - For RGB image, every pixels are subtracted and divided by the mean and std of this pixel i.e. the mean and std of a pixel is 0 and 1.
  1626. """
  1627. if rescale:
  1628. x *= rescale
  1629. if x.shape[channel_index] == 1:
  1630. # greyscale
  1631. if samplewise_center:
  1632. x = x - np.mean(x)
  1633. if samplewise_std_normalization:
  1634. x = x / np.std(x)
  1635. return x
  1636. elif x.shape[channel_index] == 3:
  1637. # rgb
  1638. if samplewise_center:
  1639. x = x - np.mean(x, axis=channel_index, keepdims=True)
  1640. if samplewise_std_normalization:
  1641. x = x / (np.std(x, axis=channel_index, keepdims=True) + epsilon)
  1642. return x
  1643. else:
  1644. raise Exception("Unsupported channels %d" % x.shape[channel_index])
  1645. def featurewise_norm(x, mean=None, std=None, epsilon=1e-7):
  1646. """Normalize every pixels by the same given mean and std, which are usually
  1647. compute from all examples.
  1648. Parameters
  1649. -----------
  1650. x : numpy.array
  1651. An image with dimension of [row, col, channel] (default).
  1652. mean : float
  1653. Value for subtraction.
  1654. std : float
  1655. Value for division.
  1656. epsilon : float
  1657. A small position value for dividing standard deviation.
  1658. Returns
  1659. -------
  1660. numpy.array
  1661. A processed image.
  1662. """
  1663. if mean:
  1664. x = x - mean
  1665. if std:
  1666. x = x / (std + epsilon)
  1667. return x
  1668. # whitening
  1669. def get_zca_whitening_principal_components_img(X):
  1670. """Return the ZCA whitening principal components matrix.
  1671. Parameters
  1672. -----------
  1673. x : numpy.array
  1674. Batch of images with dimension of [n_example, row, col, channel] (default).
  1675. Returns
  1676. -------
  1677. numpy.array
  1678. A processed image.
  1679. """
  1680. flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3]))
  1681. tl.logging.info("zca : computing sigma ..")
  1682. sigma = np.dot(flatX.T, flatX) / flatX.shape[0]
  1683. tl.logging.info("zca : computing U, S and V ..")
  1684. U, S, _ = linalg.svd(sigma) # USV
  1685. tl.logging.info("zca : computing principal components ..")
  1686. principal_components = np.dot(np.dot(U, np.diag(1. / np.sqrt(S + 10e-7))), U.T)
  1687. return principal_components
  1688. def zca_whitening(x, principal_components):
  1689. """Apply ZCA whitening on an image by given principal components matrix.
  1690. Parameters
  1691. -----------
  1692. x : numpy.array
  1693. An image with dimension of [row, col, channel] (default).
  1694. principal_components : matrix
  1695. Matrix from ``get_zca_whitening_principal_components_img``.
  1696. Returns
  1697. -------
  1698. numpy.array
  1699. A processed image.
  1700. """
  1701. flatx = np.reshape(x, (x.size))
  1702. # tl.logging.info(principal_components.shape, x.shape) # ((28160, 28160), (160, 176, 1))
  1703. # flatx = np.reshape(x, (x.shape))
  1704. # flatx = np.reshape(x, (x.shape[0], ))
  1705. # tl.logging.info(flatx.shape) # (160, 176, 1)
  1706. whitex = np.dot(flatx, principal_components)
  1707. x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2]))
  1708. return x
  1709. # developing
  1710. # def barrel_transform(x, intensity):
  1711. # # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py
  1712. # # TODO
  1713. # pass
  1714. #
  1715. # def barrel_transform_multi(x, intensity):
  1716. # # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py
  1717. # # TODO
  1718. # pass
  1719. # channel shift
  1720. def channel_shift(x, intensity, is_random=False, channel_index=2):
  1721. """Shift the channels of an image, randomly or non-randomly, see `numpy.rollaxis <https://docs.scipy.org/doc/numpy/reference/generated/numpy.rollaxis.html>`__.
  1722. Parameters
  1723. -----------
  1724. x : numpy.array
  1725. An image with dimension of [row, col, channel] (default).
  1726. intensity : float
  1727. Intensity of shifting.
  1728. is_random : boolean
  1729. If True, randomly shift. Default is False.
  1730. channel_index : int
  1731. Index of channel. Default is 2.
  1732. Returns
  1733. -------
  1734. numpy.array
  1735. A processed image.
  1736. """
  1737. if is_random:
  1738. factor = np.random.uniform(-intensity, intensity)
  1739. else:
  1740. factor = intensity
  1741. x = np.rollaxis(x, channel_index, 0)
  1742. min_x, max_x = np.min(x), np.max(x)
  1743. channel_images = [np.clip(x_channel + factor, min_x, max_x) for x_channel in x]
  1744. x = np.stack(channel_images, axis=0)
  1745. x = np.rollaxis(x, 0, channel_index + 1)
  1746. return x
  1747. # x = np.rollaxis(x, channel_index, 0)
  1748. # min_x, max_x = np.min(x), np.max(x)
  1749. # channel_images = [np.clip(x_channel + np.random.uniform(-intensity, intensity), min_x, max_x)
  1750. # for x_channel in x]
  1751. # x = np.stack(channel_images, axis=0)
  1752. # x = np.rollaxis(x, 0, channel_index+1)
  1753. # return x
  1754. def channel_shift_multi(x, intensity, is_random=False, channel_index=2):
  1755. """Shift the channels of images with the same arguments, randomly or non-randomly, see `numpy.rollaxis <https://docs.scipy.org/doc/numpy/reference/generated/numpy.rollaxis.html>`__.
  1756. Usually be used for image segmentation which x=[X, Y], X and Y should be matched.
  1757. Parameters
  1758. -----------
  1759. x : list of numpy.array
  1760. List of images with dimension of [n_images, row, col, channel] (default).
  1761. others : args
  1762. See ``tl.prepro.channel_shift``.
  1763. Returns
  1764. -------
  1765. numpy.array
  1766. A list of processed images.
  1767. """
  1768. if is_random:
  1769. factor = np.random.uniform(-intensity, intensity)
  1770. else:
  1771. factor = intensity
  1772. results = []
  1773. for data in x:
  1774. data = np.rollaxis(data, channel_index, 0)
  1775. min_x, max_x = np.min(data), np.max(data)
  1776. channel_images = [np.clip(x_channel + factor, min_x, max_x) for x_channel in x]
  1777. data = np.stack(channel_images, axis=0)
  1778. data = np.rollaxis(x, 0, channel_index + 1)
  1779. results.append(data)
  1780. return np.asarray(results)
  1781. # noise
  1782. def drop(x, keep=0.5):
  1783. """Randomly set some pixels to zero by a given keeping probability.
  1784. Parameters
  1785. -----------
  1786. x : numpy.array
  1787. An image with dimension of [row, col, channel] or [row, col].
  1788. keep : float
  1789. The keeping probability (0, 1), the lower more values will be set to zero.
  1790. Returns
  1791. -------
  1792. numpy.array
  1793. A processed image.
  1794. """
  1795. if len(x.shape) == 3:
  1796. if x.shape[-1] == 3: # color
  1797. img_size = x.shape
  1798. mask = np.random.binomial(n=1, p=keep, size=x.shape[:-1])
  1799. for i in range(3):
  1800. x[:, :, i] = np.multiply(x[:, :, i], mask)
  1801. elif x.shape[-1] == 1: # greyscale image
  1802. img_size = x.shape
  1803. x = np.multiply(x, np.random.binomial(n=1, p=keep, size=img_size))
  1804. else:
  1805. raise Exception("Unsupported shape {}".format(x.shape))
  1806. elif len(x.shape) == 2 or 1: # greyscale matrix (image) or vector
  1807. img_size = x.shape
  1808. x = np.multiply(x, np.random.binomial(n=1, p=keep, size=img_size))
  1809. else:
  1810. raise Exception("Unsupported shape {}".format(x.shape))
  1811. return x
  1812. # x = np.asarray([[1,2,3,4,5,6,7,8,9,10],[1,2,3,4,5,6,7,8,9,10]])
  1813. # x = np.asarray([x,x,x,x,x,x])
  1814. # x.shape = 10, 4, 3
  1815. # tl.logging.info(x)
  1816. # # exit()
  1817. # tl.logging.info(x.shape)
  1818. # # exit()
  1819. # tl.logging.info(drop(x, keep=1.))
  1820. # exit()
  1821. # Numpy and PIL
  1822. def array_to_img(x, dim_ordering=(0, 1, 2), scale=True):
  1823. """Converts a numpy array to PIL image object (uint8 format).
  1824. Parameters
  1825. ----------
  1826. x : numpy.array
  1827. An image with dimension of 3 and channels of 1 or 3.
  1828. dim_ordering : tuple of 3 int
  1829. Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
  1830. scale : boolean
  1831. If True, converts image to [0, 255] from any range of value like [-1, 2]. Default is True.
  1832. Returns
  1833. -------
  1834. PIL.image
  1835. An image.
  1836. References
  1837. -----------
  1838. `PIL Image.fromarray <http://pillow.readthedocs.io/en/3.1.x/reference/Image.html?highlight=fromarray>`__
  1839. """
  1840. # if dim_ordering == 'default':
  1841. # dim_ordering = K.image_dim_ordering()
  1842. # if dim_ordering == 'th': # theano
  1843. # x = x.transpose(1, 2, 0)
  1844. x = x.transpose(dim_ordering)
  1845. if scale:
  1846. x += max(-np.min(x), 0)
  1847. x_max = np.max(x)
  1848. if x_max != 0:
  1849. # tl.logging.info(x_max)
  1850. # x /= x_max
  1851. x = x / x_max
  1852. x *= 255
  1853. if x.shape[2] == 3:
  1854. # RGB
  1855. return PIL.Image.fromarray(x.astype('uint8'), 'RGB')
  1856. elif x.shape[2] == 1:
  1857. # grayscale
  1858. return PIL.Image.fromarray(x[:, :, 0].astype('uint8'), 'L')
  1859. else:
  1860. raise Exception('Unsupported channel number: ', x.shape[2])
  1861. def find_contours(x, level=0.8, fully_connected='low', positive_orientation='low'):
  1862. """Find iso-valued contours in a 2D array for a given level value, returns list of (n, 2)-ndarrays
  1863. see `skimage.measure.find_contours <http://scikit-image.org/docs/dev/api/skimage.measure.html#skimage.measure.find_contours>`__.
  1864. Parameters
  1865. ------------
  1866. x : 2D ndarray of double.
  1867. Input data in which to find contours.
  1868. level : float
  1869. Value along which to find contours in the array.
  1870. fully_connected : str
  1871. Either `low` or `high`. Indicates whether array elements below the given level value are to be considered fully-connected (and hence elements above the value will only be face connected), or vice-versa. (See notes below for details.)
  1872. positive_orientation : str
  1873. Either `low` or `high`. Indicates whether the output contours will produce positively-oriented polygons around islands of low- or high-valued elements. If `low` then contours will wind counter-clockwise around elements below the iso-value. Alternately, this means that low-valued elements are always on the left of the contour.
  1874. Returns
  1875. --------
  1876. list of (n,2)-ndarrays
  1877. Each contour is an ndarray of shape (n, 2), consisting of n (row, column) coordinates along the contour.
  1878. """
  1879. return skimage.measure.find_contours(
  1880. x, level, fully_connected=fully_connected, positive_orientation=positive_orientation
  1881. )
def pt2map(list_points=None, size=(100, 100), val=1):
    """Rasterise point coordinates into a 2D image: each point's (rounded) position is set to `val`.

    NOTE(review): the nested loop below indexes `x[0]`/`x[1]`, so each element of
    `list_points` must itself be a sequence of coordinate *pairs* (e.g. the
    (n, 2)-arrays returned by ``find_contours``) — the docstring's "[[x, y], ...]"
    description does not match that; confirm against callers.

    Parameters
    --------------
    list_points : list of 2 int
        [[x, y], [x, y]..] for point coordinates.
    size : tuple of 2 int
        (w, h) for output size.
    val : float or int
        For the contour value.

    Returns
    -------
    numpy.array
        An image of shape `size` with `val` at the given points and 0 elsewhere.
    """
    if list_points is None:
        raise Exception("list_points : list of 2 int")
    i_m = np.zeros(size)
    if len(list_points) == 0:
        # Nothing to draw; return the empty canvas.
        return i_m
    for xx in list_points:
        for x in xx:
            # tl.logging.info(x)
            # Round each coordinate pair to the nearest pixel and mark it.
            i_m[int(np.round(x[0]))][int(np.round(x[1]))] = val
    return i_m
  1907. def binary_dilation(x, radius=3):
  1908. """Return fast binary morphological dilation of an image.
  1909. see `skimage.morphology.binary_dilation <http://scikit-image.org/docs/dev/api/skimage.morphology.html#skimage.morphology.binary_dilation>`__.
  1910. Parameters
  1911. -----------
  1912. x : 2D array
  1913. A binary image.
  1914. radius : int
  1915. For the radius of mask.
  1916. Returns
  1917. -------
  1918. numpy.array
  1919. A processed binary image.
  1920. """
  1921. mask = disk(radius)
  1922. x = _binary_dilation(x, selem=mask)
  1923. return x
  1924. def dilation(x, radius=3):
  1925. """Return greyscale morphological dilation of an image,
  1926. see `skimage.morphology.dilation <http://scikit-image.org/docs/dev/api/skimage.morphology.html#skimage.morphology.dilation>`__.
  1927. Parameters
  1928. -----------
  1929. x : 2D array
  1930. An greyscale image.
  1931. radius : int
  1932. For the radius of mask.
  1933. Returns
  1934. -------
  1935. numpy.array
  1936. A processed greyscale image.
  1937. """
  1938. mask = disk(radius)
  1939. x = dilation(x, selem=mask)
  1940. return x
  1941. def binary_erosion(x, radius=3):
  1942. """Return binary morphological erosion of an image,
  1943. see `skimage.morphology.binary_erosion <http://scikit-image.org/docs/dev/api/skimage.morphology.html#skimage.morphology.binary_erosion>`__.
  1944. Parameters
  1945. -----------
  1946. x : 2D array
  1947. A binary image.
  1948. radius : int
  1949. For the radius of mask.
  1950. Returns
  1951. -------
  1952. numpy.array
  1953. A processed binary image.
  1954. """
  1955. mask = disk(radius)
  1956. x = _binary_erosion(x, selem=mask)
  1957. return x
  1958. def erosion(x, radius=3):
  1959. """Return greyscale morphological erosion of an image,
  1960. see `skimage.morphology.erosion <http://scikit-image.org/docs/dev/api/skimage.morphology.html#skimage.morphology.erosion>`__.
  1961. Parameters
  1962. -----------
  1963. x : 2D array
  1964. A greyscale image.
  1965. radius : int
  1966. For the radius of mask.
  1967. Returns
  1968. -------
  1969. numpy.array
  1970. A processed greyscale image.
  1971. """
  1972. mask = disk(radius)
  1973. x = _erosion(x, selem=mask)
  1974. return x
  1975. def obj_box_coords_rescale(coords=None, shape=None):
  1976. """Scale down a list of coordinates from pixel unit to the ratio of image size i.e. in the range of [0, 1].
  1977. Parameters
  1978. ------------
  1979. coords : list of list of 4 ints or None
  1980. For coordinates of more than one images .e.g.[[x, y, w, h], [x, y, w, h], ...].
  1981. shape : list of 2 int or None
  1982. 【height, width].
  1983. Returns
  1984. -------
  1985. list of list of 4 numbers
  1986. A list of new bounding boxes.
  1987. Examples
  1988. ---------
  1989. >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50], [10, 10, 20, 20]], shape=[100, 100])
  1990. >>> print(coords)
  1991. [[0.3, 0.4, 0.5, 0.5], [0.1, 0.1, 0.2, 0.2]]
  1992. >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[50, 100])
  1993. >>> print(coords)
  1994. [[0.3, 0.8, 0.5, 1.0]]
  1995. >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[100, 200])
  1996. >>> print(coords)
  1997. [[0.15, 0.4, 0.25, 0.5]]
  1998. Returns
  1999. -------
  2000. list of 4 numbers
  2001. New coordinates.
  2002. """
  2003. if coords is None:
  2004. coords = []
  2005. if shape is None:
  2006. shape = [100, 200]
  2007. imh, imw = shape[0], shape[1]
  2008. imh = imh * 1.0 # * 1.0 for python2 : force division to be float point
  2009. imw = imw * 1.0
  2010. coords_new = list()
  2011. for coord in coords:
  2012. if len(coord) != 4:
  2013. raise AssertionError("coordinate should be 4 values : [x, y, w, h]")
  2014. x = coord[0] / imw
  2015. y = coord[1] / imh
  2016. w = coord[2] / imw
  2017. h = coord[3] / imh
  2018. coords_new.append([x, y, w, h])
  2019. return coords_new
  2020. def obj_box_coord_rescale(coord=None, shape=None):
  2021. """Scale down one coordinates from pixel unit to the ratio of image size i.e. in the range of [0, 1].
  2022. It is the reverse process of ``obj_box_coord_scale_to_pixelunit``.
  2023. Parameters
  2024. ------------
  2025. coords : list of 4 int or None
  2026. One coordinates of one image e.g. [x, y, w, h].
  2027. shape : list of 2 int or None
  2028. For [height, width].
  2029. Returns
  2030. -------
  2031. list of 4 numbers
  2032. New bounding box.
  2033. Examples
  2034. ---------
  2035. >>> coord = tl.prepro.obj_box_coord_rescale(coord=[30, 40, 50, 50], shape=[100, 100])
  2036. [0.3, 0.4, 0.5, 0.5]
  2037. """
  2038. if coord is None:
  2039. coord = []
  2040. if shape is None:
  2041. shape = [100, 200]
  2042. return obj_box_coords_rescale(coords=[coord], shape=shape)[0]
  2043. def obj_box_coord_scale_to_pixelunit(coord, shape=None):
  2044. """Convert one coordinate [x, y, w (or x2), h (or y2)] in ratio format to image coordinate format.
  2045. It is the reverse process of ``obj_box_coord_rescale``.
  2046. Parameters
  2047. -----------
  2048. coord : list of 4 float
  2049. One coordinate of one image [x, y, w (or x2), h (or y2)] in ratio format, i.e value range [0~1].
  2050. shape : tuple of 2 or None
  2051. For [height, width].
  2052. Returns
  2053. -------
  2054. list of 4 numbers
  2055. New bounding box.
  2056. Examples
  2057. ---------
  2058. >>> x, y, x2, y2 = tl.prepro.obj_box_coord_scale_to_pixelunit([0.2, 0.3, 0.5, 0.7], shape=(100, 200, 3))
  2059. [40, 30, 100, 70]
  2060. """
  2061. if shape is None:
  2062. shape = [100, 100]
  2063. imh, imw = shape[0:2]
  2064. x = int(coord[0] * imw)
  2065. x2 = int(coord[2] * imw)
  2066. y = int(coord[1] * imh)
  2067. y2 = int(coord[3] * imh)
  2068. return [x, y, x2, y2]
  2069. # coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50], [10, 10, 20, 20]], shape=[100, 100])
  2070. # tl.logging.info(coords)
  2071. # # [[0.3, 0.4, 0.5, 0.5], [0.1, 0.1, 0.2, 0.2]]
  2072. # coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[50, 100])
  2073. # tl.logging.info(coords)
  2074. # # [[0.3, 0.8, 0.5, 1.0]]
  2075. # coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[100, 200])
  2076. # tl.logging.info(coords)
  2077. # # [[0.15, 0.4, 0.25, 0.5]]
  2078. # exit()
  2079. def obj_box_coord_centroid_to_upleft_butright(coord, to_int=False):
  2080. """Convert one coordinate [x_center, y_center, w, h] to [x1, y1, x2, y2] in up-left and botton-right format.
  2081. Parameters
  2082. ------------
  2083. coord : list of 4 int/float
  2084. One coordinate.
  2085. to_int : boolean
  2086. Whether to convert output as integer.
  2087. Returns
  2088. -------
  2089. list of 4 numbers
  2090. New bounding box.
  2091. Examples
  2092. ---------
  2093. >>> coord = obj_box_coord_centroid_to_upleft_butright([30, 40, 20, 20])
  2094. [20, 30, 40, 50]
  2095. """
  2096. if len(coord) != 4:
  2097. raise AssertionError("coordinate should be 4 values : [x, y, w, h]")
  2098. x_center, y_center, w, h = coord
  2099. x = x_center - w / 2.
  2100. y = y_center - h / 2.
  2101. x2 = x + w
  2102. y2 = y + h
  2103. if to_int:
  2104. return [int(x), int(y), int(x2), int(y2)]
  2105. else:
  2106. return [x, y, x2, y2]
  2107. # coord = obj_box_coord_centroid_to_upleft_butright([30, 40, 20, 20])
  2108. # tl.logging.info(coord) [20, 30, 40, 50]
  2109. # exit()
  2110. def obj_box_coord_upleft_butright_to_centroid(coord):
  2111. """Convert one coordinate [x1, y1, x2, y2] to [x_center, y_center, w, h].
  2112. It is the reverse process of ``obj_box_coord_centroid_to_upleft_butright``.
  2113. Parameters
  2114. ------------
  2115. coord : list of 4 int/float
  2116. One coordinate.
  2117. Returns
  2118. -------
  2119. list of 4 numbers
  2120. New bounding box.
  2121. """
  2122. if len(coord) != 4:
  2123. raise AssertionError("coordinate should be 4 values : [x1, y1, x2, y2]")
  2124. x1, y1, x2, y2 = coord
  2125. w = x2 - x1
  2126. h = y2 - y1
  2127. x_c = x1 + w / 2.
  2128. y_c = y1 + h / 2.
  2129. return [x_c, y_c, w, h]
  2130. def obj_box_coord_centroid_to_upleft(coord):
  2131. """Convert one coordinate [x_center, y_center, w, h] to [x, y, w, h].
  2132. It is the reverse process of ``obj_box_coord_upleft_to_centroid``.
  2133. Parameters
  2134. ------------
  2135. coord : list of 4 int/float
  2136. One coordinate.
  2137. Returns
  2138. -------
  2139. list of 4 numbers
  2140. New bounding box.
  2141. """
  2142. if len(coord) != 4:
  2143. raise AssertionError("coordinate should be 4 values : [x, y, w, h]")
  2144. x_center, y_center, w, h = coord
  2145. x = x_center - w / 2.
  2146. y = y_center - h / 2.
  2147. return [x, y, w, h]
  2148. def obj_box_coord_upleft_to_centroid(coord):
  2149. """Convert one coordinate [x, y, w, h] to [x_center, y_center, w, h].
  2150. It is the reverse process of ``obj_box_coord_centroid_to_upleft``.
  2151. Parameters
  2152. ------------
  2153. coord : list of 4 int/float
  2154. One coordinate.
  2155. Returns
  2156. -------
  2157. list of 4 numbers
  2158. New bounding box.
  2159. """
  2160. if len(coord) != 4:
  2161. raise AssertionError("coordinate should be 4 values : [x, y, w, h]")
  2162. x, y, w, h = coord
  2163. x_center = x + w / 2.
  2164. y_center = y + h / 2.
  2165. return [x_center, y_center, w, h]
  2166. def parse_darknet_ann_str_to_list(annotations):
  2167. r"""Input string format of class, x, y, w, h, return list of list format.
  2168. Parameters
  2169. -----------
  2170. annotations : str
  2171. The annotations in darkent format "class, x, y, w, h ...." seperated by "\\n".
  2172. Returns
  2173. -------
  2174. list of list of 4 numbers
  2175. List of bounding box.
  2176. """
  2177. annotations = annotations.split("\n")
  2178. ann = []
  2179. for a in annotations:
  2180. a = a.split()
  2181. if len(a) == 5:
  2182. for i, _v in enumerate(a):
  2183. if i == 0:
  2184. a[i] = int(a[i])
  2185. else:
  2186. a[i] = float(a[i])
  2187. ann.append(a)
  2188. return ann
  2189. def parse_darknet_ann_list_to_cls_box(annotations):
  2190. """Parse darknet annotation format into two lists for class and bounding box.
  2191. Input list of [[class, x, y, w, h], ...], return two list of [class ...] and [[x, y, w, h], ...].
  2192. Parameters
  2193. ------------
  2194. annotations : list of list
  2195. A list of class and bounding boxes of images e.g. [[class, x, y, w, h], ...]
  2196. Returns
  2197. -------
  2198. list of int
  2199. List of class labels.
  2200. list of list of 4 numbers
  2201. List of bounding box.
  2202. """
  2203. class_list = []
  2204. bbox_list = []
  2205. for ann in annotations:
  2206. class_list.append(ann[0])
  2207. bbox_list.append(ann[1:])
  2208. return class_list, bbox_list
  2209. def obj_box_horizontal_flip(im, coords=None, is_rescale=False, is_center=False, is_random=False):
  2210. """Left-right flip the image and coordinates for object detection.
  2211. Parameters
  2212. ----------
  2213. im : numpy.array
  2214. An image with dimension of [row, col, channel] (default).
  2215. coords : list of list of 4 int/float or None
  2216. Coordinates [[x, y, w, h], [x, y, w, h], ...].
  2217. is_rescale : boolean
  2218. Set to True, if the input coordinates are rescaled to [0, 1]. Default is False.
  2219. is_center : boolean
  2220. Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False.
  2221. is_random : boolean
  2222. If True, randomly flip. Default is False.
  2223. Returns
  2224. -------
  2225. numpy.array
  2226. A processed image
  2227. list of list of 4 numbers
  2228. A list of new bounding boxes.
  2229. Examples
  2230. --------
  2231. >>> im = np.zeros([80, 100]) # as an image with shape width=100, height=80
  2232. >>> im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3], [0.1, 0.5, 0.2, 0.3]], is_rescale=True, is_center=True, is_random=False)
  2233. >>> print(coords)
  2234. [[0.8, 0.4, 0.3, 0.3], [0.9, 0.5, 0.2, 0.3]]
  2235. >>> im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3]], is_rescale=True, is_center=False, is_random=False)
  2236. >>> print(coords)
  2237. [[0.5, 0.4, 0.3, 0.3]]
  2238. >>> im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=True, is_random=False)
  2239. >>> print(coords)
  2240. [[80, 40, 30, 30]]
  2241. >>> im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=False, is_random=False)
  2242. >>> print(coords)
  2243. [[50, 40, 30, 30]]
  2244. """
  2245. if coords is None:
  2246. coords = []
  2247. def _flip(im, coords):
  2248. im = flip_axis(im, axis=1, is_random=False)
  2249. coords_new = list()
  2250. for coord in coords:
  2251. if len(coord) != 4:
  2252. raise AssertionError("coordinate should be 4 values : [x, y, w, h]")
  2253. if is_rescale:
  2254. if is_center:
  2255. # x_center' = 1 - x
  2256. x = 1. - coord[0]
  2257. else:
  2258. # x_center' = 1 - x - w
  2259. x = 1. - coord[0] - coord[2]
  2260. else:
  2261. if is_center:
  2262. # x' = im.width - x
  2263. x = im.shape[1] - coord[0]
  2264. else:
  2265. # x' = im.width - x - w
  2266. x = im.shape[1] - coord[0] - coord[2]
  2267. coords_new.append([x, coord[1], coord[2], coord[3]])
  2268. return im, coords_new
  2269. if is_random:
  2270. factor = np.random.uniform(-1, 1)
  2271. if factor > 0:
  2272. return _flip(im, coords)
  2273. else:
  2274. return im, coords
  2275. else:
  2276. return _flip(im, coords)
  2277. obj_box_left_right_flip = obj_box_horizontal_flip
  2278. # im = np.zeros([80, 100]) # as an image with shape width=100, height=80
  2279. # im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3], [0.1, 0.5, 0.2, 0.3]], is_rescale=True, is_center=True, is_random=False)
  2280. # tl.logging.info(coords)
  2281. # # [[0.8, 0.4, 0.3, 0.3], [0.9, 0.5, 0.2, 0.3]]
  2282. # im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3]], is_rescale=True, is_center=False, is_random=False)
  2283. # tl.logging.info(coords)
  2284. # # [[0.5, 0.4, 0.3, 0.3]]
  2285. # im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=True, is_random=False)
  2286. # tl.logging.info(coords)
  2287. # # [[80, 40, 30, 30]]
  2288. # im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=False, is_random=False)
  2289. # tl.logging.info(coords)
  2290. # # [[50, 40, 30, 30]]
  2291. # exit()
  2292. def obj_box_imresize(im, coords=None, size=None, interp='bicubic', mode=None, is_rescale=False):
  2293. """Resize an image, and compute the new bounding box coordinates.
  2294. Parameters
  2295. -------------
  2296. im : numpy.array
  2297. An image with dimension of [row, col, channel] (default).
  2298. coords : list of list of 4 int/float or None
  2299. Coordinates [[x, y, w, h], [x, y, w, h], ...]
  2300. size interp and mode : args
  2301. See ``tl.prepro.imresize``.
  2302. is_rescale : boolean
  2303. Set to True, if the input coordinates are rescaled to [0, 1], then return the original coordinates. Default is False.
  2304. Returns
  2305. -------
  2306. numpy.array
  2307. A processed image
  2308. list of list of 4 numbers
  2309. A list of new bounding boxes.
  2310. Examples
  2311. --------
  2312. >>> im = np.zeros([80, 100, 3]) # as an image with shape width=100, height=80
  2313. >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30], [10, 20, 20, 20]], size=[160, 200], is_rescale=False)
  2314. >>> print(coords)
  2315. [[40, 80, 60, 60], [20, 40, 40, 40]]
  2316. >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[40, 100], is_rescale=False)
  2317. >>> print(coords)
  2318. [[20, 20, 30, 15]]
  2319. >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[60, 150], is_rescale=False)
  2320. >>> print(coords)
  2321. [[30, 30, 45, 22]]
  2322. >>> im2, coords = obj_box_imresize(im, coords=[[0.2, 0.4, 0.3, 0.3]], size=[160, 200], is_rescale=True)
  2323. >>> print(coords, im2.shape)
  2324. [[0.2, 0.4, 0.3, 0.3]] (160, 200, 3)
  2325. """
  2326. if coords is None:
  2327. coords = []
  2328. if size is None:
  2329. size = [100, 100]
  2330. imh, imw = im.shape[0:2]
  2331. imh = imh * 1.0 # * 1.0 for python2 : force division to be float point
  2332. imw = imw * 1.0
  2333. im = imresize(im, size=size, interp=interp, mode=mode)
  2334. if is_rescale is False:
  2335. coords_new = list()
  2336. for coord in coords:
  2337. if len(coord) != 4:
  2338. raise AssertionError("coordinate should be 4 values : [x, y, w, h]")
  2339. # x' = x * (imw'/imw)
  2340. x = int(coord[0] * (size[1] / imw))
  2341. # y' = y * (imh'/imh)
  2342. # tl.logging.info('>>', coord[1], size[0], imh)
  2343. y = int(coord[1] * (size[0] / imh))
  2344. # w' = w * (imw'/imw)
  2345. w = int(coord[2] * (size[1] / imw))
  2346. # h' = h * (imh'/imh)
  2347. h = int(coord[3] * (size[0] / imh))
  2348. coords_new.append([x, y, w, h])
  2349. return im, coords_new
  2350. else:
  2351. return im, coords
  2352. # im = np.zeros([80, 100, 3]) # as an image with shape width=100, height=80
  2353. # _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30], [10, 20, 20, 20]], size=[160, 200], is_rescale=False)
  2354. # tl.logging.info(coords)
  2355. # # [[40, 80, 60, 60], [20, 40, 40, 40]]
  2356. # _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[40, 100], is_rescale=False)
  2357. # tl.logging.info(coords)
  2358. # # [20, 20, 30, 15]
  2359. # _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[60, 150], is_rescale=False)
  2360. # tl.logging.info(coords)
  2361. # # [30, 30, 45, 22]
  2362. # im2, coords = obj_box_imresize(im, coords=[[0.2, 0.4, 0.3, 0.3]], size=[160, 200], is_rescale=True)
  2363. # tl.logging.info(coords, im2.shape)
  2364. # # [0.2, 0.4, 0.3, 0.3] (160, 200, 3)
  2365. # exit()
def obj_box_crop(
    im, classes=None, coords=None, wrg=100, hrg=100, is_rescale=False, is_center=False, is_random=False, thresh_wh=0.02,
    thresh_wh2=12.
):
    """Randomly or centrally crop an image, and compute the new bounding box coordinates.
    Objects outside the cropped image will be removed.

    Parameters
    -----------
    im : numpy.array
        An image with dimension of [row, col, channel] (default).
    classes : list of int or None
        Class IDs.
    coords : list of list of 4 int/float or None
        Coordinates [[x, y, w, h], [x, y, w, h], ...]
    wrg hrg and is_random : args
        See ``tl.prepro.crop``.
    is_rescale : boolean
        Set to True, if the input coordinates are rescaled to [0, 1]. Default is False.
    is_center : boolean, default False
        Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False.
    thresh_wh : float
        Threshold, remove the box if its ratio of width(height) to image size less than the threshold.
    thresh_wh2 : float
        Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold.

    Returns
    -------
    numpy.array
        A processed image
    list of int
        A list of classes
    list of list of 4 numbers
        A list of new bounding boxes.

    """
    if classes is None:
        classes = []
    if coords is None:
        coords = []

    h, w = im.shape[0], im.shape[1]
    # NOTE(review): the check uses <=, so a crop exactly the image size is also
    # rejected even though the message says "smaller" — confirm intent.
    if (h <= hrg) or (w <= wrg):
        raise AssertionError("The size of cropping should smaller than the original image")

    if is_random:
        # NOTE(review): the trailing `- 1` can make the offset -1, in which case
        # the slice starts from index -1 and the crop is not hrg x wrg — confirm
        # whether this off-by-one is intentional.
        h_offset = int(np.random.uniform(0, h - hrg) - 1)
        w_offset = int(np.random.uniform(0, w - wrg) - 1)
        h_end = hrg + h_offset
        w_end = wrg + w_offset
        im_new = im[h_offset:h_end, w_offset:w_end]
    else:  # central crop
        h_offset = int(np.floor((h - hrg) / 2.))
        w_offset = int(np.floor((w - wrg) / 2.))
        h_end = h_offset + hrg
        w_end = w_offset + wrg
        im_new = im[h_offset:h_end, w_offset:w_end]

    #                 w
    #   _____________________________
    #   |        h/w offset         |
    #   |       -------             |
    # h |      |       |            |
    #   |      |       |            |
    #   |       -------             |
    #   |            h/w end        |
    #   |___________________________|

    def _get_coord(coord):
        """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates,
        before getting the new coordinates.
        Boxes outsides the cropped image will be removed.
        """
        # Normalise to up-left [x, y, w, h] so the clipping below is uniform.
        if is_center:
            coord = obj_box_coord_centroid_to_upleft(coord)

        ##======= pixel unit format and upleft, w, h ==========##
        # x = np.clip( coord[0] - w_offset, 0, w_end - w_offset)
        # y = np.clip( coord[1] - h_offset, 0, h_end - h_offset)
        # w = np.clip( coord[2] , 0, w_end - w_offset)
        # h = np.clip( coord[3] , 0, h_end - h_offset)

        # Translate the box into the cropped image's coordinate system.
        x = coord[0] - w_offset
        y = coord[1] - h_offset
        w = coord[2]
        h = coord[3]

        # Clip the left/top edges; drop the box entirely if it lies fully
        # outside the crop window.
        if x < 0:
            if x + w <= 0:
                return None
            w = w + x
            x = 0
        elif x > im_new.shape[1]:  # object outside the cropped image
            return None

        if y < 0:
            if y + h <= 0:
                return None
            h = h + y
            y = 0
        elif y > im_new.shape[0]:  # object outside the cropped image
            return None

        # Clip the right/bottom edges to the crop boundary.
        if (x is not None) and (x + w > im_new.shape[1]):  # box outside the cropped image
            w = im_new.shape[1] - x

        if (y is not None) and (y + h > im_new.shape[0]):  # box outside the cropped image
            h = im_new.shape[0] - y

        # Reject boxes that became too elongated after clipping...
        if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2):  # object shape strange: too narrow
            # tl.logging.info('xx', w, h)
            return None

        # ...or too small relative to the cropped image.
        if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) <
                                                        thresh_wh):  # object shape strange: too narrow
            # tl.logging.info('yy', w, im_new.shape[1], h, im_new.shape[0])
            return None

        coord = [x, y, w, h]

        ## convert back if input format is center.
        if is_center:
            coord = obj_box_coord_upleft_to_centroid(coord)

        return coord

    coords_new = list()
    classes_new = list()
    for i, _ in enumerate(coords):
        coord = coords[i]

        if len(coord) != 4:
            raise AssertionError("coordinate should be 4 values : [x, y, w, h]")

        if is_rescale:
            # for scaled coord, upscaled before process and scale back in the end.
            coord = obj_box_coord_scale_to_pixelunit(coord, im.shape)
            coord = _get_coord(coord)
            if coord is not None:
                coord = obj_box_coord_rescale(coord, im_new.shape)
                coords_new.append(coord)
                classes_new.append(classes[i])
        else:
            coord = _get_coord(coord)
            if coord is not None:
                coords_new.append(coord)
                classes_new.append(classes[i])
    return im_new, classes_new, coords_new
  2493. def obj_box_shift(
  2494. im, classes=None, coords=None, wrg=0.1, hrg=0.1, row_index=0, col_index=1, channel_index=2, fill_mode='nearest',
  2495. cval=0., order=1, is_rescale=False, is_center=False, is_random=False, thresh_wh=0.02, thresh_wh2=12.
  2496. ):
  2497. """Shift an image randomly or non-randomly, and compute the new bounding box coordinates.
  2498. Objects outside the cropped image will be removed.
  2499. Parameters
  2500. -----------
  2501. im : numpy.array
  2502. An image with dimension of [row, col, channel] (default).
  2503. classes : list of int or None
  2504. Class IDs.
  2505. coords : list of list of 4 int/float or None
  2506. Coordinates [[x, y, w, h], [x, y, w, h], ...]
  2507. wrg, hrg row_index col_index channel_index is_random fill_mode cval and order : see ``tl.prepro.shift``.
  2508. is_rescale : boolean
  2509. Set to True, if the input coordinates are rescaled to [0, 1]. Default is False.
  2510. is_center : boolean
  2511. Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False.
  2512. thresh_wh : float
  2513. Threshold, remove the box if its ratio of width(height) to image size less than the threshold.
  2514. thresh_wh2 : float
  2515. Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold.
  2516. Returns
  2517. -------
  2518. numpy.array
  2519. A processed image
  2520. list of int
  2521. A list of classes
  2522. list of list of 4 numbers
  2523. A list of new bounding boxes.
  2524. """
  2525. if classes is None:
  2526. classes = []
  2527. if coords is None:
  2528. coords = []
  2529. imh, imw = im.shape[row_index], im.shape[col_index]
  2530. if (hrg >= 1.0) and (hrg <= 0.) and (wrg >= 1.0) and (wrg <= 0.):
  2531. raise AssertionError("shift range should be (0, 1)")
  2532. if is_random:
  2533. tx = np.random.uniform(-hrg, hrg) * imh
  2534. ty = np.random.uniform(-wrg, wrg) * imw
  2535. else:
  2536. tx, ty = hrg * imh, wrg * imw
  2537. translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
  2538. transform_matrix = translation_matrix # no need to do offset
  2539. im_new = affine_transform(im, transform_matrix, channel_index, fill_mode, cval, order)
  2540. # modified from obj_box_crop
  2541. def _get_coord(coord):
  2542. """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates,
  2543. before getting the new coordinates.
  2544. Boxes outsides the cropped image will be removed.
  2545. """
  2546. if is_center:
  2547. coord = obj_box_coord_centroid_to_upleft(coord)
  2548. ##======= pixel unit format and upleft, w, h ==========##
  2549. x = coord[0] - ty # only change this
  2550. y = coord[1] - tx # only change this
  2551. w = coord[2]
  2552. h = coord[3]
  2553. if x < 0:
  2554. if x + w <= 0:
  2555. return None
  2556. w = w + x
  2557. x = 0
  2558. elif x > im_new.shape[1]: # object outside the cropped image
  2559. return None
  2560. if y < 0:
  2561. if y + h <= 0:
  2562. return None
  2563. h = h + y
  2564. y = 0
  2565. elif y > im_new.shape[0]: # object outside the cropped image
  2566. return None
  2567. if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image
  2568. w = im_new.shape[1] - x
  2569. if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image
  2570. h = im_new.shape[0] - y
  2571. if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2): # object shape strange: too narrow
  2572. # tl.logging.info('xx', w, h)
  2573. return None
  2574. if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) <
  2575. thresh_wh): # object shape strange: too narrow
  2576. # tl.logging.info('yy', w, im_new.shape[1], h, im_new.shape[0])
  2577. return None
  2578. coord = [x, y, w, h]
  2579. ## convert back if input format is center.
  2580. if is_center:
  2581. coord = obj_box_coord_upleft_to_centroid(coord)
  2582. return coord
  2583. coords_new = list()
  2584. classes_new = list()
  2585. for i, _ in enumerate(coords):
  2586. coord = coords[i]
  2587. if len(coord) != 4:
  2588. raise AssertionError("coordinate should be 4 values : [x, y, w, h]")
  2589. if is_rescale:
  2590. # for scaled coord, upscaled before process and scale back in the end.
  2591. coord = obj_box_coord_scale_to_pixelunit(coord, im.shape)
  2592. coord = _get_coord(coord)
  2593. if coord is not None:
  2594. coord = obj_box_coord_rescale(coord, im_new.shape)
  2595. coords_new.append(coord)
  2596. classes_new.append(classes[i])
  2597. else:
  2598. coord = _get_coord(coord)
  2599. if coord is not None:
  2600. coords_new.append(coord)
  2601. classes_new.append(classes[i])
  2602. return im_new, classes_new, coords_new
def obj_box_zoom(
    im, classes=None, coords=None, zoom_range=(0.9, 1.1), row_index=0, col_index=1, channel_index=2,
    fill_mode='nearest', cval=0., order=1, is_rescale=False, is_center=False, is_random=False, thresh_wh=0.02,
    thresh_wh2=12.
):
    """Zoom in and out of a single image, randomly or non-randomly, and compute the new bounding box coordinates.
    Objects outside the cropped image will be removed.

    Parameters
    -----------
    im : numpy.array
        An image with dimension of [row, col, channel] (default).
    classes : list of int or None
        Class IDs.
    coords : list of list of 4 int/float or None
        Coordinates [[x, y, w, h], [x, y, w, h], ...].
    zoom_range row_index col_index channel_index is_random fill_mode cval and order : see ``tl.prepro.zoom``.
    is_rescale : boolean
        Set to True, if the input coordinates are rescaled to [0, 1]. Default is False.
    is_center : boolean
        Set to True, if the x and y of coordinates are the centroid. (i.e. darknet format). Default is False.
    thresh_wh : float
        Threshold, remove the box if its ratio of width(height) to image size less than the threshold.
    thresh_wh2 : float
        Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold.

    Returns
    -------
    numpy.array
        A processed image
    list of int
        A list of classes
    list of list of 4 numbers
        A list of new bounding boxes.

    """
    if classes is None:
        classes = []
    if coords is None:
        coords = []

    if len(zoom_range) != 2:
        raise Exception('zoom_range should be a tuple or list of two floats. ' 'Received arg: ', zoom_range)
    if is_random:
        if zoom_range[0] == 1 and zoom_range[1] == 1:
            zx, zy = 1, 1
            tl.logging.info(" random_zoom : not zoom in/out")
        else:
            # One draw produces both zoom factors.
            zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
    else:
        zx, zy = zoom_range
    # tl.logging.info(zx, zy)
    # zx scales the row axis, zy the column axis (matches the coordinate
    # math below, where x-values divide by zy and y-values by zx).
    zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]])

    h, w = im.shape[row_index], im.shape[col_index]
    # Center the zoom on the image midpoint rather than the origin.
    transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
    im_new = affine_transform(im, transform_matrix, channel_index, fill_mode, cval, order)

    # modified from obj_box_crop
    def _get_coord(coord):
        """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates,
        before getting the new coordinates.
        Boxes outsides the cropped image will be removed.
        """
        # Normalise to up-left [x, y, w, h] so the clipping below is uniform.
        if is_center:
            coord = obj_box_coord_centroid_to_upleft(coord)

        # ======= pixel unit format and upleft, w, h ==========
        # Map the box through the centered zoom (inverse scale around center).
        x = (coord[0] - im.shape[1] / 2) / zy + im.shape[1] / 2  # only change this
        y = (coord[1] - im.shape[0] / 2) / zx + im.shape[0] / 2  # only change this
        w = coord[2] / zy  # only change this
        h = coord[3] / zx  # only change thisS

        # Clip the left/top edges; drop the box if fully outside the image.
        if x < 0:
            if x + w <= 0:
                return None
            w = w + x
            x = 0
        elif x > im_new.shape[1]:  # object outside the cropped image
            return None

        if y < 0:
            if y + h <= 0:
                return None
            h = h + y
            y = 0
        elif y > im_new.shape[0]:  # object outside the cropped image
            return None

        # Clip the right/bottom edges to the image boundary.
        if (x is not None) and (x + w > im_new.shape[1]):  # box outside the cropped image
            w = im_new.shape[1] - x

        if (y is not None) and (y + h > im_new.shape[0]):  # box outside the cropped image
            h = im_new.shape[0] - y

        # Reject boxes that became too elongated after clipping...
        if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2):  # object shape strange: too narrow
            # tl.logging.info('xx', w, h)
            return None

        # ...or too small relative to the image.
        if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) <
                                                        thresh_wh):  # object shape strange: too narrow
            # tl.logging.info('yy', w, im_new.shape[1], h, im_new.shape[0])
            return None

        coord = [x, y, w, h]

        # convert back if input format is center.
        if is_center:
            coord = obj_box_coord_upleft_to_centroid(coord)

        return coord

    coords_new = list()
    classes_new = list()
    for i, _ in enumerate(coords):
        coord = coords[i]

        if len(coord) != 4:
            raise AssertionError("coordinate should be 4 values : [x, y, w, h]")

        if is_rescale:
            # for scaled coord, upscaled before process and scale back in the end.
            coord = obj_box_coord_scale_to_pixelunit(coord, im.shape)
            coord = _get_coord(coord)
            if coord is not None:
                coord = obj_box_coord_rescale(coord, im_new.shape)
                coords_new.append(coord)
                classes_new.append(classes[i])
        else:
            coord = _get_coord(coord)
            if coord is not None:
                coords_new.append(coord)
                classes_new.append(classes[i])
    return im_new, classes_new, coords_new
  2718. def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncating='pre', value=0.):
  2719. """Pads each sequence to the same length:
  2720. the length of the longest sequence.
  2721. If maxlen is provided, any sequence longer
  2722. than maxlen is truncated to maxlen.
  2723. Truncation happens off either the beginning (default) or
  2724. the end of the sequence.
  2725. Supports post-padding and pre-padding (default).
  2726. Parameters
  2727. ----------
  2728. sequences : list of list of int
  2729. All sequences where each row is a sequence.
  2730. maxlen : int
  2731. Maximum length.
  2732. dtype : numpy.dtype or str
  2733. Data type to cast the resulting sequence.
  2734. padding : str
  2735. Either 'pre' or 'post', pad either before or after each sequence.
  2736. truncating : str
  2737. Either 'pre' or 'post', remove values from sequences larger than maxlen either in the beginning or in the end of the sequence
  2738. value : float
  2739. Value to pad the sequences to the desired value.
  2740. Returns
  2741. ----------
  2742. x : numpy.array
  2743. With dimensions (number_of_sequences, maxlen)
  2744. Examples
  2745. ----------
  2746. >>> sequences = [[1,1,1,1,1],[2,2,2],[3,3]]
  2747. >>> sequences = pad_sequences(sequences, maxlen=None, dtype='int32',
  2748. ... padding='post', truncating='pre', value=0.)
  2749. [[1 1 1 1 1]
  2750. [2 2 2 0 0]
  2751. [3 3 0 0 0]]
  2752. """
  2753. lengths = [len(s) for s in sequences]
  2754. nb_samples = len(sequences)
  2755. if maxlen is None:
  2756. maxlen = np.max(lengths)
  2757. # take the sample shape from the first non empty sequence
  2758. # checking for consistency in the main loop below.
  2759. sample_shape = tuple()
  2760. for s in sequences:
  2761. if len(s) > 0:
  2762. sample_shape = np.asarray(s).shape[1:]
  2763. break
  2764. x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype)
  2765. for idx, s in enumerate(sequences):
  2766. if len(s) == 0:
  2767. continue # empty list was found
  2768. if truncating == 'pre':
  2769. trunc = s[-maxlen:]
  2770. elif truncating == 'post':
  2771. trunc = s[:maxlen]
  2772. else:
  2773. raise ValueError('Truncating type "%s" not understood' % truncating)
  2774. # check `trunc` has expected shape
  2775. trunc = np.asarray(trunc, dtype=dtype)
  2776. if trunc.shape[1:] != sample_shape:
  2777. raise ValueError(
  2778. 'Shape of sample %s of sequence at position %s is different from expected shape %s' %
  2779. (trunc.shape[1:], idx, sample_shape)
  2780. )
  2781. if padding == 'post':
  2782. x[idx, :len(trunc)] = trunc
  2783. elif padding == 'pre':
  2784. x[idx, -len(trunc):] = trunc
  2785. else:
  2786. raise ValueError('Padding type "%s" not understood' % padding)
  2787. return x.tolist()
  2788. def remove_pad_sequences(sequences, pad_id=0):
  2789. """Remove padding.
  2790. Parameters
  2791. -----------
  2792. sequences : list of list of int
  2793. All sequences where each row is a sequence.
  2794. pad_id : int
  2795. The pad ID.
  2796. Returns
  2797. ----------
  2798. list of list of int
  2799. The processed sequences.
  2800. Examples
  2801. ----------
  2802. >>> sequences = [[2,3,4,0,0], [5,1,2,3,4,0,0,0], [4,5,0,2,4,0,0,0]]
  2803. >>> print(remove_pad_sequences(sequences, pad_id=0))
  2804. [[2, 3, 4], [5, 1, 2, 3, 4], [4, 5, 0, 2, 4]]
  2805. """
  2806. sequences_out = copy.deepcopy(sequences)
  2807. for i, _ in enumerate(sequences):
  2808. # for j in range(len(sequences[i])):
  2809. # if sequences[i][j] == pad_id:
  2810. # sequences_out[i] = sequences_out[i][:j]
  2811. # break
  2812. for j in range(1, len(sequences[i])):
  2813. if sequences[i][-j] != pad_id:
  2814. sequences_out[i] = sequences_out[i][0:-j + 1]
  2815. break
  2816. return sequences_out
  2817. def process_sequences(sequences, end_id=0, pad_val=0, is_shorten=True, remain_end_id=False):
  2818. """Set all tokens(ids) after END token to the padding value, and then shorten (option) it to the maximum sequence length in this batch.
  2819. Parameters
  2820. -----------
  2821. sequences : list of list of int
  2822. All sequences where each row is a sequence.
  2823. end_id : int
  2824. The special token for END.
  2825. pad_val : int
  2826. Replace the `end_id` and the IDs after `end_id` to this value.
  2827. is_shorten : boolean
  2828. Shorten the sequences. Default is True.
  2829. remain_end_id : boolean
  2830. Keep an `end_id` in the end. Default is False.
  2831. Returns
  2832. ----------
  2833. list of list of int
  2834. The processed sequences.
  2835. Examples
  2836. ---------
  2837. >>> sentences_ids = [[4, 3, 5, 3, 2, 2, 2, 2], <-- end_id is 2
  2838. ... [5, 3, 9, 4, 9, 2, 2, 3]] <-- end_id is 2
  2839. >>> sentences_ids = precess_sequences(sentences_ids, end_id=vocab.end_id, pad_val=0, is_shorten=True)
  2840. [[4, 3, 5, 3, 0], [5, 3, 9, 4, 9]]
  2841. """
  2842. max_length = 0
  2843. for _, seq in enumerate(sequences):
  2844. is_end = False
  2845. for i_w, n in enumerate(seq):
  2846. if n == end_id and is_end == False: # 1st time to see end_id
  2847. is_end = True
  2848. if max_length < i_w:
  2849. max_length = i_w
  2850. if remain_end_id is False:
  2851. seq[i_w] = pad_val # set end_id to pad_val
  2852. elif is_end ==True:
  2853. seq[i_w] = pad_val
  2854. if remain_end_id is True:
  2855. max_length += 1
  2856. if is_shorten:
  2857. for i, seq in enumerate(sequences):
  2858. sequences[i] = seq[:max_length]
  2859. return sequences
  2860. def sequences_add_start_id(sequences, start_id=0, remove_last=False):
  2861. """Add special start token(id) in the beginning of each sequence.
  2862. Parameters
  2863. ------------
  2864. sequences : list of list of int
  2865. All sequences where each row is a sequence.
  2866. start_id : int
  2867. The start ID.
  2868. remove_last : boolean
  2869. Remove the last value of each sequences. Usually be used for removing the end ID.
  2870. Returns
  2871. ----------
  2872. list of list of int
  2873. The processed sequences.
  2874. Examples
  2875. ---------
  2876. >>> sentences_ids = [[4,3,5,3,2,2,2,2], [5,3,9,4,9,2,2,3]]
  2877. >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2)
  2878. [[2, 4, 3, 5, 3, 2, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2, 3]]
  2879. >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2, remove_last=True)
  2880. [[2, 4, 3, 5, 3, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2]]
  2881. For Seq2seq
  2882. >>> input = [a, b, c]
  2883. >>> target = [x, y, z]
  2884. >>> decode_seq = [start_id, a, b] <-- sequences_add_start_id(input, start_id, True)
  2885. """
  2886. sequences_out = [[] for _ in range(len(sequences))] #[[]] * len(sequences)
  2887. for i, _ in enumerate(sequences):
  2888. if remove_last:
  2889. sequences_out[i] = [start_id] + sequences[i][:-1]
  2890. else:
  2891. sequences_out[i] = [start_id] + sequences[i]
  2892. return sequences_out
  2893. def sequences_add_end_id(sequences, end_id=888):
  2894. """Add special end token(id) in the end of each sequence.
  2895. Parameters
  2896. -----------
  2897. sequences : list of list of int
  2898. All sequences where each row is a sequence.
  2899. end_id : int
  2900. The end ID.
  2901. Returns
  2902. ----------
  2903. list of list of int
  2904. The processed sequences.
  2905. Examples
  2906. ---------
  2907. >>> sequences = [[1,2,3],[4,5,6,7]]
  2908. >>> print(sequences_add_end_id(sequences, end_id=999))
  2909. [[1, 2, 3, 999], [4, 5, 6, 999]]
  2910. """
  2911. sequences_out = [[] for _ in range(len(sequences))] #[[]] * len(sequences)
  2912. for i, _ in enumerate(sequences):
  2913. sequences_out[i] = sequences[i] + [end_id]
  2914. return sequences_out
  2915. def sequences_add_end_id_after_pad(sequences, end_id=888, pad_id=0):
  2916. """Add special end token(id) in the end of each sequence.
  2917. Parameters
  2918. -----------
  2919. sequences : list of list of int
  2920. All sequences where each row is a sequence.
  2921. end_id : int
  2922. The end ID.
  2923. pad_id : int
  2924. The pad ID.
  2925. Returns
  2926. ----------
  2927. list of list of int
  2928. The processed sequences.
  2929. Examples
  2930. ---------
  2931. >>> sequences = [[1,2,0,0], [1,2,3,0], [1,2,3,4]]
  2932. >>> print(sequences_add_end_id_after_pad(sequences, end_id=99, pad_id=0))
  2933. [[1, 2, 99, 0], [1, 2, 3, 99], [1, 2, 3, 4]]
  2934. """
  2935. # sequences_out = [[] for _ in range(len(sequences))]#[[]] * len(sequences)
  2936. sequences_out = copy.deepcopy(sequences)
  2937. # # add a pad to all
  2938. # for i in range(len(sequences)):
  2939. # for j in range(len(sequences[i])):
  2940. # sequences_out[i].append(pad_id)
  2941. # # pad -- > end
  2942. # max_len = 0
  2943. for i, v in enumerate(sequences):
  2944. for j, _v2 in enumerate(v):
  2945. if sequences[i][j] == pad_id:
  2946. sequences_out[i][j] = end_id
  2947. # if j > max_len:
  2948. # max_len = j
  2949. break
  2950. # # remove pad if too long
  2951. # for i in range(len(sequences)):
  2952. # for j in range(len(sequences[i])):
  2953. # sequences_out[i] = sequences_out[i][:max_len+1]
  2954. return sequences_out
  2955. def sequences_get_mask(sequences, pad_val=0):
  2956. """Return mask for sequences.
  2957. Parameters
  2958. -----------
  2959. sequences : list of list of int
  2960. All sequences where each row is a sequence.
  2961. pad_val : int
  2962. The pad value.
  2963. Returns
  2964. ----------
  2965. list of list of int
  2966. The mask.
  2967. Examples
  2968. ---------
  2969. >>> sentences_ids = [[4, 0, 5, 3, 0, 0],
  2970. ... [5, 3, 9, 4, 9, 0]]
  2971. >>> mask = sequences_get_mask(sentences_ids, pad_val=0)
  2972. [[1 1 1 1 0 0]
  2973. [1 1 1 1 1 0]]
  2974. """
  2975. mask = np.ones_like(sequences)
  2976. for i, seq in enumerate(sequences):
  2977. for i_w in reversed(range(len(seq))):
  2978. if seq[i_w] == pad_val:
  2979. mask[i, i_w] = 0
  2980. else:
  2981. break # <-- exit the for loop, prepcess next sequence
  2982. return mask
  2983. def keypoint_random_crop(image, annos, mask=None, size=(368, 368)):
  2984. """Randomly crop an image and corresponding keypoints without influence scales, given by ``keypoint_random_resize_shortestedge``.
  2985. Parameters
  2986. -----------
  2987. image : 3 channel image
  2988. The given image for augmentation.
  2989. annos : list of list of floats
  2990. The keypoints annotation of people.
  2991. mask : single channel image or None
  2992. The mask if available.
  2993. size : tuple of int
  2994. The size of returned image.
  2995. Returns
  2996. ----------
  2997. preprocessed image, annotation, mask
  2998. """
  2999. _target_height = size[0]
  3000. _target_width = size[1]
  3001. target_size = (_target_width, _target_height)
  3002. if len(np.shape(image)) == 2:
  3003. image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
  3004. height, width, _ = np.shape(image)
  3005. for _ in range(50):
  3006. x = random.randrange(0, width - target_size[0]) if width > target_size[0] else 0
  3007. y = random.randrange(0, height - target_size[1]) if height > target_size[1] else 0
  3008. # check whether any face is inside the box to generate a reasonably-balanced datasets
  3009. for joint in annos:
  3010. if x <= joint[0][0] < x + target_size[0] and y <= joint[0][1] < y + target_size[1]:
  3011. break
  3012. def pose_crop(image, annos, mask, x, y, w, h): # TODO : speed up with affine transform
  3013. # adjust image
  3014. target_size = (w, h)
  3015. img = image
  3016. resized = img[y:y + target_size[1], x:x + target_size[0], :]
  3017. resized_mask = mask[y:y + target_size[1], x:x + target_size[0]]
  3018. # adjust meta data
  3019. adjust_joint_list = []
  3020. for joint in annos:
  3021. adjust_joint = []
  3022. for point in joint:
  3023. if point[0] < -10 or point[1] < -10:
  3024. adjust_joint.append((-1000, -1000))
  3025. continue
  3026. new_x, new_y = point[0] - x, point[1] - y
  3027. # should not crop outside the image
  3028. if new_x > w - 1 or new_y > h - 1:
  3029. adjust_joint.append((-1000, -1000))
  3030. continue
  3031. adjust_joint.append((new_x, new_y))
  3032. adjust_joint_list.append(adjust_joint)
  3033. return resized, adjust_joint_list, resized_mask
  3034. return pose_crop(image, annos, mask, x, y, target_size[0], target_size[1])
def keypoint_resize_random_crop(image, annos, mask=None, size=(368, 368)):
    """Reszie the image to make either its width or height equals to the given sizes.
    Then randomly crop image without influence scales.
    Resize the image match with the minimum size before cropping, this API will change the zoom scale of object.

    Parameters
    -----------
    image : 3 channel image
        The given image for augmentation.
    annos : list of list of floats
        The keypoints annotation of people.
    mask : single channel image or None
        The mask if available.
    size : tuple of int
        The size (height, width) of returned image.

    Returns
    ----------
    preprocessed image, annos, mask

    """
    # Convert grayscale input to 3 channels so unpacking (h, w, c) works.
    if len(np.shape(image)) == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

    def resize_image(image, annos, mask, target_width, target_height):
        """Reszie image

        Scales image (and mask, if given) to exactly (target_width,
        target_height) and rescales keypoints with the same per-axis ratios.
        Keypoints that were negative, or that land outside the target frame,
        are replaced by the (-1000, -1000) "invisible" sentinel.

        Parameters
        -----------
        image : 3 channel image
            The given image.
        annos : list of list of floats
            Keypoints of people
        mask : single channel image or None
            The mask if available.
        target_width : int
            Expected width of returned image.
        target_height : int
            Expected height of returned image.

        Returns
        ----------
        preprocessed input image, annos, mask

        """
        y, x, _ = np.shape(image)
        ratio_y = target_height / y
        ratio_x = target_width / x
        new_joints = []
        # update meta
        for people in annos:
            new_keypoints = []
            for keypoints in people:
                if keypoints[0] < 0 or keypoints[1] < 0:
                    new_keypoints.append((-1000, -1000))
                    continue
                # round-to-nearest via +0.5 then int truncation
                pts = (int(keypoints[0] * ratio_x + 0.5), int(keypoints[1] * ratio_y + 0.5))
                if pts[0] > target_width - 1 or pts[1] > target_height - 1:
                    new_keypoints.append((-1000, -1000))
                    continue

                new_keypoints.append(pts)
            new_joints.append(new_keypoints)
        annos = new_joints

        new_image = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_AREA)
        if mask is not None:
            new_mask = cv2.resize(mask, (target_width, target_height), interpolation=cv2.INTER_AREA)
            return new_image, annos, new_mask
        else:
            return new_image, annos, None

    _target_height = size[0]
    _target_width = size[1]
    # NOTE(review): this grayscale conversion duplicates the one at the top
    # of the function; the second check is always False. Kept as-is.
    if len(np.shape(image)) == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    input_height, input_width, _ = np.shape(image)

    # Scale up by the LARGER of the two ratios so both dimensions end up
    # >= the target; the excess along one axis is then cropped away.
    vertical_ratio = _target_height / input_height
    horizontal_ratio = _target_width / input_width
    rescale_ratio = max(vertical_ratio, horizontal_ratio)
    image, annos, mask = resize_image(
        image, annos, mask, round(input_width * rescale_ratio), round(input_height * rescale_ratio)
    )

    # At this point we should have input image which matches at least target
    # height or target width, while the other dimensions larger than target.
    # NOTE(review): because of the elif below, only ONE axis is cropped per
    # call; if rounding ever left both axes above target, the width would
    # stay too large — confirm this cannot happen for the intended inputs.
    new_height, new_width, _ = np.shape(image)
    if new_height > _target_height:
        crop_range_y = np.random.randint(0, new_height - _target_height)
        image = image[crop_range_y:crop_range_y + _target_height, :, :]
        if mask is not None:
            mask = mask[crop_range_y:crop_range_y + _target_height, :]
        new_joints = []

        for people in annos:  # TODO : speed up with affine transform
            new_keypoints = []
            for keypoints in people:

                # case orginal points are not usable
                if keypoints[1] >= crop_range_y and keypoints[1] <= crop_range_y + _target_height - 1:
                    pts = (int(keypoints[0]), int(keypoints[1] - crop_range_y))
                else:
                    pts = (-1000, -1000)

                new_keypoints.append(pts)

            new_joints.append(new_keypoints)
            annos = new_joints

    elif new_width > _target_width:
        crop_range_x = np.random.randint(0, new_width - _target_width)
        image = image[:, crop_range_x:crop_range_x + _target_width, :]
        if mask is not None:
            mask = mask[:, crop_range_x:crop_range_x + _target_width]
        new_joints = []

        for people in annos:
            new_keypoints = []
            for keypoints in people:

                # case orginal points are not usable
                if keypoints[0] >= crop_range_x and keypoints[0] <= crop_range_x + _target_width - 1:
                    pts = (int(keypoints[0] - crop_range_x), int(keypoints[1]))
                else:
                    pts = (-1000, -1000)

                new_keypoints.append(pts)

            new_joints.append(new_keypoints)
            annos = new_joints

    if mask is not None:
        return image, annos, mask
    else:
        return image, annos, None
  3149. def keypoint_random_rotate(image, annos, mask=None, rg=15.):
  3150. """Rotate an image and corresponding keypoints.
  3151. Parameters
  3152. -----------
  3153. image : 3 channel image
  3154. The given image for augmentation.
  3155. annos : list of list of floats
  3156. The keypoints annotation of people.
  3157. mask : single channel image or None
  3158. The mask if available.
  3159. rg : int or float
  3160. Degree to rotate, usually 0 ~ 180.
  3161. Returns
  3162. ----------
  3163. preprocessed image, annos, mask
  3164. """
  3165. def _rotate_coord(shape, newxy, point, angle):
  3166. angle = -1 * angle / 180.0 * math.pi
  3167. ox, oy = shape
  3168. px, py = point
  3169. ox /= 2
  3170. oy /= 2
  3171. qx = math.cos(angle) * (px - ox) - math.sin(angle) * (py - oy)
  3172. qy = math.sin(angle) * (px - ox) + math.cos(angle) * (py - oy)
  3173. new_x, new_y = newxy
  3174. qx += ox - new_x
  3175. qy += oy - new_y
  3176. return int(qx + 0.5), int(qy + 0.5)
  3177. def _largest_rotated_rect(w, h, angle):
  3178. """
  3179. Get largest rectangle after rotation.
  3180. http://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders
  3181. """
  3182. angle = angle / 180.0 * math.pi
  3183. if w <= 0 or h <= 0:
  3184. return 0, 0
  3185. width_is_longer = w >= h
  3186. side_long, side_short = (w, h) if width_is_longer else (h, w)
  3187. # since the solutions for angle, -angle and 180-angle are all the same,
  3188. # if suffices to look at the first quadrant and the absolute values of sin,cos:
  3189. sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle))
  3190. if side_short <= 2. * sin_a * cos_a * side_long:
  3191. # half constrained case: two crop corners touch the longer side,
  3192. # the other two corners are on the mid-line parallel to the longer line
  3193. x = 0.5 * side_short
  3194. wr, hr = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a)
  3195. else:
  3196. # fully constrained case: crop touches all 4 sides
  3197. cos_2a = cos_a * cos_a - sin_a * sin_a
  3198. wr, hr = (w * cos_a - h * sin_a) / cos_2a, (h * cos_a - w * sin_a) / cos_2a
  3199. return int(np.round(wr)), int(np.round(hr))
  3200. img_shape = np.shape(image)
  3201. height = img_shape[0]
  3202. width = img_shape[1]
  3203. deg = np.random.uniform(-rg, rg)
  3204. img = image
  3205. center = (img.shape[1] * 0.5, img.shape[0] * 0.5) # x, y
  3206. rot_m = cv2.getRotationMatrix2D((int(center[0]), int(center[1])), deg, 1)
  3207. ret = cv2.warpAffine(img, rot_m, img.shape[1::-1], flags=cv2.INTER_AREA, borderMode=cv2.BORDER_CONSTANT)
  3208. if img.ndim == 3 and ret.ndim == 2:
  3209. ret = ret[:, :, np.newaxis]
  3210. neww, newh = _largest_rotated_rect(ret.shape[1], ret.shape[0], deg)
  3211. neww = min(neww, ret.shape[1])
  3212. newh = min(newh, ret.shape[0])
  3213. newx = int(center[0] - neww * 0.5)
  3214. newy = int(center[1] - newh * 0.5)
  3215. # print(ret.shape, deg, newx, newy, neww, newh)
  3216. img = ret[newy:newy + newh, newx:newx + neww]
  3217. # adjust meta data
  3218. adjust_joint_list = []
  3219. for joint in annos: # TODO : speed up with affine transform
  3220. adjust_joint = []
  3221. for point in joint:
  3222. if point[0] < -100 or point[1] < -100:
  3223. adjust_joint.append((-1000, -1000))
  3224. continue
  3225. x, y = _rotate_coord((width, height), (newx, newy), point, deg)
  3226. if x > neww - 1 or y > newh - 1:
  3227. adjust_joint.append((-1000, -1000))
  3228. continue
  3229. if x < 0 or y < 0:
  3230. adjust_joint.append((-1000, -1000))
  3231. continue
  3232. adjust_joint.append((x, y))
  3233. adjust_joint_list.append(adjust_joint)
  3234. joint_list = adjust_joint_list
  3235. if mask is not None:
  3236. msk = mask
  3237. center = (msk.shape[1] * 0.5, msk.shape[0] * 0.5) # x, y
  3238. rot_m = cv2.getRotationMatrix2D((int(center[0]), int(center[1])), deg, 1)
  3239. ret = cv2.warpAffine(msk, rot_m, msk.shape[1::-1], flags=cv2.INTER_AREA, borderMode=cv2.BORDER_CONSTANT)
  3240. if msk.ndim == 3 and msk.ndim == 2:
  3241. ret = ret[:, :, np.newaxis]
  3242. neww, newh = _largest_rotated_rect(ret.shape[1], ret.shape[0], deg)
  3243. neww = min(neww, ret.shape[1])
  3244. newh = min(newh, ret.shape[0])
  3245. newx = int(center[0] - neww * 0.5)
  3246. newy = int(center[1] - newh * 0.5)
  3247. # print(ret.shape, deg, newx, newy, neww, newh)
  3248. msk = ret[newy:newy + newh, newx:newx + neww]
  3249. return img, joint_list, msk
  3250. else:
  3251. return img, joint_list, None
  3252. def keypoint_random_flip(
  3253. image, annos, mask=None, prob=0.5, flip_list=(0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16, 18)
  3254. ):
  3255. """Flip an image and corresponding keypoints.
  3256. Parameters
  3257. -----------
  3258. image : 3 channel image
  3259. The given image for augmentation.
  3260. annos : list of list of floats
  3261. The keypoints annotation of people.
  3262. mask : single channel image or None
  3263. The mask if available.
  3264. prob : float, 0 to 1
  3265. The probability to flip the image, if 1, always flip the image.
  3266. flip_list : tuple of int
  3267. Denotes how the keypoints number be changed after flipping which is required for pose estimation task.
  3268. The left and right body should be maintained rather than switch.
  3269. (Default COCO format).
  3270. Set to an empty tuple if you don't need to maintain left and right information.
  3271. Returns
  3272. ----------
  3273. preprocessed image, annos, mask
  3274. """
  3275. _prob = np.random.uniform(0, 1.0)
  3276. if _prob < prob:
  3277. return image, annos, mask
  3278. _, width, _ = np.shape(image)
  3279. image = cv2.flip(image, 1)
  3280. mask = cv2.flip(mask, 1)
  3281. new_joints = []
  3282. for people in annos: # TODO : speed up with affine transform
  3283. new_keypoints = []
  3284. for k in flip_list:
  3285. point = people[k]
  3286. if point[0] < 0 or point[1] < 0:
  3287. new_keypoints.append((-1000, -1000))
  3288. continue
  3289. if point[0] > image.shape[1] - 1 or point[1] > image.shape[0] - 1:
  3290. new_keypoints.append((-1000, -1000))
  3291. continue
  3292. if (width - point[0]) > image.shape[1] - 1:
  3293. new_keypoints.append((-1000, -1000))
  3294. continue
  3295. new_keypoints.append((width - point[0], point[1]))
  3296. new_joints.append(new_keypoints)
  3297. annos = new_joints
  3298. return image, annos, mask
  3299. def keypoint_random_resize(image, annos, mask=None, zoom_range=(0.8, 1.2)):
  3300. """Randomly resize an image and corresponding keypoints.
  3301. The height and width of image will be changed independently, so the scale will be changed.
  3302. Parameters
  3303. -----------
  3304. image : 3 channel image
  3305. The given image for augmentation.
  3306. annos : list of list of floats
  3307. The keypoints annotation of people.
  3308. mask : single channel image or None
  3309. The mask if available.
  3310. zoom_range : tuple of two floats
  3311. The minimum and maximum factor to zoom in or out, e.g (0.5, 1) means zoom out 1~2 times.
  3312. Returns
  3313. ----------
  3314. preprocessed image, annos, mask
  3315. """
  3316. height = image.shape[0]
  3317. width = image.shape[1]
  3318. _min, _max = zoom_range
  3319. scalew = np.random.uniform(_min, _max)
  3320. scaleh = np.random.uniform(_min, _max)
  3321. neww = int(width * scalew)
  3322. newh = int(height * scaleh)
  3323. dst = cv2.resize(image, (neww, newh), interpolation=cv2.INTER_AREA)
  3324. if mask is not None:
  3325. mask = cv2.resize(mask, (neww, newh), interpolation=cv2.INTER_AREA)
  3326. # adjust meta data
  3327. adjust_joint_list = []
  3328. for joint in annos: # TODO : speed up with affine transform
  3329. adjust_joint = []
  3330. for point in joint:
  3331. if point[0] < -100 or point[1] < -100:
  3332. adjust_joint.append((-1000, -1000))
  3333. continue
  3334. adjust_joint.append((int(point[0] * scalew + 0.5), int(point[1] * scaleh + 0.5)))
  3335. adjust_joint_list.append(adjust_joint)
  3336. if mask is not None:
  3337. return dst, adjust_joint_list, mask
  3338. else:
  3339. return dst, adjust_joint_list, None
  3340. def keypoint_random_resize_shortestedge(
  3341. image, annos, mask=None, min_size=(368, 368), zoom_range=(0.8, 1.2), pad_val=(0, 0, np.random.uniform(0.0, 1.0))
  3342. ):
  3343. """Randomly resize an image and corresponding keypoints based on shorter edgeself.
  3344. If the resized image is smaller than `min_size`, uses padding to make shape matchs `min_size`.
  3345. The height and width of image will be changed together, the scale would not be changed.
  3346. Parameters
  3347. -----------
  3348. image : 3 channel image
  3349. The given image for augmentation.
  3350. annos : list of list of floats
  3351. The keypoints annotation of people.
  3352. mask : single channel image or None
  3353. The mask if available.
  3354. min_size : tuple of two int
  3355. The minimum size of height and width.
  3356. zoom_range : tuple of two floats
  3357. The minimum and maximum factor to zoom in or out, e.g (0.5, 1) means zoom out 1~2 times.
  3358. pad_val : int/float, or tuple of int or random function
  3359. The three padding values for RGB channels respectively.
  3360. Returns
  3361. ----------
  3362. preprocessed image, annos, mask
  3363. """
  3364. _target_height = min_size[0]
  3365. _target_width = min_size[1]
  3366. if len(np.shape(image)) == 2:
  3367. image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
  3368. height, width, _ = np.shape(image)
  3369. ratio_w = _target_width / width
  3370. ratio_h = _target_height / height
  3371. ratio = min(ratio_w, ratio_h)
  3372. target_size = int(min(width * ratio + 0.5, height * ratio + 0.5))
  3373. random_target = np.random.uniform(zoom_range[0], zoom_range[1])
  3374. target_size = int(target_size * random_target)
  3375. # target_size = int(min(_network_w, _network_h) * random.uniform(0.7, 1.5))
  3376. def pose_resize_shortestedge(image, annos, mask, target_size):
  3377. """ """
  3378. # _target_height = 368
  3379. # _target_width = 368
  3380. # img = image
  3381. height, width, _ = np.shape(image)
  3382. # adjust image
  3383. scale = target_size / min(height, width)
  3384. if height < width:
  3385. newh, neww = target_size, int(scale * width + 0.5)
  3386. else:
  3387. newh, neww = int(scale * height + 0.5), target_size
  3388. dst = cv2.resize(image, (neww, newh), interpolation=cv2.INTER_AREA)
  3389. mask = cv2.resize(mask, (neww, newh), interpolation=cv2.INTER_AREA)
  3390. pw = ph = 0
  3391. if neww < _target_width or newh < _target_height:
  3392. pw = max(0, (_target_width - neww) // 2)
  3393. ph = max(0, (_target_height - newh) // 2)
  3394. mw = (_target_width - neww) % 2
  3395. mh = (_target_height - newh) % 2
  3396. # color = np.random.uniform(0.0, 1.0)
  3397. dst = cv2.copyMakeBorder(dst, ph, ph + mh, pw, pw + mw, cv2.BORDER_CONSTANT, value=pad_val) #(0, 0, color))
  3398. if mask is not None:
  3399. mask = cv2.copyMakeBorder(mask, ph, ph + mh, pw, pw + mw, cv2.BORDER_CONSTANT, value=1)
  3400. # adjust meta data
  3401. adjust_joint_list = []
  3402. for joint in annos: # TODO : speed up with affine transform
  3403. adjust_joint = []
  3404. for point in joint:
  3405. if point[0] < -100 or point[1] < -100:
  3406. adjust_joint.append((-1000, -1000))
  3407. continue
  3408. # if point[0] <= 0 or point[1] <= 0 or int(point[0]*scale+0.5) > neww or int(point[1]*scale+0.5) > newh:
  3409. # adjust_joint.append((-1, -1))
  3410. # continue
  3411. adjust_joint.append((int(point[0] * scale + 0.5) + pw, int(point[1] * scale + 0.5) + ph))
  3412. adjust_joint_list.append(adjust_joint)
  3413. if mask is not None:
  3414. return dst, adjust_joint_list, mask
  3415. else:
  3416. return dst, adjust_joint_list, None
  3417. return pose_resize_shortestedge(image, annos, mask, target_size)

TensorLayer3.0 是一款兼容多种深度学习框架为计算后端的深度学习库。计划兼容TensorFlow, Pytorch, MindSpore, Paddle.