You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

tensorflow_data.py 10 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. #! /usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import tensorflow as tf
  4. import tensorlayer as tl
  5. import numpy as np
# Public API of this dataflow module: dataset base classes plus
# functional wrappers around the TensorFlow tf.data backend.
__all__ = [
    'Batch',
    'Concat',
    'FromGenerator',
    'FromSlices',
    'Map',
    'Repeat',
    'Shuffle',
    'Zip',
    'Dataloader',
    'Dataset',
    'IterableDataset',
]
  19. class Dataset(object):
  20. """An abstract class to encapsulate methods and behaviors of datasets.
  21. All datasets in map-style(dataset samples can be get by a given key) should be a subclass of 'tensorlayer.dataflow.Dataset'.
  22. ALl subclasses should implement following methods:
  23. :code:`__getitem__`: get sample from dataset with a given index.
  24. :code:`__len__`: return dataset sample number.
  25. Examples
  26. ----------
  27. With TensorLayer
  28. >>> from tensorlayer.dataflow import Dataset
  29. >>> class mnistdataset(Dataset):
  30. >>> def __init__(self, data, label,transform):
  31. >>> self.data = data
  32. >>> self.label = label
  33. >>> self.transform = transform
  34. >>> def __getitem__(self, index):
  35. >>> data = self.data[index].astype('float32')
  36. >>> data = self.transform(data)
  37. >>> label = self.label[index].astype('int64')
  38. >>> return data, label
  39. >>> def __len__(self):
  40. >>> return len(self.data)
  41. >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)
  42. """
  43. def __init__(self):
  44. pass
  45. def __call__(self):
  46. return self
  47. def __getitem__(self, idx):
  48. raise NotImplementedError("'{}' not implement in class "\
  49. "{}".format('__getitem__', self.__class__.__name__))
  50. def __len__(self):
  51. raise NotImplementedError("'{}' not implement in class "\
  52. "{}".format('__len__', self.__class__.__name__))
  53. class IterableDataset(object):
  54. """An abstract class to encapsulate methods and behaviors of iterable datasets.
  55. All datasets in iterable-style (can only get sample one by one sequentially, likea Python iterator) should be a subclass of `tensorlayer.dataflow.IterableDataset`.
  56. All subclasses should implement following methods:
  57. :code:`__iter__`: yield sample sequentially.
  58. Examples
  59. ----------
  60. With TensorLayer
  61. >>> class mnistdataset(IterableDataset):
  62. >>> def __init__(self, data, label,transform):
  63. >>> self.data = data
  64. >>> self.label = label
  65. >>> self.transform = transform
  66. >>> def __iter__(self):
  67. >>> for i in range(len(self.data)):
  68. >>> data = self.data[i].astype('float32')
  69. >>> data = self.transform(data)
  70. >>> label = self.label[i].astype('int64')
  71. >>> yield data, label
  72. >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)
  73. """
  74. def __init__(self):
  75. pass
  76. def __call__(self):
  77. return self
  78. def __iter__(self):
  79. raise NotImplementedError("'{}' not implement in class "\
  80. "{}".format('__iter__', self.__class__.__name__))
  81. def FromGenerator(generator, output_types, column_names=None):
  82. """Creates a `Dataset` whose elements are generated by `generator`.
  83. Parameters
  84. ----------
  85. generator: Callable or Iterable
  86. A generator callable object or an iterable Python object.
  87. output_types: list or tuple
  88. Set output data type. This parameter not support in MindSpore backend and Paddle backend.
  89. column_names: list or tuple
  90. column names of the dataset. This parameter not support in TensorFlow backend and Paddle backend.
  91. Returns
  92. -------
  93. Dataset
  94. A Dataset.
  95. Examples
  96. ----------
  97. With TensorLayer
  98. >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)
  99. >>> train_dataset = tl.dataflow.FromGenerator(train_dataset, output_types=[tl.float32, tl.int64], column_names=['data', 'label'])
  100. """
  101. output_types = tuple(output_types)
  102. return tf.data.Dataset.from_generator(generator, output_types=output_types)
  103. def Batch(dataset, batch_size, drop_last=False):
  104. """Combine batch_size number of consecutive rows into batches.This function not implement in Paddle backend.
  105. Parameters
  106. ----------
  107. dataset:
  108. A dataset.
  109. batch_size: int
  110. Sample number in a mini-batch.
  111. drop_last: boolean
  112. whether drop the last incomplete batch dataset size is not divisible by the batch size.
  113. Returns
  114. -------
  115. Dataset
  116. A batchDataset.
  117. """
  118. return dataset.batch(batch_size=batch_size, drop_remainder=drop_last)
  119. def Concat(datasets):
  120. """Concatenate the datasets in the input list of datasets.
  121. Parameters
  122. ----------
  123. datasets: dataset
  124. A list of datasets.
  125. Returns
  126. -------
  127. Dataset
  128. datasets concatenated.
  129. Examples
  130. ----------
  131. With TensorLayer
  132. >>> dataset = tl.dataflow.Concat([dataset1, dataset2])
  133. """
  134. dataset_num = len(datasets)
  135. dataset = datasets[0]
  136. for i in range(1, dataset_num):
  137. dataset.concatenate(datasets[i])
  138. return dataset
  139. def FromSlices(datas, column_names=None):
  140. """Creates a dataset with given data slices.
  141. Parameters
  142. ----------
  143. datas: list or tuple
  144. Each data should be in shape of [N, …], while N is the sample number.
  145. Input data will be sliced along the first dimension and generate additional rows
  146. column_names: list
  147. List of column names of the dataset. This parameter not support in TensorFlow backend and Paddle backend.
  148. Returns
  149. -------
  150. Dataset
  151. A dataset.
  152. Examples
  153. ----------
  154. With TensorLayer
  155. >>> dataset = tl.dataflow.FromSlices([data1, data2])
  156. """
  157. return tf.data.Dataset.from_tensor_slices(datas)
  158. def Map(dataset, map_func, input_columns=None):
  159. """ Maps map_func across the elements of this dataset. This function not implement in Paddle backend.
  160. Parameters
  161. ----------
  162. dataset : Dataset
  163. A dataset to map.
  164. map_func : function
  165. A function mapping a dataset element to another dataset element.
  166. input_columns: list
  167. List of column names of the dataset to map. This parameter not support in TensorFlow backend.
  168. Returns
  169. -------
  170. Dataset
  171. A mapped dataset.
  172. Examples
  173. ----------
  174. With TensorLayer
  175. >>> dataset = tl.dataflow.Map(dataset, map_func)
  176. """
  177. return dataset.map(map_func)
  178. def Repeat(dataset, count=None):
  179. """ Repeat this dataset count times. This function not implement in Paddle backend.
  180. Parameters
  181. ----------
  182. dataset : Dataset
  183. A dataset to repeat.
  184. count : int
  185. The number of times the dataset should be repeated. The default behavior (if count is None or -1) is for the dataset be repeated indefinitely.
  186. Returns
  187. -------
  188. Dataset
  189. A repeated dataset.
  190. Examples
  191. ----------
  192. With TensorLayer
  193. >>> dataset = tl.dataflow.Repeat(dataset, 2)
  194. """
  195. return dataset.repeat(count=count)
  196. def Shuffle(dataset, buffer_size):
  197. """ Randomly shuffles the elements of this dataset.This function not implement in Paddle backend.
  198. Parameters
  199. ----------
  200. dataset : Dataset
  201. A dataset to shuffle.
  202. buffer_size : int
  203. The number of elements from this dataset from which the new dataset will sample.
  204. Returns
  205. -------
  206. Dataset
  207. A shuffled dataset.
  208. Examples
  209. ----------
  210. With TensorLayer
  211. >>> dataset = tl.dataflow.Shuffle(dataset, 2000)
  212. """
  213. return dataset.shuffle(buffer_size, seed=None, reshuffle_each_iteration=True)
  214. def Zip(datasets):
  215. """ Creates a Dataset by zipping together the given datasets.This function not implement in Paddle backend.
  216. Parameters
  217. ----------
  218. datasets : list
  219. A list of datasets to zip.
  220. Returns
  221. -------
  222. Dataset
  223. A zip dataset.
  224. Examples
  225. ----------
  226. With TensorLayer
  227. >>> dataset = tl.dataflow.Zip([dataset1, dataset2])
  228. """
  229. return tf.data.Dataset.zip(datasets)
  230. def Dataloader(dataset, batch_size, shuffle=False, drop_last=False, shuffle_buffer_size=10000):
  231. """ Creates a Datasetloader to trian network. We recommend using this function.
  232. Parameters
  233. ----------
  234. dataset : Dataset
  235. the dataset to load data from.
  236. batch_size: int or None
  237. sample number in a mini-batch.
  238. shuffle: boolean
  239. whther to shuffle indices order before genrate batch indices.
  240. drop_last: boolean
  241. whether drop the last incomplete batch dataset size is not divisible by the batch size.
  242. shuffle_buffer_size: int
  243. The number of elements from this dataset from which the new dataset will sample. This parameter not support in Paddle backend.
  244. Returns
  245. -------
  246. DataLoader
  247. an iterable object for data iterating, each elemnet of the generated data is a Tensor.
  248. Examples
  249. ----------
  250. With TensorLayer
  251. >>> from tensorlayer.dataflow import Dataset
  252. >>> class mnistdataset(Dataset):
  253. >>> def __init__(self, data, label,transform):
  254. >>> self.data = data
  255. >>> self.label = label
  256. >>> self.transform = transform
  257. >>> def __getitem__(self, index):
  258. >>> data = self.data[index].astype('float32')
  259. >>> data = self.transform(data)
  260. >>> label = self.label[index].astype('int64')
  261. >>> return data, label
  262. >>> def __len__(self):
  263. >>> return len(self.data)
  264. >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)
  265. >>> train_dataset = tl.dataflow.FromGenerator(train_dataset, output_types=[tl.float32, tl.int64], column_names=['data', 'label'])
  266. >>> train_dataloader = tl.dataflow.Dataloader(train_dataset, batch_size=128, shuffle=True, drop_last=False, shuffle_buffer_size=2000)
  267. """
  268. if shuffle:
  269. dataset = Shuffle(dataset, buffer_size=shuffle_buffer_size)
  270. dataset = Batch(dataset, batch_size=batch_size, drop_last=drop_last)
  271. dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
  272. return dataset

TensorLayer 3.0 是一款兼容多种深度学习框架作为计算后端的深度学习库,计划兼容 TensorFlow、PyTorch、MindSpore、Paddle。