|
- #! /usr/bin/python
- # -*- coding: utf-8 -*-
-
- import tensorflow as tf
- import tensorlayer as tl
- import numpy as np
# Public API of this module: dataset abstractions plus the functional
# transformation helpers that wrap the TensorFlow tf.data backend.
__all__ = [
    'Batch',
    'Concat',
    'FromGenerator',
    'FromSlices',
    'Map',
    'Repeat',
    'Shuffle',
    'Zip',
    'Dataloader',
    'Dataset',
    'IterableDataset',
]
-
-
class Dataset(object):
    """Abstract base class for map-style datasets.

    A map-style dataset supports random access: a sample can be fetched for
    any given key via :code:`__getitem__`, and the total number of samples is
    reported by :code:`__len__`. Every map-style dataset should subclass
    'tensorlayer.dataflow.Dataset' and override both methods:

    :code:`__getitem__`: get sample from dataset with a given index.
    :code:`__len__`: return dataset sample number.

    Examples
    ----------
    With TensorLayer

    >>> from tensorlayer.dataflow import Dataset
    >>> class mnistdataset(Dataset):
    >>>     def __init__(self, data, label,transform):
    >>>         self.data = data
    >>>         self.label = label
    >>>         self.transform = transform
    >>>     def __getitem__(self, index):
    >>>         data = self.data[index].astype('float32')
    >>>         data = self.transform(data)
    >>>         label = self.label[index].astype('int64')
    >>>         return data, label
    >>>     def __len__(self):
    >>>         return len(self.data)
    >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)

    """

    def __init__(self):
        pass

    def __call__(self):
        # Calling an instance is a no-op that hands back the instance itself.
        return self

    def __getitem__(self, idx):
        # Subclasses must provide random access by index.
        template = "'{}' not implement in class {}"
        raise NotImplementedError(template.format('__getitem__', self.__class__.__name__))

    def __len__(self):
        # Subclasses must report the total sample count.
        template = "'{}' not implement in class {}"
        raise NotImplementedError(template.format('__len__', self.__class__.__name__))
-
-
class IterableDataset(object):
    """Abstract base class for iterable-style datasets.

    An iterable-style dataset yields samples one by one, sequentially, like a
    Python iterator; random access by key is not available. Every
    iterable-style dataset should subclass
    `tensorlayer.dataflow.IterableDataset` and override:

    :code:`__iter__`: yield sample sequentially.

    Examples
    ----------
    With TensorLayer

    >>> class mnistdataset(IterableDataset):
    >>>     def __init__(self, data, label,transform):
    >>>         self.data = data
    >>>         self.label = label
    >>>         self.transform = transform
    >>>     def __iter__(self):
    >>>         for i in range(len(self.data)):
    >>>             data = self.data[i].astype('float32')
    >>>             data = self.transform(data)
    >>>             label = self.label[i].astype('int64')
    >>>             yield data, label
    >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)

    """

    def __init__(self):
        pass

    def __call__(self):
        # Calling an instance is a no-op that hands back the instance itself.
        return self

    def __iter__(self):
        # Subclasses must provide sequential iteration over samples.
        template = "'{}' not implement in class {}"
        raise NotImplementedError(template.format('__iter__', self.__class__.__name__))
-
-
def FromGenerator(generator, output_types, column_names=None):
    """Creates a `Dataset` whose elements are generated by `generator`.

    Parameters
    ----------
    generator: Callable or Iterable
        A generator callable object or an iterable Python object.
    output_types: list or tuple
        Set output data type. This parameter not support in MindSpore backend and Paddle backend.
    column_names: list or tuple
        column names of the dataset. This parameter not support in TensorFlow backend and Paddle backend.
        NOTE: ignored here — this is the TensorFlow backend.

    Returns
    -------
    Dataset
        A Dataset.

    Examples
    ----------
    With TensorLayer

    >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)
    >>> train_dataset = tl.dataflow.FromGenerator(train_dataset, output_types=[tl.float32, tl.int64], column_names=['data', 'label'])

    """
    # tf.data expects output_types as a tuple, not a list.
    return tf.data.Dataset.from_generator(generator, output_types=tuple(output_types))
-
-
def Batch(dataset, batch_size, drop_last=False):
    """Combine batch_size number of consecutive rows into batches. This function not implement in Paddle backend.

    Parameters
    ----------
    dataset:
        A dataset.
    batch_size: int
        Sample number in a mini-batch.
    drop_last: boolean
        whether drop the last incomplete batch when dataset size is not divisible by the batch size.

    Returns
    -------
    Dataset
        A batchDataset.
    """
    # Delegate to the backend; tf.data names the flag `drop_remainder`.
    batched = dataset.batch(batch_size=batch_size, drop_remainder=drop_last)
    return batched
-
-
def Concat(datasets):
    """Concatenate the datasets in the input list of datasets.

    Parameters
    ----------
    datasets: dataset
        A list of datasets.

    Returns
    -------
    Dataset
        datasets concatenated.

    Raises
    ------
    ValueError
        If `datasets` is empty.

    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.Concat([dataset1, dataset2])

    """
    if not datasets:
        raise ValueError("Concat requires at least one dataset.")
    dataset = datasets[0]
    for other in datasets[1:]:
        # BUG FIX: concatenate() does not mutate in place — it returns a new
        # dataset. The previous code discarded that return value, so Concat
        # always returned only the first dataset unchanged.
        dataset = dataset.concatenate(other)
    return dataset
-
-
def FromSlices(datas, column_names=None):
    """Creates a dataset with given data slices.

    Parameters
    ----------
    datas: list or tuple
        Each data should be in shape of [N, …], while N is the sample number.
        Input data will be sliced along the first dimension and generate additional rows
    column_names: list
        List of column names of the dataset. This parameter not support in TensorFlow backend and Paddle backend.
        NOTE: ignored here — this is the TensorFlow backend.

    Returns
    -------
    Dataset
        A dataset.

    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.FromSlices([data1, data2])

    """
    sliced = tf.data.Dataset.from_tensor_slices(datas)
    return sliced
-
-
def Map(dataset, map_func, input_columns=None):
    """ Maps map_func across the elements of this dataset. This function not implement in Paddle backend.

    Parameters
    ----------
    dataset : Dataset
        A dataset to map.
    map_func : function
        A function mapping a dataset element to another dataset element.
    input_columns: list
        List of column names of the dataset to map. This parameter not support in TensorFlow backend.
        NOTE: ignored here — this is the TensorFlow backend.

    Returns
    -------
    Dataset
        A mapped dataset.

    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.Map(dataset, map_func)

    """
    mapped = dataset.map(map_func)
    return mapped
-
-
def Repeat(dataset, count=None):
    """ Repeat this dataset count times. This function not implement in Paddle backend.

    Parameters
    ----------
    dataset : Dataset
        A dataset to repeat.
    count : int
        The number of times the dataset should be repeated. The default behavior (if count is None or -1) is for the dataset be repeated indefinitely.

    Returns
    -------
    Dataset
        A repeated dataset.

    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.Repeat(dataset, 2)

    """
    repeated = dataset.repeat(count=count)
    return repeated
-
-
def Shuffle(dataset, buffer_size):
    """ Randomly shuffles the elements of this dataset. This function not implement in Paddle backend.

    Parameters
    ----------
    dataset : Dataset
        A dataset to shuffle.
    buffer_size : int
        The number of elements from this dataset from which the new dataset will sample.

    Returns
    -------
    Dataset
        A shuffled dataset.

    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.Shuffle(dataset, 2000)

    """
    # No fixed seed: a fresh shuffle order on every epoch iteration.
    shuffled = dataset.shuffle(buffer_size=buffer_size, seed=None, reshuffle_each_iteration=True)
    return shuffled
-
-
def Zip(datasets):
    """ Creates a Dataset by zipping together the given datasets. This function not implement in Paddle backend.

    Parameters
    ----------
    datasets : list
        A list of datasets to zip.

    Returns
    -------
    Dataset
        A zip dataset.

    Examples
    ----------
    With TensorLayer

    >>> dataset = tl.dataflow.Zip([dataset1, dataset2])

    """
    zipped = tf.data.Dataset.zip(datasets)
    return zipped
-
-
def Dataloader(dataset, batch_size, shuffle=False, drop_last=False, shuffle_buffer_size=10000):
    """ Creates a Dataloader to train a network. We recommend using this function.

    Parameters
    ----------
    dataset : Dataset
        the dataset to load data from.
    batch_size: int or None
        sample number in a mini-batch.
    shuffle: boolean
        whether to shuffle indices order before generating batch indices.
    drop_last: boolean
        whether drop the last incomplete batch when dataset size is not divisible by the batch size.
    shuffle_buffer_size: int
        The number of elements from this dataset from which the new dataset will sample. This parameter not support in Paddle backend.

    Returns
    -------
    DataLoader
        an iterable object for data iterating, each element of the generated data is a Tensor.

    Examples
    ----------
    With TensorLayer

    >>> from tensorlayer.dataflow import Dataset
    >>> class mnistdataset(Dataset):
    >>>     def __init__(self, data, label,transform):
    >>>         self.data = data
    >>>         self.label = label
    >>>         self.transform = transform
    >>>     def __getitem__(self, index):
    >>>         data = self.data[index].astype('float32')
    >>>         data = self.transform(data)
    >>>         label = self.label[index].astype('int64')
    >>>         return data, label
    >>>     def __len__(self):
    >>>         return len(self.data)
    >>> train_dataset = mnistdataset(data = X_train, label = y_train ,transform = transform)
    >>> train_dataset = tl.dataflow.FromGenerator(train_dataset, output_types=[tl.float32, tl.int64], column_names=['data', 'label'])
    >>> train_dataloader = tl.dataflow.Dataloader(train_dataset, batch_size=128, shuffle=True, drop_last=False, shuffle_buffer_size=2000)

    """
    pipeline = dataset

    # Shuffle (optional) -> batch -> prefetch, the standard tf.data pipeline.
    if shuffle:
        pipeline = Shuffle(pipeline, buffer_size=shuffle_buffer_size)
    pipeline = Batch(pipeline, batch_size=batch_size, drop_last=drop_last)

    # Prefetch lets preprocessing overlap with training; AUTOTUNE sizes the
    # prefetch buffer dynamically at runtime.
    return pipeline.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
|