|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663 |
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2.2.0\n",
- "sys.version_info(major=3, minor=6, micro=9, releaselevel='final', serial=0)\n",
- "matplotlib 3.3.4\n",
- "numpy 1.19.5\n",
- "pandas 1.1.5\n",
- "sklearn 0.24.2\n",
- "tensorflow 2.2.0\n",
- "tensorflow.keras 2.3.0-tf\n"
- ]
- }
- ],
- "source": [
- "import matplotlib as mpl\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline\n",
- "import numpy as np\n",
- "import sklearn\n",
- "import pandas as pd\n",
- "import os\n",
- "import sys\n",
- "import time\n",
- "import tensorflow as tf\n",
- "\n",
- "from tensorflow import keras\n",
- "\n",
- "print(tf.__version__)\n",
- "print(sys.version_info)\n",
- "for module in mpl, np, pd, sklearn, tf, keras:\n",
- " print(module.__name__, module.__version__)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# from_tensor_slices, repeat, batch, interleave"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<TensorSliceDataset shapes: (), types: tf.int64>\n"
- ]
- }
- ],
- "source": [
- "# Build a dataset from in-memory data; the resulting dataset is an iterable that can be looped over\n",
- "dataset = tf.data.Dataset.from_tensor_slices(np.arange(1,6).repeat(6))\n",
- "\n",
- "print(dataset)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n"
- ]
- }
- ],
- "source": [
- "#可以做遍历,每一个都是tensor\n",
- "for item in dataset:\n",
- " print(item)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n"
- ]
- }
- ],
- "source": [
- "dataset1 = dataset.repeat(3) #为了epoch服务的\n",
- "for item in dataset1:\n",
- " print(item)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<BatchDataset shapes: (None,), types: tf.int64>\n",
- "tf.Tensor([1 1 1 1 1 1 2], shape=(7,), dtype=int64)\n",
- "tf.Tensor([2 2 2 2 2 3 3], shape=(7,), dtype=int64)\n",
- "tf.Tensor([3 3 3 3 4 4 4], shape=(7,), dtype=int64)\n",
- "tf.Tensor([4 4 4 5 5 5 5], shape=(7,), dtype=int64)\n",
- "tf.Tensor([5 5 1 1 1 1 1], shape=(7,), dtype=int64)\n",
- "tf.Tensor([1 2 2 2 2 2 2], shape=(7,), dtype=int64)\n",
- "tf.Tensor([3 3 3 3 3 3 4], shape=(7,), dtype=int64)\n",
- "tf.Tensor([4 4 4 4 4 5 5], shape=(7,), dtype=int64)\n",
- "tf.Tensor([5 5 5 5 1 1 1], shape=(7,), dtype=int64)\n",
- "tf.Tensor([1 1 1 2 2 2 2], shape=(7,), dtype=int64)\n",
- "tf.Tensor([2 2 3 3 3 3 3], shape=(7,), dtype=int64)\n",
- "tf.Tensor([3 4 4 4 4 4 4], shape=(7,), dtype=int64)\n",
- "tf.Tensor([5 5 5 5 5 5], shape=(6,), dtype=int64)\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "13"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# 1. repeat epoch 遍历一次数据集,就称为1次epoch\n",
- "# 2. get batch 取一部分数据集\n",
- "\n",
- "dataset = dataset.repeat(3).batch(7)\n",
- "print(dataset)\n",
- "i=0\n",
- "for item in dataset:\n",
- " i=i+1\n",
- " print(item)\n",
- "i"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "tf.Tensor([1 1 1 1 1 1 2], shape=(7,), dtype=int64)\n",
- "tf.Tensor([2 2 2 2 2 3 3], shape=(7,), dtype=int64)\n",
- "tf.Tensor([3 3 3 3 4 4 4], shape=(7,), dtype=int64)\n",
- "tf.Tensor([4 4 4 5 5 5 5], shape=(7,), dtype=int64)\n",
- "tf.Tensor([5 5 1 1 1 1 1], shape=(7,), dtype=int64)\n",
- "tf.Tensor([1 2 2 2 2 2 2], shape=(7,), dtype=int64)\n",
- "tf.Tensor([3 3 3 3 3 3 4], shape=(7,), dtype=int64)\n",
- "tf.Tensor([4 4 4 4 4 5 5], shape=(7,), dtype=int64)\n",
- "tf.Tensor([5 5 5 5 1 1 1], shape=(7,), dtype=int64)\n",
- "tf.Tensor([1 1 1 2 2 2 2], shape=(7,), dtype=int64)\n",
- "tf.Tensor([2 2 3 3 3 3 3], shape=(7,), dtype=int64)\n",
- "tf.Tensor([3 4 4 4 4 4 4], shape=(7,), dtype=int64)\n",
- "tf.Tensor([5 5 5 5 5 5], shape=(6,), dtype=int64)\n"
- ]
- }
- ],
- "source": [
- "for i in dataset:\n",
- " print(i)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {
- "scrolled": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<InterleaveDataset shapes: (), types: tf.int64>\n",
- "--------------------------------------------------\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "90"
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# interleave: 作用 https://zhuanlan.zhihu.com/p/97876668\n",
- "# case: 文件dataset -> 具体数据集\n",
- "\n",
- "dataset2 = dataset.interleave(\n",
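- " lambda v: tf.data.Dataset.from_tensor_slices(v), # map_func: callback that maps each input element (here, one batch) to a nested dataset\n",
- " cycle_length = 5, # cycle_length: number of input elements whose nested datasets are interleaved at the same time\n",
- " block_length = 6, # block_length: number of consecutive items taken from one nested dataset before moving on to the next\n",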
- ")\n",
- "print(dataset2)\n",
- "print('-'*50)\n",
- "i=0\n",
- "for item in dataset2:\n",
- " i=i+1\n",
- " print(item)\n",
- "i"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [],
- "source": [
- "# 下面的例子理解interleave更加简单"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "--------------------------------------------------\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n"
- ]
- }
- ],
- "source": [
- "a = tf.data.Dataset.range(1,6)\n",
- "for i in a:\n",
- " print(i)\n",
- "# b=a.repeat(6)\n",
- "# for i in b:\n",
- "# print(i)\n",
- "print('-'*50)\n",
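- "# cycle_length: how many input elements are interleaved at once; after one block has been taken from each of them, the cycle starts over\n",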
- "a1=a.interleave(lambda x: tf.data.Dataset.from_tensors(x).repeat(6),\n",
- " cycle_length=3, block_length=4)\n",
- "for i in a1:\n",
- " print(i)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 针对各种格式数据变为dataset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<TensorSliceDataset shapes: ((2,), ()), types: (tf.int64, tf.string)>\n",
- "tf.Tensor([1 2], shape=(2,), dtype=int64) tf.Tensor(b'cat', shape=(), dtype=string)\n",
- "tf.Tensor([3 4], shape=(2,), dtype=int64) tf.Tensor(b'dog', shape=(), dtype=string)\n",
- "tf.Tensor([5 6], shape=(2,), dtype=int64) tf.Tensor(b'fox', shape=(), dtype=string)\n"
- ]
- }
- ],
- "source": [
- "x = np.array([[1, 2], [3, 4], [5, 6]])\n",
- "y = np.array(['cat', 'dog', 'fox'])\n",
- "# Case: the input argument is a tuple\n",
- "dataset3 = tf.data.Dataset.from_tensor_slices((x, y))\n",
- "print(dataset3)\n",
- "\n",
- "for item_x, item_y in dataset3:\n",
- " print(item_x, item_y)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<TensorSliceDataset shapes: {feature1: (2,), label: ()}, types: {feature1: tf.int64, label: tf.string}>\n",
- "tf.Tensor([1 2], shape=(2,), dtype=int64) tf.Tensor(b'cat', shape=(), dtype=string)\n",
- "tf.Tensor([3 4], shape=(2,), dtype=int64) tf.Tensor(b'dog', shape=(), dtype=string)\n",
- "tf.Tensor([5 6], shape=(2,), dtype=int64) tf.Tensor(b'fox', shape=(), dtype=string)\n"
- ]
- }
- ],
- "source": [
- "#输入的参数是字典的情况下\n",
- "dataset4 = tf.data.Dataset.from_tensor_slices({\"feature1\": x,\n",
- " \"label\": y})\n",
- "print(dataset4)\n",
- "for item in dataset4:\n",
- " print(item[\"feature1\"], item[\"label\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "<RangeDataset shapes: (), types: tf.int64>\n",
- "<InterleaveDataset shapes: (), types: tf.int64>\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(1, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(2, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(3, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(4, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n",
- "tf.Tensor(5, shape=(), dtype=int64)\n"
- ]
- }
- ],
- "source": [
- "a = tf.data.Dataset.range(1, 6) # ==> [ 1, 2, 3, 4, 5 ]\n",
- "print(a)\n",
- "# NOTE: New lines indicate \"block\" boundaries.\n",
- "dataset2=a.interleave(lambda x: tf.data.Dataset.from_tensors(x).repeat(6),\n",
- " cycle_length=2, block_length=4) # ==> [1, 1, 1, 1,\n",
- " # 2, 2, 2, 2,\n",
- " # 1, 1,\n",
- " # 2, 2,\n",
- " # 3, 3, 3, 3,\n",
- " # 4, 4, 4, 4,\n",
- " # 3, 3,\n",
- " # 4, 4,\n",
- " # 5, 5, 5, 5,\n",
- " # 5, 5]\n",
- "print(dataset2)\n",
- "for item in dataset2:\n",
- " print(item)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.9"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|