|
|
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2.2.0\n",
- "sys.version_info(major=3, minor=6, micro=9, releaselevel='final', serial=0)\n",
- "matplotlib 3.3.4\n",
- "numpy 1.19.5\n",
- "pandas 1.1.5\n",
- "sklearn 0.24.2\n",
- "tensorflow 2.2.0\n",
- "tensorflow.keras 2.3.0-tf\n"
- ]
- }
- ],
- "source": [
- "import matplotlib as mpl\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline\n",
- "import numpy as np\n",
- "import sklearn\n",
- "import pandas as pd\n",
- "import os\n",
- "import sys\n",
- "import time\n",
- "import tensorflow as tf\n",
- "\n",
- "from tensorflow import keras\n",
- "\n",
- "# Report the TensorFlow and interpreter versions, then each library's version.\n",
- "print(tf.__version__)\n",
- "print(sys.version_info)\n",
- "for module in (mpl, np, pd, sklearn, tf, keras):\n",
- "    print(module.__name__, module.__version__)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- ".. _california_housing_dataset:\n",
- "\n",
- "California Housing dataset\n",
- "--------------------------\n",
- "\n",
- "**Data Set Characteristics:**\n",
- "\n",
- " :Number of Instances: 20640\n",
- "\n",
- " :Number of Attributes: 8 numeric, predictive attributes and the target\n",
- "\n",
- " :Attribute Information:\n",
- " - MedInc median income in block\n",
- " - HouseAge median house age in block\n",
- " - AveRooms average number of rooms\n",
- " - AveBedrms average number of bedrooms\n",
- " - Population block population\n",
- " - AveOccup average house occupancy\n",
- " - Latitude house block latitude\n",
- " - Longitude house block longitude\n",
- "\n",
- " :Missing Attribute Values: None\n",
- "\n",
- "This dataset was obtained from the StatLib repository.\n",
- "http://lib.stat.cmu.edu/datasets/\n",
- "\n",
- "The target variable is the median house value for California districts.\n",
- "\n",
- "This dataset was derived from the 1990 U.S. census, using one row per census\n",
- "block group. A block group is the smallest geographical unit for which the U.S.\n",
- "Census Bureau publishes sample data (a block group typically has a population\n",
- "of 600 to 3,000 people).\n",
- "\n",
- "It can be downloaded/loaded using the\n",
- ":func:`sklearn.datasets.fetch_california_housing` function.\n",
- "\n",
- ".. topic:: References\n",
- "\n",
- " - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,\n",
- " Statistics and Probability Letters, 33 (1997) 291-297\n",
- "\n",
- "(20640, 8)\n",
- "(20640,)\n"
- ]
- }
- ],
- "source": [
- "from sklearn.datasets import fetch_california_housing\n",
- "\n",
- "# Load the California housing regression dataset through scikit-learn.\n",
- "housing = fetch_california_housing()\n",
- "\n",
- "# Print the dataset description, then the feature-matrix and target shapes.\n",
- "for summary in (housing.DESCR, housing.data.shape, housing.target.shape):\n",
- "    print(summary)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "array([[ 8.32520000e+00, 4.10000000e+01, 6.98412698e+00,\n",
- " 1.02380952e+00, 3.22000000e+02, 2.55555556e+00,\n",
- " 3.78800000e+01, -1.22230000e+02],\n",
- " [ 8.30140000e+00, 2.10000000e+01, 6.23813708e+00,\n",
- " 9.71880492e-01, 2.40100000e+03, 2.10984183e+00,\n",
- " 3.78600000e+01, -1.22220000e+02],\n",
- " [ 7.25740000e+00, 5.20000000e+01, 8.28813559e+00,\n",
- " 1.07344633e+00, 4.96000000e+02, 2.80225989e+00,\n",
- " 3.78500000e+01, -1.22240000e+02],\n",
- " [ 5.64310000e+00, 5.20000000e+01, 5.81735160e+00,\n",
- " 1.07305936e+00, 5.58000000e+02, 2.54794521e+00,\n",
- " 3.78500000e+01, -1.22250000e+02],\n",
- " [ 3.84620000e+00, 5.20000000e+01, 6.28185328e+00,\n",
- " 1.08108108e+00, 5.65000000e+02, 2.18146718e+00,\n",
- " 3.78500000e+01, -1.22250000e+02]])\n",
- "--------------------------------------------------\n",
- "array([4.526, 3.585, 3.521, 3.413, 3.422])\n"
- ]
- }
- ],
- "source": [
- "import pprint  # pprint lays the arrays out more readably than print()\n",
- "\n",
- "# Preview the first five feature rows, a separator line, then their targets.\n",
- "pprint.pprint(housing.data[:5])\n",
- "print('-' * 50)\n",
- "pprint.pprint(housing.target[:5])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(11610, 8) (11610,)\n",
- "(3870, 8) (3870,)\n",
- "(5160, 8) (5160,)\n"
- ]
- }
- ],
- "source": [
- "from sklearn.model_selection import train_test_split\n",
- "\n",
- "# Hold out a test set first, then carve a validation set out of the remainder.\n",
- "# random_state is the random seed: a fixed seed reproduces the same split.\n",
- "x_train_all, x_test, y_train_all, y_test = train_test_split(\n",
- "    housing.data, housing.target, random_state=7)\n",
- "x_train, x_valid, y_train, y_valid = train_test_split(\n",
- "    x_train_all, y_train_all, random_state=11)\n",
- "\n",
- "# Shapes, in order: training set, validation set, test set.\n",
- "for features, targets in ((x_train, y_train), (x_valid, y_valid), (x_test, y_test)):\n",
- "    print(features.shape, targets.shape)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [],
- "source": [
- "from sklearn.preprocessing import StandardScaler\n",
- "\n",
- "# Learn the scaling statistics from the training split only, then apply the\n",
- "# same fitted scaler unchanged to the training, validation and test splits.\n",
- "scaler = StandardScaler()\n",
- "scaler.fit(x_train)\n",
- "x_train_scaled, x_valid_scaled, x_test_scaled = (\n",
- "    scaler.transform(split) for split in (x_train, x_valid, x_test)\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(8,)"
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Per-sample input shape, i.e. the number of features\n",
- "x_train_scaled.shape[1:]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([-0.29807281, 0.35226166, -0.10920508, -0.25055521, -0.03406402,\n",
- " -0.006034 , 1.08055484, -1.06113817])"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Take a look at one sample after standardization\n",
- "x_train_scaled[1]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Model: \"sequential_1\"\n",
- "_________________________________________________________________\n",
- "Layer (type) Output Shape Param # \n",
- "=================================================================\n",
- "dense_2 (Dense) (None, 30) 270 \n",
- "_________________________________________________________________\n",
- "dense_3 (Dense) (None, 1) 31 \n",
- "=================================================================\n",
- "Total params: 301\n",
- "Trainable params: 301\n",
- "Non-trainable params: 0\n",
- "_________________________________________________________________\n"
- ]
- }
- ],
- "source": [
- "# Regression model: the network only needs to output a single number.\n",
- "# The input shape is read from the scaled array the model is trained on\n",
- "# (scaling does not change the number of features).\n",
- "model = keras.models.Sequential([\n",
- "    keras.layers.Dense(30, activation='relu',\n",
- "                       input_shape=x_train_scaled.shape[1:]),\n",
- "    keras.layers.Dense(1),  # the output layer has a single node\n",
- "])\n",
- "model.summary()\n",
- "# mean_squared_error is the mean squared error loss\n",
- "model.compile(loss=\"mean_squared_error\", optimizer=\"sgd\")\n",
- "# Stop when val_loss has not improved by at least min_delta for `patience`\n",
- "# consecutive epochs. restore_best_weights rolls the model back to its best\n",
- "# epoch when training is stopped early; without it the model keeps the\n",
- "# weights of the last epoch, which can be noticeably worse than the best one.\n",
- "callbacks = [keras.callbacks.EarlyStopping(\n",
- "    monitor='val_loss', patience=5, min_delta=1e-3,\n",
- "    restore_best_weights=True)]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[<tensorflow.python.keras.layers.core.Dense at 0x7fd59002a898>,\n",
- " <tensorflow.python.keras.layers.core.Dense at 0x7fd59002ab00>]"
- ]
- },
- "execution_count": 30,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# List the layer objects the model is built from\n",
- "model.layers"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Epoch 1/100\n",
- "363/363 [==============================] - 0s 875us/step - loss: 0.8912 - val_loss: 0.6639\n",
- "Epoch 2/100\n",
- "363/363 [==============================] - 0s 811us/step - loss: 0.7128 - val_loss: 0.4861\n",
- "Epoch 3/100\n",
- "363/363 [==============================] - 0s 820us/step - loss: 0.4428 - val_loss: 0.4569\n",
- "Epoch 4/100\n",
- "363/363 [==============================] - 0s 812us/step - loss: 0.4231 - val_loss: 0.4379\n",
- "Epoch 5/100\n",
- "363/363 [==============================] - 0s 814us/step - loss: 0.4111 - val_loss: 0.4254\n",
- "Epoch 6/100\n",
- "363/363 [==============================] - 0s 812us/step - loss: 0.4003 - val_loss: 0.4203\n",
- "Epoch 7/100\n",
- "363/363 [==============================] - 0s 824us/step - loss: 0.3940 - val_loss: 0.4087\n",
- "Epoch 8/100\n",
- "363/363 [==============================] - 0s 813us/step - loss: 0.3890 - val_loss: 0.4104\n",
- "Epoch 9/100\n",
- "363/363 [==============================] - 0s 819us/step - loss: 0.3826 - val_loss: 0.4032\n",
- "Epoch 10/100\n",
- "363/363 [==============================] - 0s 819us/step - loss: 0.3787 - val_loss: 0.3991\n",
- "Epoch 11/100\n",
- "363/363 [==============================] - 0s 817us/step - loss: 0.3742 - val_loss: 0.3897\n",
- "Epoch 12/100\n",
- "363/363 [==============================] - 0s 804us/step - loss: 0.3733 - val_loss: 0.3891\n",
- "Epoch 13/100\n",
- "363/363 [==============================] - 0s 806us/step - loss: 0.3686 - val_loss: 0.3868\n",
- "Epoch 14/100\n",
- "363/363 [==============================] - 0s 821us/step - loss: 0.3655 - val_loss: 0.3836\n",
- "Epoch 15/100\n",
- "363/363 [==============================] - 0s 814us/step - loss: 0.3632 - val_loss: 0.3758\n",
- "Epoch 16/100\n",
- "363/363 [==============================] - 0s 814us/step - loss: 0.3616 - val_loss: 0.3914\n",
- "Epoch 17/100\n",
- "363/363 [==============================] - 0s 826us/step - loss: 0.3585 - val_loss: 0.3732\n",
- "Epoch 18/100\n",
- "363/363 [==============================] - 0s 811us/step - loss: 0.3682 - val_loss: 0.3766\n",
- "Epoch 19/100\n",
- "363/363 [==============================] - 0s 808us/step - loss: 0.3540 - val_loss: 0.3707\n",
- "Epoch 20/100\n",
- "363/363 [==============================] - 0s 829us/step - loss: 0.3562 - val_loss: 0.3750\n",
- "Epoch 21/100\n",
- "363/363 [==============================] - 0s 830us/step - loss: 0.3507 - val_loss: 0.3659\n",
- "Epoch 22/100\n",
- "363/363 [==============================] - 0s 813us/step - loss: 0.3581 - val_loss: 0.3752\n",
- "Epoch 23/100\n",
- "363/363 [==============================] - 0s 819us/step - loss: 0.3482 - val_loss: 0.3663\n",
- "Epoch 24/100\n",
- "363/363 [==============================] - 0s 816us/step - loss: 0.3469 - val_loss: 0.3658\n",
- "Epoch 25/100\n",
- "363/363 [==============================] - 0s 807us/step - loss: 0.3441 - val_loss: 0.3635\n",
- "Epoch 26/100\n",
- "363/363 [==============================] - 0s 814us/step - loss: 0.3434 - val_loss: 0.3640\n",
- "Epoch 27/100\n",
- "363/363 [==============================] - 0s 805us/step - loss: 0.3428 - val_loss: 0.3643\n",
- "Epoch 28/100\n",
- "363/363 [==============================] - 0s 804us/step - loss: 0.3422 - val_loss: 0.3575\n",
- "Epoch 29/100\n",
- "363/363 [==============================] - 0s 826us/step - loss: 0.3395 - val_loss: 0.3584\n",
- "Epoch 30/100\n",
- "363/363 [==============================] - 0s 822us/step - loss: 0.3397 - val_loss: 0.3564\n",
- "Epoch 31/100\n",
- "363/363 [==============================] - 0s 813us/step - loss: 0.3380 - val_loss: 0.3563\n",
- "Epoch 32/100\n",
- "363/363 [==============================] - 0s 810us/step - loss: 0.3362 - val_loss: 0.3541\n",
- "Epoch 33/100\n",
- "363/363 [==============================] - 0s 826us/step - loss: 0.3366 - val_loss: 0.3518\n",
- "Epoch 34/100\n",
- "363/363 [==============================] - 0s 824us/step - loss: 0.3525 - val_loss: 0.3794\n",
- "Epoch 35/100\n",
- "363/363 [==============================] - 0s 813us/step - loss: 0.3368 - val_loss: 0.3485\n",
- "Epoch 36/100\n",
- "363/363 [==============================] - 0s 826us/step - loss: 0.3393 - val_loss: 0.3552\n",
- "Epoch 37/100\n",
- "363/363 [==============================] - 0s 824us/step - loss: 0.3458 - val_loss: 0.3571\n",
- "Epoch 38/100\n",
- "363/363 [==============================] - 0s 818us/step - loss: 0.3453 - val_loss: 0.3681\n",
- "Epoch 39/100\n",
- "363/363 [==============================] - 0s 815us/step - loss: 0.3352 - val_loss: 0.3475\n",
- "Epoch 40/100\n",
- "363/363 [==============================] - 0s 821us/step - loss: 0.3623 - val_loss: 0.4131\n"
- ]
- }
- ],
- "source": [
- "# Train for at most 100 epochs, validating on the held-out validation split\n",
- "# after each one; the callbacks may end training earlier.\n",
- "history = model.fit(\n",
- "    x_train_scaled,\n",
- "    y_train,\n",
- "    epochs=100,\n",
- "    validation_data=(x_valid_scaled, y_valid),\n",
- "    callbacks=callbacks,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>0</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>-0.177860</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>-0.029153</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>-0.019061</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>-0.012509</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5</th>\n",
- " <td>-0.005047</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6</th>\n",
- " <td>-0.011595</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7</th>\n",
- " <td>0.001715</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8</th>\n",
- " <td>-0.007277</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9</th>\n",
- " <td>-0.004029</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10</th>\n",
- " <td>-0.009408</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>11</th>\n",
- " <td>-0.000596</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>12</th>\n",
- " <td>-0.002325</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>13</th>\n",
- " <td>-0.003157</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>14</th>\n",
- " <td>-0.007837</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>15</th>\n",
- " <td>0.015586</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>16</th>\n",
- " <td>-0.018196</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>17</th>\n",
- " <td>0.003385</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>18</th>\n",
- " <td>-0.005923</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>19</th>\n",
- " <td>0.004336</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>20</th>\n",
- " <td>-0.009119</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>21</th>\n",
- " <td>0.009357</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>22</th>\n",
- " <td>-0.008908</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>23</th>\n",
- " <td>-0.000567</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>24</th>\n",
- " <td>-0.002293</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>25</th>\n",
- " <td>0.000529</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>26</th>\n",
- " <td>0.000278</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>27</th>\n",
- " <td>-0.006812</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>28</th>\n",
- " <td>0.000943</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>29</th>\n",
- " <td>-0.002013</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>30</th>\n",
- " <td>-0.000127</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>31</th>\n",
- " <td>-0.002156</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>32</th>\n",
- " <td>-0.002337</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>33</th>\n",
- " <td>0.027607</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>34</th>\n",
- " <td>-0.030894</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>35</th>\n",
- " <td>0.006756</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>36</th>\n",
- " <td>0.001852</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>37</th>\n",
- " <td>0.011062</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>38</th>\n",
- " <td>-0.020612</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>39</th>\n",
- " <td>0.065578</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " 0\n",
- "0 NaN\n",
- "1 -0.177860\n",
- "2 -0.029153\n",
- "3 -0.019061\n",
- "4 -0.012509\n",
- "5 -0.005047\n",
- "6 -0.011595\n",
- "7 0.001715\n",
- "8 -0.007277\n",
- "9 -0.004029\n",
- "10 -0.009408\n",
- "11 -0.000596\n",
- "12 -0.002325\n",
- "13 -0.003157\n",
- "14 -0.007837\n",
- "15 0.015586\n",
- "16 -0.018196\n",
- "17 0.003385\n",
- "18 -0.005923\n",
- "19 0.004336\n",
- "20 -0.009119\n",
- "21 0.009357\n",
- "22 -0.008908\n",
- "23 -0.000567\n",
- "24 -0.002293\n",
- "25 0.000529\n",
- "26 0.000278\n",
- "27 -0.006812\n",
- "28 0.000943\n",
- "29 -0.002013\n",
- "30 -0.000127\n",
- "31 -0.002156\n",
- "32 -0.002337\n",
- "33 0.027607\n",
- "34 -0.030894\n",
- "35 0.006756\n",
- "36 0.001852\n",
- "37 0.011062\n",
- "38 -0.020612\n",
- "39 0.065578"
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Epoch-to-epoch change in validation loss; the first row is NaN because it\n",
- "# has no previous epoch to compare against.\n",
- "pd.DataFrame(history.history['val_loss']).diff(periods=1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 33,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "\n",
- "text/plain": [
- "<Figure size 576x360 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "def plot_learning_curves(history):\n",
- "    \"\"\"Plot every series stored in ``history.history`` as a line chart.\n",
- "\n",
- "    The figure is 8x5 inches, has a grid, and its y-axis is fixed to\n",
- "    the range [0, 1.4]. The figure is shown immediately.\n",
- "    \"\"\"\n",
- "    curves = pd.DataFrame(history.history)\n",
- "    axes = curves.plot(figsize=(8, 5))\n",
- "    axes.grid(True)\n",
- "    axes.set_ylim(0, 1.4)\n",
- "    plt.show()\n",
- "\n",
- "\n",
- "plot_learning_curves(history)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 34,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.37843212485313416"
- ]
- },
- "execution_count": 34,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Loss (mean squared error, as compiled) on the held-out test split\n",
- "model.evaluate(x=x_test_scaled, y=y_test, verbose=0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "-1.0"
- ]
- },
- "execution_count": 35,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# tanh saturates: a large negative input evaluates to -1.0\n",
- "np.tanh(-1000)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.9"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|