{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.2.0\n", "sys.version_info(major=3, minor=6, micro=9, releaselevel='final', serial=0)\n", "matplotlib 3.2.1\n", "numpy 1.18.5\n", "pandas 1.0.4\n", "sklearn 0.23.1\n", "tensorflow 2.2.0\n", "tensorflow.keras 2.3.0-tf\n" ] } ], "source": [ "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import numpy as np\n", "import sklearn\n", "import pandas as pd\n", "import os\n", "import sys\n", "import time\n", "import tensorflow as tf\n", "\n", "from tensorflow import keras\n", "\n", "print(tf.__version__)\n", "print(sys.version_info)\n", "for module in mpl, np, pd, sklearn, tf, keras:\n", " print(module.__name__, module.__version__)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ".. _california_housing_dataset:\n", "\n", "California Housing dataset\n", "--------------------------\n", "\n", "**Data Set Characteristics:**\n", "\n", " :Number of Instances: 20640\n", "\n", " :Number of Attributes: 8 numeric, predictive attributes and the target\n", "\n", " :Attribute Information:\n", " - MedInc median income in block\n", " - HouseAge median house age in block\n", " - AveRooms average number of rooms\n", " - AveBedrms average number of bedrooms\n", " - Population block population\n", " - AveOccup average house occupancy\n", " - Latitude house block latitude\n", " - Longitude house block longitude\n", "\n", " :Missing Attribute Values: None\n", "\n", "This dataset was obtained from the StatLib repository.\n", "http://lib.stat.cmu.edu/datasets/\n", "\n", "The target variable is the median house value for California districts.\n", "\n", "This dataset was derived from the 1990 U.S. census, using one row per census\n", "block group. 
A block group is the smallest geographical unit for which the U.S.\n", "Census Bureau publishes sample data (a block group typically has a population\n", "of 600 to 3,000 people).\n", "\n", "It can be downloaded/loaded using the\n", ":func:`sklearn.datasets.fetch_california_housing` function.\n", "\n", ".. topic:: References\n", "\n", " - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,\n", " Statistics and Probability Letters, 33 (1997) 291-297\n", "\n", "(20640, 8)\n", "(20640,)\n" ] } ], "source": [
"from sklearn.datasets import fetch_california_housing\n",
"\n",
"# Load the California housing regression data and show its description and shapes.\n",
"housing = fetch_california_housing()\n",
"print(housing.DESCR)\n",
"print(housing.data.shape, housing.target.shape, sep=\"\\n\")"
] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(11610, 8) (11610,)\n", "(3870, 8) (3870,)\n", "(5160, 8) (5160,)\n" ] } ], "source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out the test split first, then carve the validation split out of the rest.\n",
"# Fixed random_state values keep both splits repeatable.\n",
"x_train_all, x_test, y_train_all, y_test = train_test_split(\n",
"    housing.data, housing.target, random_state=7)\n",
"x_train, x_valid, y_train, y_valid = train_test_split(\n",
"    x_train_all, y_train_all, random_state=11)\n",
"# One line per split: feature-matrix shape followed by target shape.\n",
"print(*(f\"{x.shape} {y.shape}\"\n",
"        for x, y in [(x_train, y_train), (x_valid, y_valid), (x_test, y_test)]),\n",
"      sep=\"\\n\")\n"
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# Fit the scaler on the training split only; the validation and test splits\n",
"# are transformed with the statistics learned from the training data.\n",
"scaler = StandardScaler()\n",
"x_train_scaled = scaler.fit_transform(x_train)\n",
"x_valid_scaled, x_test_scaled = map(scaler.transform, (x_valid, x_test))"
] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"wide_deep_model_2\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", 
"=================================================================\n", "dense_6 (Dense) multiple 270 \n", "_________________________________________________________________\n", "dense_7 (Dense) multiple 930 \n", "_________________________________________________________________\n", "dense_8 (Dense) multiple 39 \n", "=================================================================\n", "Total params: 1,239\n", "Trainable params: 1,239\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "None\n" ] } ], "source": [ "# 子类API\n", "#Model类的介绍看下面\n", "# https://tensorflow.google.cn/api_docs/python/tf/keras/Model\n", "#其实就是将原有面向过程的代码,改为面向对象\n", "class WideDeepModel(keras.models.Model):\n", " def __init__(self):\n", " super(WideDeepModel, self).__init__()\n", " \"\"\"定义模型的层次\"\"\"\n", " #初始化了3个全连接层的层对象\n", " self.hidden1_layer = keras.layers.Dense(30, activation='relu')\n", " self.hidden2_layer = keras.layers.Dense(30, activation='relu')\n", " self.output_layer = keras.layers.Dense(1)\n", " \n", " def call(self, input):\n", " \"\"\"完成模型的正向计算(搭建),call是被build方法调用的\"\"\"\n", " #这里的input是输入的特征形状\n", " hidden1 = self.hidden1_layer(input)\n", " hidden2 = self.hidden2_layer(hidden1)\n", " concat = keras.layers.concatenate([input, hidden2])\n", " output = self.output_layer(concat)\n", " return output\n", "\n", "model = WideDeepModel()\n", "#下面这种也可以,和model = WideDeepModel()是没有打印细节\n", "# model = keras.models.Sequential([\n", "# WideDeepModel(),\n", "# ])\n", "#build等价于调用call,29行和30行是等价的\n", "# model(input_shape=(None, 8))\n", "model.build(input_shape=(None, 8))\n", " \n", "print(model.summary())\n", "model.compile(loss=\"mean_squared_error\",\n", " optimizer = keras.optimizers.SGD(0.001))\n", "callbacks = [keras.callbacks.EarlyStopping(\n", " patience=5, min_delta=1e-2)]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/100\n", "363/363 
[==============================] - 1s 3ms/step - loss: 2.8035 - val_loss: 1.0274\n", "Epoch 2/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.7387 - val_loss: 0.6841\n", "Epoch 3/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.6182 - val_loss: 0.6414\n", "Epoch 4/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5884 - val_loss: 0.6184\n", "Epoch 5/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5689 - val_loss: 0.6016\n", "Epoch 6/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5555 - val_loss: 0.5885\n", "Epoch 7/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5444 - val_loss: 0.5785\n", "Epoch 8/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5350 - val_loss: 0.5679\n", "Epoch 9/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5276 - val_loss: 0.5606\n", "Epoch 10/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5214 - val_loss: 0.5531\n", "Epoch 11/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5158 - val_loss: 0.5499\n", "Epoch 12/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5110 - val_loss: 0.5429\n", "Epoch 13/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5059 - val_loss: 0.5385\n", "Epoch 14/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.5008 - val_loss: 0.5325\n", "Epoch 15/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4973 - val_loss: 0.5282\n", "Epoch 16/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4933 - val_loss: 0.5243\n", "Epoch 17/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4890 - val_loss: 0.5195\n", "Epoch 18/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4858 - val_loss: 0.5162\n", "Epoch 19/100\n", 
"363/363 [==============================] - 1s 2ms/step - loss: 0.4830 - val_loss: 0.5129\n", "Epoch 20/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4794 - val_loss: 0.5113\n", "Epoch 21/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4764 - val_loss: 0.5058\n", "Epoch 22/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4734 - val_loss: 0.5035\n", "Epoch 23/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4707 - val_loss: 0.4990\n", "Epoch 24/100\n", "363/363 [==============================] - 1s 1ms/step - loss: 0.4676 - val_loss: 0.4974\n", "Epoch 25/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4657 - val_loss: 0.4938\n", "Epoch 26/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4632 - val_loss: 0.4917\n", "Epoch 27/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4600 - val_loss: 0.4888\n", "Epoch 28/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4579 - val_loss: 0.4864\n", "Epoch 29/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4554 - val_loss: 0.4835\n", "Epoch 30/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4529 - val_loss: 0.4804\n", "Epoch 31/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4505 - val_loss: 0.4786\n", "Epoch 32/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4483 - val_loss: 0.4758\n", "Epoch 33/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4461 - val_loss: 0.4749\n", "Epoch 34/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4431 - val_loss: 0.4708\n", "Epoch 35/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4414 - val_loss: 0.4681\n", "Epoch 36/100\n", "363/363 [==============================] - 1s 1ms/step - loss: 0.4394 - val_loss: 0.4661\n", 
"Epoch 37/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4383 - val_loss: 0.4651\n", "Epoch 38/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4354 - val_loss: 0.4635\n", "Epoch 39/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4335 - val_loss: 0.4605\n", "Epoch 40/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4319 - val_loss: 0.4587\n", "Epoch 41/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4293 - val_loss: 0.4571\n", "Epoch 42/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4274 - val_loss: 0.4542\n", "Epoch 43/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4254 - val_loss: 0.4520\n", "Epoch 44/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4242 - val_loss: 0.4504\n", "Epoch 45/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4219 - val_loss: 0.4490\n", "Epoch 46/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4198 - val_loss: 0.4473\n", "Epoch 47/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4196 - val_loss: 0.4449\n", "Epoch 48/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4173 - val_loss: 0.4432\n", "Epoch 49/100\n", "363/363 [==============================] - 1s 2ms/step - loss: 0.4154 - val_loss: 0.4417\n" ] } ], "source": [ "history = model.fit(x_train_scaled, y_train,\n", " validation_data = (x_valid_scaled, y_valid),\n", " epochs = 100,\n", " callbacks = callbacks)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "def plot_learning_curves(history):\n", " pd.DataFrame(history.history).plot(figsize=(8, 5))\n", " plt.grid(True)\n", " plt.gca().set_ylim(0, 2)\n", " plt.show()\n", "plot_learning_curves(history)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.43126633763313293" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.evaluate(x_test_scaled, y_test, verbose=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 2 }