|
|
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2.2.0\n",
- "sys.version_info(major=3, minor=6, micro=9, releaselevel='final', serial=0)\n",
- "matplotlib 3.2.1\n",
- "numpy 1.18.5\n",
- "pandas 1.0.4\n",
- "sklearn 0.23.1\n",
- "tensorflow 2.2.0\n",
- "tensorflow.keras 2.3.0-tf\n"
- ]
- }
- ],
- "source": [
- "import matplotlib as mpl\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline\n",
- "import numpy as np\n",
- "import sklearn\n",
- "import pandas as pd\n",
- "import os\n",
- "import sys\n",
- "import time\n",
- "import tensorflow as tf\n",
- "\n",
- "from tensorflow import keras\n",
- "\n",
- "# Log the interpreter and library versions used for this run.\n",
- "print(tf.__version__)\n",
- "print(sys.version_info)\n",
- "libraries = (mpl, np, pd, sklearn, tf, keras)\n",
- "for lib in libraries:\n",
- "    print(lib.__name__, lib.__version__)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- ".. _california_housing_dataset:\n",
- "\n",
- "California Housing dataset\n",
- "--------------------------\n",
- "\n",
- "**Data Set Characteristics:**\n",
- "\n",
- " :Number of Instances: 20640\n",
- "\n",
- " :Number of Attributes: 8 numeric, predictive attributes and the target\n",
- "\n",
- " :Attribute Information:\n",
- " - MedInc median income in block\n",
- " - HouseAge median house age in block\n",
- " - AveRooms average number of rooms\n",
- " - AveBedrms average number of bedrooms\n",
- " - Population block population\n",
- " - AveOccup average house occupancy\n",
- " - Latitude house block latitude\n",
- " - Longitude house block longitude\n",
- "\n",
- " :Missing Attribute Values: None\n",
- "\n",
- "This dataset was obtained from the StatLib repository.\n",
- "http://lib.stat.cmu.edu/datasets/\n",
- "\n",
- "The target variable is the median house value for California districts.\n",
- "\n",
- "This dataset was derived from the 1990 U.S. census, using one row per census\n",
- "block group. A block group is the smallest geographical unit for which the U.S.\n",
- "Census Bureau publishes sample data (a block group typically has a population\n",
- "of 600 to 3,000 people).\n",
- "\n",
- "It can be downloaded/loaded using the\n",
- ":func:`sklearn.datasets.fetch_california_housing` function.\n",
- "\n",
- ".. topic:: References\n",
- "\n",
- " - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,\n",
- " Statistics and Probability Letters, 33 (1997) 291-297\n",
- "\n",
- "(20640, 8)\n",
- "(20640,)\n"
- ]
- }
- ],
- "source": [
- "from sklearn.datasets import fetch_california_housing\n",
- "\n",
- "# Load the California housing regression dataset through scikit-learn.\n",
- "housing = fetch_california_housing()\n",
- "\n",
- "# Show the dataset description, then the feature and target array shapes.\n",
- "for summary in (housing.DESCR, housing.data.shape, housing.target.shape):\n",
- "    print(summary)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(11610, 8) (11610,)\n",
- "(3870, 8) (3870,)\n",
- "(5160, 8) (5160,)\n"
- ]
- }
- ],
- "source": [
- "from sklearn.model_selection import train_test_split\n",
- "\n",
- "# Hold out a test set first, then split the remainder into training and\n",
- "# validation sets. The fixed random_state values make both splits repeatable.\n",
- "x_train_all, x_test, y_train_all, y_test = train_test_split(\n",
- "    housing.data, housing.target, random_state=7)\n",
- "x_train, x_valid, y_train, y_valid = train_test_split(\n",
- "    x_train_all, y_train_all, random_state=11)\n",
- "\n",
- "splits = ((x_train, y_train), (x_valid, y_valid), (x_test, y_test))\n",
- "for features, targets in splits:\n",
- "    print(features.shape, targets.shape)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "from sklearn.preprocessing import StandardScaler\n",
- "\n",
- "# Learn the scaling statistics from the training split only, then apply\n",
- "# the same fitted scaler to every split.\n",
- "scaler = StandardScaler().fit(x_train)\n",
- "x_train_scaled = scaler.transform(x_train)\n",
- "x_valid_scaled = scaler.transform(x_valid)\n",
- "x_test_scaled = scaler.transform(x_test)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Model: \"wide_deep_model_2\"\n",
- "_________________________________________________________________\n",
- "Layer (type) Output Shape Param # \n",
- "=================================================================\n",
- "dense_6 (Dense) multiple 270 \n",
- "_________________________________________________________________\n",
- "dense_7 (Dense) multiple 930 \n",
- "_________________________________________________________________\n",
- "dense_8 (Dense) multiple 39 \n",
- "=================================================================\n",
- "Total params: 1,239\n",
- "Trainable params: 1,239\n",
- "Non-trainable params: 0\n",
- "_________________________________________________________________\n",
- "None\n"
- ]
- }
- ],
- "source": [
- "# Subclassing API: the model is written as a class instead of procedural code.\n",
- "# Reference for the Model base class:\n",
- "# https://tensorflow.google.cn/api_docs/python/tf/keras/Model\n",
- "class WideDeepModel(keras.models.Model):\n",
- "    \"\"\"Wide & deep regressor defined with the Keras subclassing API.\n",
- "\n",
- "    The raw input is concatenated with the output of two stacked 30-unit\n",
- "    ReLU layers, and the result feeds a single linear output unit.\n",
- "    \"\"\"\n",
- "\n",
- "    def __init__(self):\n",
- "        \"\"\"Create the three Dense layer objects used by ``call``.\"\"\"\n",
- "        super().__init__()\n",
- "        self.hidden1_layer = keras.layers.Dense(30, activation='relu')\n",
- "        self.hidden2_layer = keras.layers.Dense(30, activation='relu')\n",
- "        self.output_layer = keras.layers.Dense(1)\n",
- "\n",
- "    def call(self, input):\n",
- "        \"\"\"Run the forward pass and return the output layer's result.\n",
- "\n",
- "        Per the original author's note, this method is what ``build`` invokes.\n",
- "        ``input`` is the tensor of input features; the name shadows the\n",
- "        builtin but is kept so the method signature stays unchanged.\n",
- "        \"\"\"\n",
- "        hidden1 = self.hidden1_layer(input)\n",
- "        hidden2 = self.hidden2_layer(hidden1)\n",
- "        # Wide path (raw input) joined with the deep path (hidden2).\n",
- "        concat = keras.layers.concatenate([input, hidden2])\n",
- "        return self.output_layer(concat)\n",
- "\n",
- "model = WideDeepModel()\n",
- "# Alternative noted by the original author: wrapping the subclassed model in\n",
- "# a Sequential container also works, but the summary then shows less detail.\n",
- "# model = keras.models.Sequential([\n",
- "#     WideDeepModel(),\n",
- "# ])\n",
- "\n",
- "# build() creates the weights for 8 input features (batch size left as None)\n",
- "# so that summary() can report parameter counts before any training.\n",
- "model.build(input_shape=(None, 8))\n",
- "\n",
- "# summary() prints the table itself and returns None, so wrapping it in\n",
- "# print() would emit a stray \"None\" line after the table.\n",
- "model.summary()\n",
- "model.compile(loss=\"mean_squared_error\",\n",
- "              optimizer=keras.optimizers.SGD(0.001))\n",
- "# Stop training once 5 consecutive epochs fail to improve the monitored\n",
- "# quantity (EarlyStopping's default monitor) by at least 1e-2.\n",
- "callbacks = [keras.callbacks.EarlyStopping(\n",
- "    patience=5, min_delta=1e-2)]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Epoch 1/100\n",
- "363/363 [==============================] - 1s 3ms/step - loss: 2.8035 - val_loss: 1.0274\n",
- "Epoch 2/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.7387 - val_loss: 0.6841\n",
- "Epoch 3/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.6182 - val_loss: 0.6414\n",
- "Epoch 4/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5884 - val_loss: 0.6184\n",
- "Epoch 5/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5689 - val_loss: 0.6016\n",
- "Epoch 6/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5555 - val_loss: 0.5885\n",
- "Epoch 7/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5444 - val_loss: 0.5785\n",
- "Epoch 8/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5350 - val_loss: 0.5679\n",
- "Epoch 9/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5276 - val_loss: 0.5606\n",
- "Epoch 10/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5214 - val_loss: 0.5531\n",
- "Epoch 11/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5158 - val_loss: 0.5499\n",
- "Epoch 12/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5110 - val_loss: 0.5429\n",
- "Epoch 13/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5059 - val_loss: 0.5385\n",
- "Epoch 14/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.5008 - val_loss: 0.5325\n",
- "Epoch 15/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4973 - val_loss: 0.5282\n",
- "Epoch 16/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4933 - val_loss: 0.5243\n",
- "Epoch 17/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4890 - val_loss: 0.5195\n",
- "Epoch 18/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4858 - val_loss: 0.5162\n",
- "Epoch 19/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4830 - val_loss: 0.5129\n",
- "Epoch 20/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4794 - val_loss: 0.5113\n",
- "Epoch 21/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4764 - val_loss: 0.5058\n",
- "Epoch 22/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4734 - val_loss: 0.5035\n",
- "Epoch 23/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4707 - val_loss: 0.4990\n",
- "Epoch 24/100\n",
- "363/363 [==============================] - 1s 1ms/step - loss: 0.4676 - val_loss: 0.4974\n",
- "Epoch 25/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4657 - val_loss: 0.4938\n",
- "Epoch 26/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4632 - val_loss: 0.4917\n",
- "Epoch 27/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4600 - val_loss: 0.4888\n",
- "Epoch 28/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4579 - val_loss: 0.4864\n",
- "Epoch 29/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4554 - val_loss: 0.4835\n",
- "Epoch 30/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4529 - val_loss: 0.4804\n",
- "Epoch 31/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4505 - val_loss: 0.4786\n",
- "Epoch 32/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4483 - val_loss: 0.4758\n",
- "Epoch 33/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4461 - val_loss: 0.4749\n",
- "Epoch 34/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4431 - val_loss: 0.4708\n",
- "Epoch 35/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4414 - val_loss: 0.4681\n",
- "Epoch 36/100\n",
- "363/363 [==============================] - 1s 1ms/step - loss: 0.4394 - val_loss: 0.4661\n",
- "Epoch 37/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4383 - val_loss: 0.4651\n",
- "Epoch 38/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4354 - val_loss: 0.4635\n",
- "Epoch 39/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4335 - val_loss: 0.4605\n",
- "Epoch 40/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4319 - val_loss: 0.4587\n",
- "Epoch 41/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4293 - val_loss: 0.4571\n",
- "Epoch 42/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4274 - val_loss: 0.4542\n",
- "Epoch 43/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4254 - val_loss: 0.4520\n",
- "Epoch 44/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4242 - val_loss: 0.4504\n",
- "Epoch 45/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4219 - val_loss: 0.4490\n",
- "Epoch 46/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4198 - val_loss: 0.4473\n",
- "Epoch 47/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4196 - val_loss: 0.4449\n",
- "Epoch 48/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4173 - val_loss: 0.4432\n",
- "Epoch 49/100\n",
- "363/363 [==============================] - 1s 2ms/step - loss: 0.4154 - val_loss: 0.4417\n"
- ]
- }
- ],
- "source": [
- "# Train for at most 100 epochs, evaluating on the validation split after\n",
- "# each epoch; the callbacks list may end training earlier.\n",
- "history = model.fit(\n",
- "    x_train_scaled,\n",
- "    y_train,\n",
- "    epochs=100,\n",
- "    validation_data=(x_valid_scaled, y_valid),\n",
- "    callbacks=callbacks,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "\n",
- "text/plain": [
- "<Figure size 576x360 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "def plot_learning_curves(history):\n",
- "    \"\"\"Plot every series in ``history.history`` on one 8x5 figure.\n",
- "\n",
- "    The grid is enabled and the y-axis is fixed to the range [0, 2].\n",
- "    \"\"\"\n",
- "    curves = pd.DataFrame(history.history)\n",
- "    axes = curves.plot(figsize=(8, 5))\n",
- "    axes.grid(True)\n",
- "    axes.set_ylim(0, 2)\n",
- "    plt.show()\n",
- "\n",
- "\n",
- "plot_learning_curves(history)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.43126633763313293"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Loss (mean squared error, as compiled) on the held-out test split.\n",
- "model.evaluate(x=x_test_scaled, y=y_test, verbose=0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.9"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|