
Update

tags/v1.0
Fafa-DL 3 years ago · parent commit 2b3c7a308d
10 changed files with 3023 additions and 0 deletions
  1. +310 -0  01 Introduction/范例/Colab/Google_Colab_Tutorial.ipynb
  2. BIN      01 Introduction/范例/Colab/Google_Colab_Tutorial.pdf
  3. +886 -0  01 Introduction/范例/HW01/HW01.ipynb
  4. BIN      01 Introduction/范例/HW01/HW01.pdf
  5. +625 -0  01 Introduction/范例/Pytorch/Pytorch_Tutorial.ipynb
  6. BIN      01 Introduction/范例/Pytorch/Pytorch_Tutorial_1.pdf
  7. BIN      01 Introduction/范例/Pytorch/Pytorch_Tutorial_2.pdf
  8. +655 -0  02 Deep Learning/范例/HW02/HW02-1.ipynb
  9. +547 -0  02 Deep Learning/范例/HW02/HW02-2.ipynb
  10. BIN     02 Deep Learning/范例/HW02/HW02.pdf

+ 310
- 0
01 Introduction/范例/Colab/Google_Colab_Tutorial.ipynb
File diff suppressed because it is too large


BIN
01 Introduction/范例/Colab/Google_Colab_Tutorial.pdf


+ 886
- 0
01 Introduction/范例/HW01/HW01.ipynb
File diff suppressed because it is too large


BIN
01 Introduction/范例/HW01/HW01.pdf


+ 625
- 0
01 Introduction/范例/Pytorch/Pytorch_Tutorial.ipynb

@@ -0,0 +1,625 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Pytorch Tutorial",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/ga642381/ML2021-Spring/blob/main/Pytorch/Pytorch_Tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "tHILOGjOQbsQ"
},
"source": [
"# **Pytorch Tutorial**\r\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "C1zA7GupxdJv"
},
"source": [
"import torch"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "6Eqj90EkWbWx"
},
"source": [
"**1. Pytorch Documentation Explanation with torch.max**\r\n",
"\r\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "JCXOg-iSQuk7"
},
"source": [
"x = torch.randn(4,5)\r\n",
"y = torch.randn(4,5)\r\n",
"z = torch.randn(4,5)\r\n",
"print(x)\r\n",
"print(y)\r\n",
"print(z)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "EEqa9GFoWF78"
},
"source": [
"# 1. max of entire tensor (torch.max(input) → Tensor)\r\n",
"m = torch.max(x)\r\n",
"print(m)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "wffThGDyWKxJ"
},
"source": [
"# 2. max along a dimension (torch.max(input, dim, keepdim=False, *, out=None) → (Tensor, LongTensor))\r\n",
"m, idx = torch.max(x,0)\r\n",
"print(m)\r\n",
"print(idx)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "oKDQW3tIXKg-"
},
"source": [
"# 2-2\r\n",
"m, idx = torch.max(input=x,dim=0)\r\n",
"print(m)\r\n",
"print(idx)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "6QZ6WRLyX3De"
},
"source": [
"# 2-3\r\n",
"m, idx = torch.max(x,0,False)\r\n",
"print(m)\r\n",
"print(idx)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "nqGuctkKbUEn"
},
"source": [
"# 2-4\r\n",
"m, idx = torch.max(x,dim=0,keepdim=True)\r\n",
"print(m)\r\n",
"print(idx)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "9OMzxuMlZPIu"
},
"source": [
"# 2-5\r\n",
"p = (m,idx)\r\n",
"torch.max(x,0,False,out=p)\r\n",
"print(p[0])\r\n",
"print(p[1])\r\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "uhd4TqGTbD2c"
},
"source": [
"# 2-6\r\n",
"p = (m,idx)\r\n",
"torch.max(x,0,False,p)\r\n",
"print(p[0])\r\n",
"print(p[1])"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "wbxjUSOXxN0n"
},
"source": [
"# 2-7\r\n",
"m, idx = torch.max(x,True)"
],
"execution_count": null,
"outputs": []
},
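{
"cell_type": "code",
"metadata": {},
"source": [
"# 2-7 (fixed): a minimal sketch, not part of the original notebook, assuming\r\n",
"# the intent was keepdim=True; dim is passed positionally, keepdim by keyword\r\n",
"m, idx = torch.max(x, 0, keepdim=True)\r\n",
"print(m)\r\n",
"print(idx)"
],
"execution_count": null,
"outputs": []
},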
{
"cell_type": "code",
"metadata": {
"id": "iMwhGLlGWYaR"
},
"source": [
"# 3. max(choose max) operators on two tensors (torch.max(input, other, *, out=None) → Tensor)\r\n",
"t = torch.max(x,y)\r\n",
"print(t)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "nFxRKu2Dedwb"
},
"source": [
"**2. Common errors**\r\n",
"\r\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KMcRyMxGwhul"
},
"source": [
"The following code blocks show some common errors while using the torch library. First, execute the code with error, and then execute the next code block to fix the error. You need to change the runtime to GPU.\r\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "eX-kKdi6ynFf"
},
"source": [
"import torch"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "-muJ4KKreoP2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 363
},
"outputId": "c1d5c3a5-9540-4145-d80c-3cbca18a1deb"
},
"source": [
"# 1. different device error\r\n",
"model = torch.nn.Linear(5,1).to(\"cuda:0\")\r\n",
"x = torch.Tensor([1,2,3,4,5]).to(\"cpu\")\r\n",
"y = model(x)"
],
"execution_count": null,
"outputs": [
{
"output_type": "error",
"ename": "RuntimeError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-2-12e5b7d55705>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cuda:0\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 727\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 728\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 729\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mlinear\u001b[0;34m(input, weight, bias)\u001b[0m\n\u001b[1;32m 1690\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maddmm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1691\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1692\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmatmul\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1693\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbias\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1694\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mRuntimeError\u001b[0m: Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU (while checking arguments for addmm)"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "a54PqxJLe9-c",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "909d3693-236f-4419-f269-8fb443ef7534"
},
"source": [
"# 1. different device error (fixed)\r\n",
"x = torch.Tensor([1,2,3,4,5]).to(\"cuda:0\")\r\n",
"y = model(x)\r\n",
"print(y.shape)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"torch.Size([1])\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "n7OHtZwbi7Qw",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 201
},
"outputId": "2a7d2dd0-6498-4da0-9591-3554c1739046"
},
"source": [
"# 2. mismatched dimensions error\r\n",
"x = torch.randn(4,5)\r\n",
"y= torch.randn(5,4)\r\n",
"z = x + y"
],
"execution_count": null,
"outputs": [
{
"output_type": "error",
"ename": "RuntimeError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-7fa8b244df3c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m: The size of tensor a (5) must match the size of tensor b (4) at non-singleton dimension 1"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "qVynzvrskFCD",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "926dc01c-be6f-48e1-ad39-a5bcecebc513"
},
"source": [
"# 2. mismatched dimensions error (fixed)\r\n",
"y= y.transpose(0,1)\r\n",
"z = x + y\r\n",
"print(z.shape)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"torch.Size([4, 5])\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Hgzgb9gJANod",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 398
},
"outputId": "21b58850-b3f1-4f2a-db5d-cc45e47ccbea"
},
"source": [
"# 3. cuda out of memory error\n",
"import torch\n",
"import torchvision.models as models\n",
"resnet18 = models.resnet18().to(\"cuda:0\") # Neural Networks for Image Recognition\n",
"data = torch.randn(2048,3,244,244) # Create fake data (512 images)\n",
"out = resnet18(data.to(\"cuda:0\")) # Use Data as Input and Feed to Model\n",
"print(out.shape)\n"
],
"execution_count": null,
"outputs": [
{
"output_type": "error",
"ename": "RuntimeError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-711923c7f347>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mresnet18\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresnet18\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cuda:0\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Neural Networks for Image Recognition\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2048\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m244\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m244\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Create fake data (512 images)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresnet18\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cuda:0\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Use Data as Input and Feed to Model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 727\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 728\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 729\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torchvision/models/resnet.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 221\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torchvision/models/resnet.py\u001b[0m in \u001b[0;36m_forward_impl\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[0;31m# See note [TorchScript super()]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconv1\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 204\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbn1\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 205\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaxpool\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 727\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 728\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 729\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/batchnorm.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunning_mean\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrack_running_stats\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 135\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunning_var\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrack_running_stats\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 136\u001b[0;31m self.weight, self.bias, bn_training, exponential_average_factor, self.eps)\n\u001b[0m\u001b[1;32m 137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mbatch_norm\u001b[0;34m(input, running_mean, running_var, weight, bias, training, momentum, eps)\u001b[0m\n\u001b[1;32m 2056\u001b[0m return torch.batch_norm(\n\u001b[1;32m 2057\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrunning_mean\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrunning_var\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2058\u001b[0;31m \u001b[0mtraining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmomentum\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meps\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackends\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcudnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menabled\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2059\u001b[0m )\n\u001b[1;32m 2060\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mRuntimeError\u001b[0m: CUDA out of memory. Tried to allocate 7.27 GiB (GPU 0; 14.76 GiB total capacity; 8.74 GiB already allocated; 4.42 GiB free; 9.42 GiB reserved in total by PyTorch)"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "VPksKnB_w343",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "fbee46ad-e63e-4bfc-8971-452895dd7a15"
},
"source": [
"# 3. cuda out of memory error (fixed)\n",
"for d in data:\n",
" out = resnet18(d.to(\"cuda:0\").unsqueeze(0))\n",
"print(out.shape)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"torch.Size([1, 1000])\n"
],
"name": "stdout"
}
]
},
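{
"cell_type": "code",
"metadata": {},
"source": [
"# 3. cuda out of memory error (alternative sketch, not part of the original\n",
"# notebook): feed the model in mini-batches instead of single images; bs is a\n",
"# hypothetical batch size chosen to fit the GPU memory\n",
"bs = 128\n",
"for i in range(0, len(data), bs):\n",
"    out = resnet18(data[i:i+bs].to(\"cuda:0\"))\n",
"print(out.shape)"
],
"execution_count": null,
"outputs": []
},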
{
"cell_type": "code",
"metadata": {
"id": "vqszlxEE0Bk0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 346
},
"outputId": "a698b34d-00a8-4067-ddc5-180cb4c8eeaa"
},
"source": [
"# 4. mismatched tensor type\n",
"import torch.nn as nn\n",
"L = nn.CrossEntropyLoss()\n",
"outs = torch.randn(5,5)\n",
"labels = torch.Tensor([1,2,3,4,0])\n",
"lossval = L(outs,labels) # Calculate CrossEntropyLoss between outs and labels"
],
"execution_count": null,
"outputs": [
{
"output_type": "error",
"ename": "RuntimeError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-60a5d1aad216>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mlossval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mL\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Calculate CrossEntropyLoss between outs and labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 727\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 728\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 729\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input, target)\u001b[0m\n\u001b[1;32m 960\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 961\u001b[0m return F.cross_entropy(input, target, weight=self.weight,\n\u001b[0;32m--> 962\u001b[0;31m ignore_index=self.ignore_index, reduction=self.reduction)\n\u001b[0m\u001b[1;32m 963\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 964\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mcross_entropy\u001b[0;34m(input, target, weight, size_average, ignore_index, reduce, reduction)\u001b[0m\n\u001b[1;32m 2466\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msize_average\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mreduce\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2467\u001b[0m \u001b[0mreduction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_Reduction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlegacy_get_string\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msize_average\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2468\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnll_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlog_softmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreduction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2469\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2470\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mnll_loss\u001b[0;34m(input, target, weight, size_average, ignore_index, reduce, reduction)\u001b[0m\n\u001b[1;32m 2262\u001b[0m .format(input.size(0), target.size(0)))\n\u001b[1;32m 2263\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2264\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnll_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_Reduction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_enum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreduction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2265\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mdim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2266\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnll_loss2d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_Reduction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_enum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreduction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mRuntimeError\u001b[0m: expected scalar type Long but found Float"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "CZwgwup_1dgS",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "aaf1de76-7ef2-4ca4-b87d-8482a3117249"
},
"source": [
"# 4. mismatched tensor type (fixed)\n",
"labels = labels.long()\n",
"lossval = L(outs,labels)\n",
"print(lossval)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"tensor(2.6215)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dSuNdA8F06dK"
},
"source": [
"**3. More on dataset and dataloader**\r\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "in84z_xu1rE6"
},
"source": [
"A dataset is a cluster of data in a organized way. A dataloader is a loader which can iterate through the data set."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "34zfh-c22Qqs"
},
"source": [
"Let a dataset be the English alphabets \"abcdefghijklmnopqrstuvwxyz\""
]
},
{
"cell_type": "code",
"metadata": {
"id": "TaiHofty1qKA"
},
"source": [
"dataset = \"abcdefghijklmnopqrstuvwxyz\""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "h0jwhVa12h3a"
},
"source": [
"A simple dataloader could be implemented with the python code \"for\""
]
},
{
"cell_type": "code",
"metadata": {
"id": "bWC5Wwbv2egy"
},
"source": [
"for datapoint in dataset:\r\n",
" print(datapoint)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "n33VKzkG2y2U"
},
"source": [
"When using the dataloader, we often like to shuffle the data. This is where torch.utils.data.DataLoader comes in handy. If each data is an index (0,1,2...) from the view of torch.utils.data.DataLoader, shuffling can simply be done by shuffling an index array. \r\n",
"\r\n"
]
},
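{
"cell_type": "code",
"metadata": {},
"source": [
"# A minimal sketch (not part of the original notebook) of the idea above:\r\n",
"# shuffle an array of indices instead of the data itself\r\n",
"import random\r\n",
"indices = list(range(len(dataset)))\r\n",
"random.shuffle(indices)\r\n",
"for i in indices:\r\n",
"    print(dataset[i])"
],
"execution_count": null,
"outputs": []
},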
{
"cell_type": "markdown",
"metadata": {
"id": "9MXUUKQ65APf"
},
"source": [
"torch.utils.data.DataLoader will need two imformation to fulfill its role. First, it needs to know the length of the data. Second, once torch.utils.data.DataLoader outputs the index of the shuffling results, the dataset needs to return the corresponding data."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BV5txsjK5j4j"
},
"source": [
"Therefore, torch.utils.data.Dataset provides the imformation by two functions, `__len__()` and `__getitem__()` to support torch.utils.data.Dataloader"
]
},
{
"cell_type": "code",
"metadata": {
"id": "A0IEkemJ5ajD"
},
"source": [
"import torch\r\n",
"import torch.utils.data \r\n",
"class ExampleDataset(torch.utils.data.Dataset):\r\n",
" def __init__(self):\r\n",
" self.data = \"abcdefghijklmnopqrstuvwxyz\"\r\n",
" \r\n",
" def __getitem__(self,idx): # if the index is idx, what will be the data?\r\n",
" return self.data[idx]\r\n",
" \r\n",
" def __len__(self): # What is the length of the dataset\r\n",
" return len(self.data)\r\n",
"\r\n",
"dataset1 = ExampleDataset() # create the dataset\r\n",
"dataloader = torch.utils.data.DataLoader(dataset = dataset1,shuffle = True,batch_size = 1)\r\n",
"for datapoint in dataloader:\r\n",
" print(datapoint)"
],
"execution_count": null,
"outputs": []
},
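{
"cell_type": "code",
"metadata": {},
"source": [
"# A sketch (not part of the original notebook): with batch_size > 1, the\r\n",
"# dataloader groups several data points into one batch\r\n",
"dataloader = torch.utils.data.DataLoader(dataset = dataset1, shuffle = True, batch_size = 4)\r\n",
"for batch in dataloader:\r\n",
"    print(batch) # each batch is a list of up to 4 letters"
],
"execution_count": null,
"outputs": []
},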
{
"cell_type": "markdown",
"metadata": {
"id": "nTt-ZTid9S2n"
},
"source": [
"A simple data augmentation technique can be done by changing the code in `__len__()` and `__getitem__()`. Suppose we want to double the length of the dataset by adding in the uppercase letters, using only the lowercase dataset, you can change the dataset to the following."
]
},
{
"cell_type": "code",
"metadata": {
"id": "7Wn3BA2j-NXl"
},
"source": [
"import torch.utils.data \r\n",
"class ExampleDataset(torch.utils.data.Dataset):\r\n",
" def __init__(self):\r\n",
" self.data = \"abcdefghijklmnopqrstuvwxyz\"\r\n",
" \r\n",
" def __getitem__(self,idx): # if the index is idx, what will be the data?\r\n",
" if idx >= len(self.data): # if the index >= 26, return upper case letter\r\n",
" return self.data[idx%26].upper()\r\n",
" else: # if the index < 26, return lower case, return lower case letter\r\n",
" return self.data[idx]\r\n",
" \r\n",
" def __len__(self): # What is the length of the dataset\r\n",
" return 2 * len(self.data) # The length is now twice as large\r\n",
"\r\n",
"dataset1 = ExampleDataset() # create the dataset\r\n",
"dataloader = torch.utils.data.DataLoader(dataset = dataset1,shuffle = True,batch_size = 1)\r\n",
"for datapoint in dataloader:\r\n",
" print(datapoint)"
],
"execution_count": null,
"outputs": []
}
]
}

BIN
01 Introduction/范例/Pytorch/Pytorch_Tutorial_1.pdf


BIN
01 Introduction/范例/Pytorch/Pytorch_Tutorial_2.pdf


+ 655
- 0
02 Deep Learning/范例/HW02/HW02-1.ipynb

@@ -0,0 +1,655 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"accelerator": "GPU",
"colab": {
"name": "SHARE MLSpring2021 - HW2-1.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/ga642381/ML2021-Spring/blob/main/HW02/HW02-1.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "OYlaRwNu7ojq"
},
"source": [
"# **Homework 2-1 Phoneme Classification**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "emUd7uS7crTz"
},
"source": [
"## The DARPA TIMIT Acoustic-Phonetic Continuous Speech Corpus (TIMIT)\n",
"The TIMIT corpus of reading speech has been designed to provide speech data for the acquisition of acoustic-phonetic knowledge and for the development and evaluation of automatic speech recognition systems.\n",
"\n",
"This homework is a multiclass classification task, \n",
"we are going to train a deep neural network classifier to predict the phonemes for each frame from the speech corpus TIMIT.\n",
"\n",
"link: https://academictorrents.com/details/34e2b78745138186976cbc27939b1b34d18bd5b3"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KVUGfWTo7_Oj"
},
"source": [
"## Download Data\n",
"Download data from google drive, then unzip it.\n",
"\n",
"You should have `timit_11/train_11.npy`, `timit_11/train_label_11.npy`, and `timit_11/test_11.npy` after running this block.<br><br>\n",
"`timit_11/`\n",
"- `train_11.npy`: training data<br>\n",
"- `train_label_11.npy`: training label<br>\n",
"- `test_11.npy`: testing data<br><br>\n",
"\n",
"**notes: if the google drive link is dead, you can download the data directly from Kaggle and upload it to the workspace**\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OzkiMEcC3Foq",
"outputId": "4308c64c-6885-4d1c-8eb7-a2d9b8038401"
},
"source": [
"!gdown --id '1HPkcmQmFGu-3OknddKIa5dNDsR05lIQR' --output data.zip\n",
"!unzip data.zip\n",
"!ls "
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Downloading...\n",
"From: https://drive.google.com/uc?id=1HPkcmQmFGu-3OknddKIa5dNDsR05lIQR\n",
"To: /content/data.zip\n",
"372MB [00:03, 121MB/s]\n",
"Archive: data.zip\n",
" creating: timit_11/\n",
" inflating: timit_11/train_11.npy \n",
" inflating: timit_11/test_11.npy \n",
" inflating: timit_11/train_label_11.npy \n",
"data.zip sample_data timit_11\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_L_4anls8Drv"
},
"source": [
"## Preparing Data\n",
"Load the training and testing data from the `.npy` file (NumPy array)."
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "IJjLT8em-y9G",
"outputId": "8edc6bfe-7511-447f-f239-00b96dba6dcf"
},
"source": [
"import numpy as np\n",
"\n",
"print('Loading data ...')\n",
"\n",
"data_root='./timit_11/'\n",
"train = np.load(data_root + 'train_11.npy')\n",
"train_label = np.load(data_root + 'train_label_11.npy')\n",
"test = np.load(data_root + 'test_11.npy')\n",
"\n",
"print('Size of training data: {}'.format(train.shape))\n",
"print('Size of testing data: {}'.format(test.shape))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Loading data ...\n",
"Size of training data: (1229932, 429)\n",
"Size of testing data: (451552, 429)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "us5XW_x6udZQ"
},
"source": [
"## Create Dataset"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Fjf5EcmJtf4e"
},
"source": [
"import torch\n",
"from torch.utils.data import Dataset\n",
"\n",
"class TIMITDataset(Dataset):\n",
" def __init__(self, X, y=None):\n",
" self.data = torch.from_numpy(X).float()\n",
" if y is not None:\n",
" y = y.astype(np.int)\n",
" self.label = torch.LongTensor(y)\n",
" else:\n",
" self.label = None\n",
"\n",
" def __getitem__(self, idx):\n",
" if self.label is not None:\n",
" return self.data[idx], self.label[idx]\n",
" else:\n",
" return self.data[idx]\n",
"\n",
" def __len__(self):\n",
" return len(self.data)\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "otIC6WhGeh9v"
},
"source": [
"Split the labeled data into a training set and a validation set, you can modify the variable `VAL_RATIO` to change the ratio of validation data."
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sYqi_lAuvC59",
"outputId": "13dabe63-4849-47ee-fe04-57427b9d601c"
},
"source": [
"VAL_RATIO = 0.2\n",
"\n",
"percent = int(train.shape[0] * (1 - VAL_RATIO))\n",
"train_x, train_y, val_x, val_y = train[:percent], train_label[:percent], train[percent:], train_label[percent:]\n",
"print('Size of training set: {}'.format(train_x.shape))\n",
"print('Size of validation set: {}'.format(val_x.shape))"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Size of training set: (983945, 429)\n",
"Size of validation set: (245987, 429)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nbCfclUIgMTX"
},
"source": [
"Create a data loader from the dataset, feel free to tweak the variable `BATCH_SIZE` here."
]
},
{
"cell_type": "code",
"metadata": {
"id": "RUCbQvqJurYc"
},
"source": [
"BATCH_SIZE = 64\n",
"\n",
"from torch.utils.data import DataLoader\n",
"\n",
"train_set = TIMITDataset(train_x, train_y)\n",
"val_set = TIMITDataset(val_x, val_y)\n",
"train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True) #only shuffle the training data\n",
"val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "_SY7X0lUgb50"
},
"source": [
"Cleanup the unneeded variables to save memory.<br>\n",
"\n",
"**notes: if you need to use these variables later, then you may remove this block or clean up unneeded variables later<br>the data size is quite huge, so be aware of memory usage in colab**"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "y8rzkGraeYeN",
"outputId": "dc790996-a43c-4a99-90d4-e7928892a899"
},
"source": [
"import gc\n",
"\n",
"del train, train_label, train_x, train_y, val_x, val_y\n",
"gc.collect()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"50"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IRqKNvNZwe3V"
},
"source": [
"## Create Model"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "FYr1ng5fh9pA"
},
"source": [
"Define model architecture, you are encouraged to change and experiment with the model architecture."
]
},
{
"cell_type": "code",
"metadata": {
"id": "lbZrwT6Ny0XL"
},
"source": [
"import torch\n",
"import torch.nn as nn\n",
"\n",
"class Classifier(nn.Module):\n",
" def __init__(self):\n",
" super(Classifier, self).__init__()\n",
" self.layer1 = nn.Linear(429, 1024)\n",
" self.layer2 = nn.Linear(1024, 512)\n",
" self.layer3 = nn.Linear(512, 128)\n",
" self.out = nn.Linear(128, 39) \n",
"\n",
" self.act_fn = nn.Sigmoid()\n",
"\n",
" def forward(self, x):\n",
" x = self.layer1(x)\n",
" x = self.act_fn(x)\n",
"\n",
" x = self.layer2(x)\n",
" x = self.act_fn(x)\n",
"\n",
" x = self.layer3(x)\n",
" x = self.act_fn(x)\n",
"\n",
" x = self.out(x)\n",
" \n",
" return x"
],
"execution_count": null,
"outputs": []
},
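{
"cell_type": "code",
"metadata": {},
"source": [
"# One possible variant (a sketch, not the original baseline): the same layer\n",
"# sizes with ReLU activations and batch normalization, which usually train\n",
"# better than sigmoid for deep classifiers\n",
"class ClassifierV2(nn.Module):\n",
"    def __init__(self):\n",
"        super(ClassifierV2, self).__init__()\n",
"        self.net = nn.Sequential(\n",
"            nn.Linear(429, 1024), nn.BatchNorm1d(1024), nn.ReLU(),\n",
"            nn.Linear(1024, 512), nn.BatchNorm1d(512), nn.ReLU(),\n",
"            nn.Linear(512, 128), nn.BatchNorm1d(128), nn.ReLU(),\n",
"            nn.Linear(128, 39)\n",
"        )\n",
"\n",
"    def forward(self, x):\n",
"        return self.net(x)"
],
"execution_count": null,
"outputs": []
},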
{
"cell_type": "markdown",
"metadata": {
"id": "VRYciXZvPbYh"
},
"source": [
"## Training"
]
},
{
"cell_type": "code",
"metadata": {
"id": "y114Vmm3Ja6o"
},
"source": [
"#check device\n",
"def get_device():\n",
" return 'cuda' if torch.cuda.is_available() else 'cpu'"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "sEX-yjHjhGuH"
},
"source": [
"Fix random seeds for reproducibility."
]
},
{
"cell_type": "code",
"metadata": {
"id": "88xPiUnm0tAd"
},
"source": [
"# fix random seed\n",
"def same_seeds(seed):\n",
" torch.manual_seed(seed)\n",
" if torch.cuda.is_available():\n",
" torch.cuda.manual_seed(seed)\n",
" torch.cuda.manual_seed_all(seed) \n",
" np.random.seed(seed) \n",
" torch.backends.cudnn.benchmark = False\n",
" torch.backends.cudnn.deterministic = True"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "KbBcBXkSp6RA"
},
"source": [
"Feel free to change the training parameters here."
]
},
{
"cell_type": "code",
"metadata": {
"id": "QTp3ZXg1yO9Y"
},
"source": [
"# fix random seed for reproducibility\n",
"same_seeds(0)\n",
"\n",
"# get device \n",
"device = get_device()\n",
"print(f'DEVICE: {device}')\n",
"\n",
"# training parameters\n",
"num_epoch = 20 # number of training epoch\n",
"learning_rate = 0.0001 # learning rate\n",
"\n",
"# the path where checkpoint saved\n",
"model_path = './model.ckpt'\n",
"\n",
"# create model, define a loss function, and optimizer\n",
"model = Classifier().to(device)\n",
"criterion = nn.CrossEntropyLoss() \n",
"optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "CdMWsBs7zzNs",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "c5ed561e-610d-4a35-d936-fd97adf342a0"
},
"source": [
"# start training\n",
"\n",
"best_acc = 0.0\n",
"for epoch in range(num_epoch):\n",
" train_acc = 0.0\n",
" train_loss = 0.0\n",
" val_acc = 0.0\n",
" val_loss = 0.0\n",
"\n",
" # training\n",
" model.train() # set the model to training mode\n",
" for i, data in enumerate(train_loader):\n",
" inputs, labels = data\n",
" inputs, labels = inputs.to(device), labels.to(device)\n",
" optimizer.zero_grad() \n",
" outputs = model(inputs) \n",
" batch_loss = criterion(outputs, labels)\n",
" _, train_pred = torch.max(outputs, 1) # get the index of the class with the highest probability\n",
" batch_loss.backward() \n",
" optimizer.step() \n",
"\n",
" train_acc += (train_pred.cpu() == labels.cpu()).sum().item()\n",
" train_loss += batch_loss.item()\n",
"\n",
" # validation\n",
" if len(val_set) > 0:\n",
" model.eval() # set the model to evaluation mode\n",
" with torch.no_grad():\n",
" for i, data in enumerate(val_loader):\n",
" inputs, labels = data\n",
" inputs, labels = inputs.to(device), labels.to(device)\n",
" outputs = model(inputs)\n",
" batch_loss = criterion(outputs, labels) \n",
" _, val_pred = torch.max(outputs, 1) \n",
" \n",
" val_acc += (val_pred.cpu() == labels.cpu()).sum().item() # get the index of the class with the highest probability\n",
" val_loss += batch_loss.item()\n",
"\n",
" print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} loss: {:3.6f}'.format(\n",
" epoch + 1, num_epoch, train_acc/len(train_set), train_loss/len(train_loader), val_acc/len(val_set), val_loss/len(val_loader)\n",
" ))\n",
"\n",
" # if the model improves, save a checkpoint at this epoch\n",
" if val_acc > best_acc:\n",
" best_acc = val_acc\n",
" torch.save(model.state_dict(), model_path)\n",
" print('saving model with acc {:.3f}'.format(best_acc/len(val_set)))\n",
" else:\n",
" print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}'.format(\n",
" epoch + 1, num_epoch, train_acc/len(train_set), train_loss/len(train_loader)\n",
" ))\n",
"\n",
"# if not validating, save the last epoch\n",
"if len(val_set) == 0:\n",
" torch.save(model.state_dict(), model_path)\n",
" print('saving model at last epoch')\n"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"[001/020] Train Acc: 0.467390 Loss: 1.812880 | Val Acc: 0.564884 loss: 1.440870\n",
"saving model with acc 0.565\n",
"[002/020] Train Acc: 0.594031 Loss: 1.332670 | Val Acc: 0.629594 loss: 1.209077\n",
"saving model with acc 0.630\n",
"[003/020] Train Acc: 0.644419 Loss: 1.154247 | Val Acc: 0.658295 loss: 1.102313\n",
"saving model with acc 0.658\n",
"[004/020] Train Acc: 0.672767 Loss: 1.051355 | Val Acc: 0.675568 loss: 1.040186\n",
"saving model with acc 0.676\n",
"[005/020] Train Acc: 0.691564 Loss: 0.982245 | Val Acc: 0.683853 loss: 1.004628\n",
"saving model with acc 0.684\n",
"[006/020] Train Acc: 0.705731 Loss: 0.930892 | Val Acc: 0.691707 loss: 0.977562\n",
"saving model with acc 0.692\n",
"[007/020] Train Acc: 0.716722 Loss: 0.890210 | Val Acc: 0.691016 loss: 0.973670\n",
"[008/020] Train Acc: 0.726312 Loss: 0.856612 | Val Acc: 0.690207 loss: 0.971627\n",
"[009/020] Train Acc: 0.734965 Loss: 0.827445 | Val Acc: 0.698561 loss: 0.942904\n",
"saving model with acc 0.699\n",
"[010/020] Train Acc: 0.741926 Loss: 0.801676 | Val Acc: 0.698854 loss: 0.946376\n",
"saving model with acc 0.699\n",
"[011/020] Train Acc: 0.748191 Loss: 0.779319 | Val Acc: 0.700944 loss: 0.938454\n",
"saving model with acc 0.701\n",
"[012/020] Train Acc: 0.754672 Loss: 0.758071 | Val Acc: 0.699423 loss: 0.940523\n",
"[013/020] Train Acc: 0.759725 Loss: 0.739450 | Val Acc: 0.699728 loss: 0.951068\n",
"[014/020] Train Acc: 0.765137 Loss: 0.721372 | Val Acc: 0.701903 loss: 0.938658\n",
"saving model with acc 0.702\n",
"[015/020] Train Acc: 0.769828 Loss: 0.704748 | Val Acc: 0.701761 loss: 0.937079\n",
"[016/020] Train Acc: 0.774698 Loss: 0.688990 | Val Acc: 0.702293 loss: 0.938634\n",
"saving model with acc 0.702\n",
"[017/020] Train Acc: 0.779358 Loss: 0.674498 | Val Acc: 0.702492 loss: 0.943941\n",
"saving model with acc 0.702\n",
"[018/020] Train Acc: 0.783076 Loss: 0.660028 | Val Acc: 0.695195 loss: 0.966189\n",
"[019/020] Train Acc: 0.787432 Loss: 0.646340 | Val Acc: 0.700708 loss: 0.958220\n",
"[020/020] Train Acc: 0.791536 Loss: 0.633378 | Val Acc: 0.700643 loss: 0.957066\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1Hi7jTn3PX-m"
},
"source": [
"## Testing"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NfUECMFCn5VG"
},
"source": [
"Create a testing dataset, and load model from the saved checkpoint."
]
},
{
"cell_type": "code",
"metadata": {
"id": "1PKjtAScPWtr",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "8c17272b-536a-4692-a95f-a3292766c698"
},
"source": [
"# create testing dataset\n",
"test_set = TIMITDataset(test, None)\n",
"test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)\n",
"\n",
"# create model and load weights from checkpoint\n",
"model = Classifier().to(device)\n",
"model.load_state_dict(torch.load(model_path))"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<All keys matched successfully>"
]
},
"metadata": {
"tags": []
},
"execution_count": 12
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "940TtCCdoYd0"
},
"source": [
"Make prediction."
]
},
{
"cell_type": "code",
"metadata": {
"id": "84HU5GGjPqR0"
},
"source": [
"predict = []\n",
"model.eval() # set the model to evaluation mode\n",
"with torch.no_grad():\n",
" for i, data in enumerate(test_loader):\n",
" inputs = data\n",
" inputs = inputs.to(device)\n",
" outputs = model(inputs)\n",
" _, test_pred = torch.max(outputs, 1) # get the index of the class with the highest probability\n",
"\n",
" for y in test_pred.cpu().numpy():\n",
" predict.append(y)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "AWDf_C-omElb"
},
"source": [
"Write prediction to a CSV file.\n",
"\n",
"After finish running this block, download the file `prediction.csv` from the files section on the left-hand side and submit it to Kaggle."
]
},
{
"cell_type": "code",
"metadata": {
"id": "GuljYSPHcZir"
},
"source": [
"with open('prediction.csv', 'w') as f:\n",
" f.write('Id,Class\\n')\n",
" for i, y in enumerate(predict):\n",
" f.write('{},{}\\n'.format(i, y))"
],
"execution_count": null,
"outputs": []
}
]
}

+ 547
- 0
02 Deep Learning/范例/HW02/HW02-2.ipynb

@@ -0,0 +1,547 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "SHARE MLSpring2021 - HW2-2.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/ga642381/ML2021-Spring/blob/main/HW02/HW02-2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eNSV4QGHS1I1"
},
"source": [
"# **Homework 2-2 Hessian Matrix**\r\n",
"\r\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "z0eNH3RD73Ye"
},
"source": [
"## Hessian Matrix\n",
"Imagine we are training a neural network and we are trying to find out whether the model is at **local minima like, saddle point, or none of the above**. We can make our decision by calculating the Hessian matrix.\n",
"\n",
"In practice, it is really hard to find a point where the gradient equals zero or all of the eigenvalues in Hessian matrix are greater than zero. In this homework, we make the following two assumptions:\n",
"1. View gradient norm less than 1e-3 as **gradient equals to zero**.\n",
"2. If minimum ratio is greater than 0.5 and gradient norm is less than 1e-3, then we assume that the model is at “local minima like”.\n",
"\n",
"> Minimum ratio is defined as the proportion of positive eigenvalues."
]
},
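{
"cell_type": "code",
"metadata": {},
"source": [
"# An illustrative sketch (not the TA's code): computing the minimum ratio from\n",
"# the eigenvalues of a small, hypothetical Hessian\n",
"import numpy as np\n",
"H = np.array([[2.0, 0.0], [0.0, -1.0]]) # hypothetical 2x2 Hessian\n",
"eigenvalues = np.linalg.eigvalsh(H)     # eigenvalues of a symmetric matrix\n",
"minimum_ratio = (eigenvalues > 0).sum() / len(eigenvalues)\n",
"print(minimum_ratio) # 0.5 here: one of two eigenvalues is positive (a saddle)"
],
"execution_count": null,
"outputs": []
},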
{
"cell_type": "markdown",
"metadata": {
"id": "lezCgM8U8KJl"
},
"source": [
"## IMPORTANT NOTICE\n",
"In this homework, students with different student IDs will get different answers. Make sure to fill in your `student_id` in the following block correctly. Otherwise, your code may not run correctly and you will get a wrong answer."
]
},
{
"cell_type": "code",
"metadata": {
"id": "Bbr6MTQ488GH"
},
"source": [
"student_id = 'your_student_id' # fill with your student ID\n",
"\n",
"assert student_id != 'your_student_id', 'Please fill in your student_id before you start.'"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "XHz08Ybg-dmB"
},
"source": [
"## Calculate Hessian Matrix\n",
"The computation of Hessian is done by TA, you don't need to and shouldn't change the following code. The only thing you need to do is to run the following blocks and determine whether the model is at `local minima like`, `saddle point`, or `none of the above` according to the value of `gradient norm` and `minimum ratio`."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "zvDeNevCUvDW"
},
"source": [
"### Install Package to Compute Hessian.\n",
"\n",
"The autograd-lib library is used to compute Hessian matrix. You can check the full document here https://github.com/cybertronai/autograd-lib."
]
},
{
"cell_type": "code",
"metadata": {
"id": "r135K45psHwF",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "8492ab08-8ad9-4525-db9c-35884eaa0641"
},
"source": [
"!pip install autograd-lib"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: autograd-lib in /usr/local/lib/python3.7/dist-packages (0.0.7)\n",
"Requirement already satisfied: gin-config in /usr/local/lib/python3.7/dist-packages (from autograd-lib) (0.4.0)\n",
"Requirement already satisfied: seaborn in /usr/local/lib/python3.7/dist-packages (from autograd-lib) (0.11.1)\n",
"Requirement already satisfied: pytorch-lightning in /usr/local/lib/python3.7/dist-packages (from autograd-lib) (1.2.3)\n",
"Requirement already satisfied: scipy>=1.0 in /usr/local/lib/python3.7/dist-packages (from seaborn->autograd-lib) (1.4.1)\n",
"Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.7/dist-packages (from seaborn->autograd-lib) (1.19.5)\n",
"Requirement already satisfied: pandas>=0.23 in /usr/local/lib/python3.7/dist-packages (from seaborn->autograd-lib) (1.1.5)\n",
"Requirement already satisfied: matplotlib>=2.2 in /usr/local/lib/python3.7/dist-packages (from seaborn->autograd-lib) (3.2.2)\n",
"Requirement already satisfied: torch>=1.4 in /usr/local/lib/python3.7/dist-packages (from pytorch-lightning->autograd-lib) (1.8.0+cu101)\n",
"Requirement already satisfied: tensorboard>=2.2.0 in /usr/local/lib/python3.7/dist-packages (from pytorch-lightning->autograd-lib) (2.4.1)\n",
"Requirement already satisfied: tqdm>=4.41.0 in /usr/local/lib/python3.7/dist-packages (from pytorch-lightning->autograd-lib) (4.41.1)\n",
"Requirement already satisfied: future>=0.17.1 in /usr/local/lib/python3.7/dist-packages (from pytorch-lightning->autograd-lib) (0.18.2)\n",
"Requirement already satisfied: fsspec[http]>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from pytorch-lightning->autograd-lib) (0.8.7)\n",
"Requirement already satisfied: PyYAML!=5.4.*,>=5.1 in /usr/local/lib/python3.7/dist-packages (from pytorch-lightning->autograd-lib) (5.3.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.23->seaborn->autograd-lib) (2.8.1)\n",
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.23->seaborn->autograd-lib) (2018.9)\n",
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.2->seaborn->autograd-lib) (0.10.0)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.2->seaborn->autograd-lib) (2.4.7)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.2->seaborn->autograd-lib) (1.3.1)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch>=1.4->pytorch-lightning->autograd-lib) (3.7.4.3)\n",
"Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (0.4.3)\n",
"Requirement already satisfied: wheel>=0.26; python_version >= \"3\" in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (0.36.2)\n",
"Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (1.27.1)\n",
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (1.15.0)\n",
"Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (3.12.4)\n",
"Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (1.32.0)\n",
"Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (1.0.1)\n",
"Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (0.10.0)\n",
"Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (2.23.0)\n",
"Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (3.3.4)\n",
"Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (54.0.0)\n",
"Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (1.8.0)\n",
"Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from fsspec[http]>=0.8.1->pytorch-lightning->autograd-lib) (3.7.0)\n",
"Requirement already satisfied: aiohttp; extra == \"http\" in /usr/local/lib/python3.7/dist-packages (from fsspec[http]>=0.8.1->pytorch-lightning->autograd-lib) (3.7.4.post0)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (1.3.0)\n",
"Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3.6\" in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (4.7.2)\n",
"Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (4.2.1)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (0.2.8)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (1.24.3)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (3.0.4)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (2020.12.5)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < \"3.8\"->fsspec[http]>=0.8.1->pytorch-lightning->autograd-lib) (3.4.1)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp; extra == \"http\"->fsspec[http]>=0.8.1->pytorch-lightning->autograd-lib) (5.1.0)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp; extra == \"http\"->fsspec[http]>=0.8.1->pytorch-lightning->autograd-lib) (20.3.0)\n",
"Requirement already satisfied: async-timeout<4.0,>=3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp; extra == \"http\"->fsspec[http]>=0.8.1->pytorch-lightning->autograd-lib) (3.0.1)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp; extra == \"http\"->fsspec[http]>=0.8.1->pytorch-lightning->autograd-lib) (1.6.3)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (3.1.0)\n",
"Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.7/dist-packages (from rsa<5,>=3.1.4; python_version >= \"3.6\"->google-auth<2,>=1.6.3->tensorboard>=2.2.0->pytorch-lightning->autograd-lib) (0.4.8)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZFGBCIFmVLS_"
},
"source": [
"### Import Libraries\r\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "_-vjBvH0uqA-"
},
"source": [
"import numpy as np\r\n",
"from math import pi\r\n",
"from collections import defaultdict\r\n",
"from autograd_lib import autograd_lib\r\n",
"\r\n",
"import torch\r\n",
"import torch.nn as nn\r\n",
"from torch.utils.data import DataLoader, Dataset\r\n",
"\r\n",
"import warnings\r\n",
"warnings.filterwarnings(\"ignore\")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "ubbsl4dUVUj6"
},
"source": [
"### Define NN Model\n",
"The NN model here is used to fit a single variable math function.\n",
"$$f(x) = \\frac{\\sin(5\\pi x)}{5\\pi x}.$$"
]
},
{
"cell_type": "code",
"metadata": {
"id": "uvdOpR9lVaJQ"
},
"source": [
"class MathRegressor(nn.Module):\r\n",
" def __init__(self, num_hidden=128):\r\n",
" super().__init__()\r\n",
" self.regressor = nn.Sequential(\r\n",
" nn.Linear(1, num_hidden),\r\n",
" nn.ReLU(),\r\n",
" nn.Linear(num_hidden, 1)\r\n",
" )\r\n",
"\r\n",
" def forward(self, x):\r\n",
" x = self.regressor(x)\r\n",
" return x"
],
"execution_count": null,
"outputs": []
},
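{
"cell_type": "markdown",
"metadata": {},
"source": [
"For intuition, the sketch below (not part of the graded code; it assumes `matplotlib` is available, as it is in Colab) plots the target function $f(x) = \\frac{\\sin(5\\pi x)}{5\\pi x}$ that the network is fitting."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# optional: visualize the target function (illustration only)\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from math import pi\n",
"\n",
"xs = np.linspace(0.01, 1, 200)            # skip x = 0, where f is defined by its limit (f(0) = 1)\n",
"ys = np.sin(5 * pi * xs) / (5 * pi * xs)  # f(x) = sin(5πx) / (5πx)\n",
"plt.plot(xs, ys)\n",
"plt.title('target function f(x)')\n",
"plt.show()"
],
"execution_count": null,
"outputs": []
},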
{
"cell_type": "markdown",
"metadata": {
"id": "3nO0POKbWU9o"
},
"source": [
"### Get Pretrained Checkpoints\n",
"The pretrained checkpoints is done by TA. Each student will get a different checkpoint."
]
},
{
"cell_type": "code",
"metadata": {
"id": "rUG_tQKLbIKB",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "46ab3391-f669-45cf-b3ae-a3a6ad901f4a"
},
"source": [
"!gdown --id 1ym6G7KKNkbsqSnMmnxdQKHO1JBoF0LPR"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"Downloading...\n",
"From: https://drive.google.com/uc?id=1ym6G7KKNkbsqSnMmnxdQKHO1JBoF0LPR\n",
"To: /content/data.pth\n",
"\r 0% 0.00/34.5k [00:00<?, ?B/s]\r100% 34.5k/34.5k [00:00<00:00, 58.8MB/s]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "kOFibHKCek_A"
},
"source": [
"### Load Pretrained Checkpoints and Training Data"
]
},
{
"cell_type": "code",
"metadata": {
"id": "zkLZoCR51D7P"
},
"source": [
"# find the key from student_id\n",
"import re\n",
"\n",
"key = student_id[-1]\n",
"if re.match('[0-9]', key) is not None:\n",
" key = int(key)\n",
"else:\n",
" key = ord(key) % 10"
],
"execution_count": null,
"outputs": []
},
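{
"cell_type": "markdown",
"metadata": {},
"source": [
"For example (hypothetical IDs, for illustration only): a `student_id` ending in `'1'` gives `key = 1`, while one ending in `'c'` gives `key = ord('c') % 10 = 99 % 10 = 9`. The key selects which checkpoint in `data.pth` is loaded below."
]
},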
{
"cell_type": "code",
"metadata": {
"id": "OSU8vnXEbY6q"
},
"source": [
"# load checkpoint and data corresponding to the key\r\n",
"model = MathRegressor()\r\n",
"autograd_lib.register(model)\r\n",
"\r\n",
"data = torch.load('data.pth')[key]\r\n",
"model.load_state_dict(data['model'])\r\n",
"train, target = data['data']"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "EyBX5Gvvm_IW"
},
"source": [
"### Function to compute gradient norm"
]
},
{
"cell_type": "code",
"metadata": {
"id": "2i8qGj2dnYBN"
},
"source": [
"# function to compute gradient norm\n",
"def compute_gradient_norm(model, criterion, train, target):\n",
" model.train()\n",
" model.zero_grad()\n",
" output = model(train)\n",
" loss = criterion(output, target)\n",
" loss.backward()\n",
"\n",
" grads = []\n",
" for p in model.regressor.children():\n",
" if isinstance(p, nn.Linear):\n",
" param_norm = p.weight.grad.norm(2).item()\n",
" grads.append(param_norm)\n",
"\n",
" grad_mean = np.mean(grads) # compute mean of gradient norms\n",
"\n",
" return grad_mean"
],
"execution_count": null,
"outputs": []
},
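{
"cell_type": "markdown",
"metadata": {},
"source": [
"In other words, the returned value is the mean of the flattened 2-norms (`Tensor.norm(2)`, i.e. the Euclidean norm over all elements) of the weight gradients of the two `nn.Linear` layers:\n",
"$$\\text{grad\\_mean} = \\frac{1}{2}\\left(\\left\\|\\frac{\\partial L}{\\partial W_1}\\right\\| + \\left\\|\\frac{\\partial L}{\\partial W_2}\\right\\|\\right).$$\n",
"Bias gradients are not included in this average."
]
},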
{
"cell_type": "markdown",
"metadata": {
"id": "BSHRU6saoOnf"
},
"source": [
"### Function to compute minimum ratio"
]
},
{
"cell_type": "code",
"metadata": {
"id": "zizIq6Y_o_UK"
},
"source": [
"# source code from the official document https://github.com/cybertronai/autograd-lib\n",
"\n",
"# helper function to save activations\n",
"def save_activations(layer, A, _):\n",
" '''\n",
" A is the input of the layer, we use batch size of 6 here\n",
" layer 1: A has size of (6, 1)\n",
" layer 2: A has size of (6, 128)\n",
" '''\n",
" activations[layer] = A\n",
"\n",
"# helper function to compute Hessian matrix\n",
"def compute_hess(layer, _, B):\n",
" '''\n",
" B is the backprop value of the layer\n",
" layer 1: B has size of (6, 128)\n",
" layer 2: B ahs size of (6, 1)\n",
" '''\n",
" A = activations[layer]\n",
" BA = torch.einsum('nl,ni->nli', B, A) # do batch-wise outer product\n",
"\n",
" # full Hessian\n",
" hess[layer] += torch.einsum('nli,nkj->likj', BA, BA) # do batch-wise outer product, then sum over the batch"
],
"execution_count": null,
"outputs": []
},
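{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick shape-level sanity check of the two `einsum` calls above (a standalone sketch; the sizes match the batch of 6 used in this notebook):"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# shape check for the einsum contractions used in compute_hess (illustration only)\n",
"import torch\n",
"\n",
"B = torch.randn(6, 128)                    # backprop values for layer 1\n",
"A = torch.randn(6, 1)                      # inputs to layer 1\n",
"BA = torch.einsum('nl,ni->nli', B, A)      # per-sample outer products\n",
"print(BA.shape)                            # torch.Size([6, 128, 1])\n",
"H = torch.einsum('nli,nkj->likj', BA, BA)  # contract (sum) over the batch index n\n",
"print(H.shape)                             # torch.Size([128, 1, 128, 1])"
],
"execution_count": null,
"outputs": []
},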
{
"cell_type": "code",
"metadata": {
"id": "l0r4R_-soT58"
},
"source": [
"# function to compute the minimum ratio\n",
"def compute_minimum_ratio(model, criterion, train, target):\n",
" model.zero_grad()\n",
" # compute Hessian matrix\n",
" # save the gradient of each layer\n",
" with autograd_lib.module_hook(save_activations):\n",
" output = model(train)\n",
" loss = criterion(output, target)\n",
"\n",
" # compute Hessian according to the gradient value stored in the previous step\n",
" with autograd_lib.module_hook(compute_hess):\n",
" autograd_lib.backward_hessian(output, loss='LeastSquares')\n",
"\n",
" layer_hess = list(hess.values())\n",
" minimum_ratio = []\n",
"\n",
" # compute eigenvalues of the Hessian matrix\n",
" for h in layer_hess:\n",
" size = h.shape[0] * h.shape[1]\n",
" h = h.reshape(size, size)\n",
" h_eig = torch.symeig(h).eigenvalues # torch.symeig() returns eigenvalues and eigenvectors of a real symmetric matrix\n",
" num_greater = torch.sum(h_eig > 0).item()\n",
" minimum_ratio.append(num_greater / len(h_eig))\n",
"\n",
" ratio_mean = np.mean(minimum_ratio) # compute mean of minimum ratio\n",
"\n",
" return ratio_mean"
],
"execution_count": null,
"outputs": []
},
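{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: `torch.symeig` is deprecated in newer PyTorch releases (and removed in the most recent ones); if you run this notebook on a recent version, `h_eig = torch.linalg.eigvalsh(h)` computes the eigenvalues of a real symmetric matrix and can serve as a drop-in replacement here."
]
},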
{
"cell_type": "markdown",
"metadata": {
"id": "ABZhFwVZY3x3"
},
"source": [
"### Mathematical Derivation\n",
"\n",
"Method used here: https://en.wikipedia.org/wiki/Gauss–Newton_algorithm\n",
"\n",
"> **Notations** \\\\\n",
"> $\\mathbf{A}$: the input of the layer. \\\\\n",
"> $\\mathbf{B}$: the backprop value. \\\\\n",
"> $\\mathbf{Z}$: the output of the layer. \\\\\n",
"> $L$: the total loss, mean squared error was used here, $L=e^2$. \\\\\n",
"> $w$: the weight value.\n",
"\n",
"Assume that the input dimension of the layer is $n$, and the output dimension of the layer is $m$.\n",
"\n",
"The derivative of the loss is\n",
"\n",
"\\begin{align*}\n",
" \\left(\\frac{\\partial L}{\\partial w}\\right)_{nm} &= \\mathbf{A}_m \\mathbf{B}_n,\n",
"\\end{align*}\n",
"\n",
"which can be written as\n",
"\n",
"\\begin{align*}\n",
" \\frac{\\partial L}{\\partial w} &= \\mathbf{B} \\times \\mathbf{A}.\n",
"\\end{align*}\n",
"\n",
"The Hessian can be derived as\n",
"\n",
"\\begin{align*}\n",
" \\mathbf{H}_{ij}&=\\frac{\\partial^2 L}{\\partial w_i \\partial w_j} \\\\\n",
" &= \\frac{\\partial}{\\partial w_i}\\left(\\frac{\\partial L}{\\partial w_j}\\right) \\\\\n",
" &= \\frac{\\partial}{\\partial w_i}\\left(\\frac{2e\\partial e}{\\partial w_j}\\right) \\\\\n",
" &= 2\\frac{\\partial e}{\\partial w_i}\\frac{\\partial e}{\\partial w_j}+2e\\frac{\\partial^2 e}{\\partial w_j \\partial w_i}.\n",
"\\end{align*}\n",
"\n",
"We neglect the second-order derivative term because the term is relatively small ($e$ is small)\n",
"\n",
"\\begin{align*}\n",
" \\mathbf{H}_{ij}\n",
" &\\propto \\frac{\\partial e}{\\partial w_i}\\frac{\\partial e}{\\partial w_j},\n",
"\\end{align*}\n",
"\n",
"and as the error $e$ is a constant\n",
"\n",
"\\begin{align*}\n",
" \\mathbf{H}_{ij}\n",
" &\\propto \\frac{\\partial L}{\\partial w_i}\\frac{\\partial L}{\\partial w_j},\n",
"\\end{align*}\n",
"\n",
"then the full Hessian becomes\n",
"\n",
"\\begin{align*}\n",
" \\mathbf{H} &\\propto (\\mathbf{B}\\times\\mathbf{A})\\times(\\mathbf{B}\\times\\mathbf{A}).\n",
"\\end{align*}\n"
]
},
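{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a tiny numeric illustration of the approximation above (a sketch, separate from the assignment code): for a one-parameter model $z = wx$ with $L = e^2 = (wx - t)^2$, the second-order term vanishes exactly because $e$ is linear in $w$, so the Hessian is exactly $2\\left(\\frac{\\partial e}{\\partial w}\\right)^2 = 2x^2$."
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# numeric check of the Gauss-Newton Hessian for a 1-parameter linear model\n",
"import torch\n",
"\n",
"x, t = torch.tensor(3.0), torch.tensor(1.0)\n",
"w = torch.tensor(0.5, requires_grad=True)\n",
"loss = (w * x - t) ** 2                                 # L = e^2\n",
"(g,) = torch.autograd.grad(loss, w, create_graph=True)  # dL/dw = 2e * de/dw\n",
"(h,) = torch.autograd.grad(g, w)                        # d2L/dw2\n",
"print(h.item(), 2 * x.item() ** 2)                      # both print 18.0"
],
"execution_count": null,
"outputs": []
},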
{
"cell_type": "code",
"metadata": {
"id": "1X-2uxwTcB9u"
},
"source": [
"# the main function to compute gradient norm and minimum ratio\r\n",
"def main(model, train, target):\r\n",
" criterion = nn.MSELoss()\r\n",
"\r\n",
" gradient_norm = compute_gradient_norm(model, criterion, train, target)\r\n",
" minimum_ratio = compute_minimum_ratio(model, criterion, train, target)\r\n",
"\r\n",
" print('gradient norm: {}, minimum ratio: {}'.format(gradient_norm, minimum_ratio))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "uwHyQHc9w8k1"
},
"source": [
"After running this block, you will get the value of `gradient norm` and `minimum ratio`. Determine whether the model is at `local minima like`, `saddle point`, or `none of the above`, and then submit your choice to NTU COOL."
]
},
{
"cell_type": "code",
"metadata": {
"id": "877W_ShIzS7a",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6c90fdd9-0bbd-405c-c781-457d265c1606"
},
"source": [
"if __name__ == '__main__':\n",
" # fix random seed\n",
" torch.manual_seed(0)\n",
"\n",
" # reset compute dictionaries\n",
" activations = defaultdict(int)\n",
" hess = defaultdict(float)\n",
"\n",
" # compute Hessian\n",
" main(model, train, target)"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"gradient norm: 0.07222428917884827, minimum ratio: 0.46484375\n"
],
"name": "stdout"
}
]
}
]
}

BIN
02 Deep Learning/范例/HW02/HW02.pdf View File

