|
@@ -0,0 +1,614 @@ |
|
|
|
|
|
{ |
|
|
|
|
|
"nbformat": 4, |
|
|
|
|
|
"nbformat_minor": 0, |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"colab": { |
|
|
|
|
|
"name": "Pytorch Tutorial", |
|
|
|
|
|
"provenance": [], |
|
|
|
|
|
"collapsed_sections": [] |
|
|
|
|
|
}, |
|
|
|
|
|
"kernelspec": { |
|
|
|
|
|
"name": "python3", |
|
|
|
|
|
"display_name": "Python 3" |
|
|
|
|
|
}, |
|
|
|
|
|
"accelerator": "GPU" |
|
|
|
|
|
}, |
|
|
|
|
|
"cells": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "tHILOGjOQbsQ" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# **PyTorch Tutorial**\r\n" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "C1zA7GupxdJv" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"import torch" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "6Eqj90EkWbWx" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"**1. Reading the PyTorch Documentation (Example: torch.max)**\r\n", |
|
|
|
|
|
"\r\n" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "JCXOg-iSQuk7" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"x = torch.randn(4,5)\r\n", |
|
|
|
|
|
"y = torch.randn(4,5)\r\n", |
|
|
|
|
|
"z = torch.randn(4,5)\r\n", |
|
|
|
|
|
"print(x)\r\n", |
|
|
|
|
|
"print(y)\r\n", |
|
|
|
|
|
"print(z)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "EEqa9GFoWF78" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 1. max of entire tensor (torch.max(input) → Tensor)\r\n", |
|
|
|
|
|
"m = torch.max(x)\r\n", |
|
|
|
|
|
"print(m)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "wffThGDyWKxJ" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 2. max along a dimension (torch.max(input, dim, keepdim=False, *, out=None) → (Tensor, LongTensor))\r\n", |
|
|
|
|
|
"m, idx = torch.max(x,0)\r\n", |
|
|
|
|
|
"print(m)\r\n", |
|
|
|
|
|
"print(idx)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "oKDQW3tIXKg-" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 2-2\r\n", |
|
|
|
|
|
"m, idx = torch.max(input=x,dim=0)\r\n", |
|
|
|
|
|
"print(m)\r\n", |
|
|
|
|
|
"print(idx)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "6QZ6WRLyX3De" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 2-3\r\n", |
|
|
|
|
|
"m, idx = torch.max(x,0,False)\r\n", |
|
|
|
|
|
"print(m)\r\n", |
|
|
|
|
|
"print(idx)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "nqGuctkKbUEn" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 2-4\r\n", |
|
|
|
|
|
"m, idx = torch.max(x,dim=0,keepdim=True)\r\n", |
|
|
|
|
|
"print(m)\r\n", |
|
|
|
|
|
"print(idx)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "9OMzxuMlZPIu" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 2-5\r\n", |
|
|
|
|
|
"p = (m,idx)\r\n", |
|
|
|
|
|
"torch.max(x,0,False,out=p)\r\n", |
|
|
|
|
|
"print(p[0])\r\n", |
|
|
|
|
|
"print(p[1])\r\n" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "uhd4TqGTbD2c" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 2-6\r\n", |
|
|
|
|
|
"p = (m,idx)\r\n", |
|
|
|
|
|
"torch.max(x,0,False,p)\r\n", |
|
|
|
|
|
"print(p[0])\r\n", |
|
|
|
|
|
"print(p[1])" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "wbxjUSOXxN0n" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 2-7\r\n", |
|
|
|
|
|
"m, idx = torch.max(x,True)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "iMwhGLlGWYaR" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 3. element-wise maximum of two tensors (torch.max(input, other, *, out=None) → Tensor)\r\n", |
|
|
|
|
|
"t = torch.max(x,y)\r\n", |
|
|
|
|
|
"print(t)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "nFxRKu2Dedwb" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"**2. Common errors**\r\n", |
|
|
|
|
|
"\r\n" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "KMcRyMxGwhul" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"The following code cells demonstrate some common errors encountered when using PyTorch. For each error, first run the cell that raises the error, and then run the next cell, which fixes it. You need to change the runtime type to GPU before running these cells.\r\n" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "eX-kKdi6ynFf" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"import torch" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "-muJ4KKreoP2", |
|
|
|
|
|
"colab": { |
|
|
|
|
|
"base_uri": "https://localhost:8080/", |
|
|
|
|
|
"height": 363 |
|
|
|
|
|
}, |
|
|
|
|
|
"outputId": "c1d5c3a5-9540-4145-d80c-3cbca18a1deb" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 1. different device error\r\n", |
|
|
|
|
|
"model = torch.nn.Linear(5,1).to(\"cuda:0\")\r\n", |
|
|
|
|
|
"x = torch.Tensor([1,2,3,4,5]).to(\"cpu\")\r\n", |
|
|
|
|
|
"y = model(x)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"output_type": "error", |
|
|
|
|
|
"ename": "RuntimeError", |
|
|
|
|
|
"evalue": "ignored", |
|
|
|
|
|
"traceback": [ |
|
|
|
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
|
|
|
|
|
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", |
|
|
|
|
|
"\u001b[0;32m<ipython-input-2-12e5b7d55705>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cuda:0\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 727\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 728\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 729\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mlinear\u001b[0;34m(input, weight, bias)\u001b[0m\n\u001b[1;32m 1690\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maddmm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1691\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1692\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmatmul\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1693\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbias\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1694\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;31mRuntimeError\u001b[0m: Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU (while checking arguments for addmm)" |
|
|
|
|
|
] |
|
|
|
|
|
} |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "a54PqxJLe9-c", |
|
|
|
|
|
"colab": { |
|
|
|
|
|
"base_uri": "https://localhost:8080/" |
|
|
|
|
|
}, |
|
|
|
|
|
"outputId": "909d3693-236f-4419-f269-8fb443ef7534" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 1. different device error (fixed)\r\n", |
|
|
|
|
|
"x = torch.Tensor([1,2,3,4,5]).to(\"cuda:0\")\r\n", |
|
|
|
|
|
"y = model(x)\r\n", |
|
|
|
|
|
"print(y.shape)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"output_type": "stream", |
|
|
|
|
|
"text": [ |
|
|
|
|
|
"torch.Size([1])\n" |
|
|
|
|
|
], |
|
|
|
|
|
"name": "stdout" |
|
|
|
|
|
} |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "n7OHtZwbi7Qw", |
|
|
|
|
|
"colab": { |
|
|
|
|
|
"base_uri": "https://localhost:8080/", |
|
|
|
|
|
"height": 201 |
|
|
|
|
|
}, |
|
|
|
|
|
"outputId": "2a7d2dd0-6498-4da0-9591-3554c1739046" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 2. mismatched dimensions error\r\n", |
|
|
|
|
|
"x = torch.randn(4,5)\r\n", |
|
|
|
|
|
"y= torch.randn(5,4)\r\n", |
|
|
|
|
|
"z = x + y" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"output_type": "error", |
|
|
|
|
|
"ename": "RuntimeError", |
|
|
|
|
|
"evalue": "ignored", |
|
|
|
|
|
"traceback": [ |
|
|
|
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
|
|
|
|
|
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", |
|
|
|
|
|
"\u001b[0;32m<ipython-input-4-7fa8b244df3c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
|
|
|
|
|
"\u001b[0;31mRuntimeError\u001b[0m: The size of tensor a (5) must match the size of tensor b (4) at non-singleton dimension 1" |
|
|
|
|
|
] |
|
|
|
|
|
} |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "qVynzvrskFCD", |
|
|
|
|
|
"colab": { |
|
|
|
|
|
"base_uri": "https://localhost:8080/" |
|
|
|
|
|
}, |
|
|
|
|
|
"outputId": "926dc01c-be6f-48e1-ad39-a5bcecebc513" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 2. mismatched dimensions error (fixed)\r\n", |
|
|
|
|
|
"y= y.transpose(0,1)\r\n", |
|
|
|
|
|
"z = x + y\r\n", |
|
|
|
|
|
"print(z.shape)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"output_type": "stream", |
|
|
|
|
|
"text": [ |
|
|
|
|
|
"torch.Size([4, 5])\n" |
|
|
|
|
|
], |
|
|
|
|
|
"name": "stdout" |
|
|
|
|
|
} |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "Hgzgb9gJANod", |
|
|
|
|
|
"colab": { |
|
|
|
|
|
"base_uri": "https://localhost:8080/", |
|
|
|
|
|
"height": 398 |
|
|
|
|
|
}, |
|
|
|
|
|
"outputId": "21b58850-b3f1-4f2a-db5d-cc45e47ccbea" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 3. cuda out of memory error\n", |
|
|
|
|
|
"import torch\n", |
|
|
|
|
|
"import torchvision.models as models\n", |
|
|
|
|
|
"resnet18 = models.resnet18().to(\"cuda:0\") # Neural Networks for Image Recognition\n", |
|
|
|
|
|
"data = torch.randn(2048,3,244,244) # Create fake data (2048 images)\n", |
|
|
|
|
|
"out = resnet18(data.to(\"cuda:0\")) # Use Data as Input and Feed to Model\n", |
|
|
|
|
|
"print(out.shape)\n" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"output_type": "error", |
|
|
|
|
|
"ename": "RuntimeError", |
|
|
|
|
|
"evalue": "ignored", |
|
|
|
|
|
"traceback": [ |
|
|
|
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
|
|
|
|
|
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", |
|
|
|
|
|
"\u001b[0;32m<ipython-input-8-711923c7f347>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mresnet18\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresnet18\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cuda:0\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Neural Networks for Image Recognition\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2048\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m244\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m244\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Create fake data (512 images)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresnet18\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cuda:0\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Use Data as Input and Feed to Model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 727\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 728\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 729\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torchvision/models/resnet.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 221\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torchvision/models/resnet.py\u001b[0m in \u001b[0;36m_forward_impl\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[0;31m# See note [TorchScript super()]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconv1\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 204\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbn1\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 205\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaxpool\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 727\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 728\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 729\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/batchnorm.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunning_mean\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrack_running_stats\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 135\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunning_var\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrack_running_stats\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 136\u001b[0;31m self.weight, self.bias, bn_training, exponential_average_factor, self.eps)\n\u001b[0m\u001b[1;32m 137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mbatch_norm\u001b[0;34m(input, running_mean, running_var, weight, bias, training, momentum, eps)\u001b[0m\n\u001b[1;32m 2056\u001b[0m return torch.batch_norm(\n\u001b[1;32m 2057\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrunning_mean\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrunning_var\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2058\u001b[0;31m \u001b[0mtraining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmomentum\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meps\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackends\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcudnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menabled\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2059\u001b[0m )\n\u001b[1;32m 2060\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;31mRuntimeError\u001b[0m: CUDA out of memory. Tried to allocate 7.27 GiB (GPU 0; 14.76 GiB total capacity; 8.74 GiB already allocated; 4.42 GiB free; 9.42 GiB reserved in total by PyTorch)" |
|
|
|
|
|
] |
|
|
|
|
|
} |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "VPksKnB_w343", |
|
|
|
|
|
"colab": { |
|
|
|
|
|
"base_uri": "https://localhost:8080/" |
|
|
|
|
|
}, |
|
|
|
|
|
"outputId": "fbee46ad-e63e-4bfc-8971-452895dd7a15" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 3. cuda out of memory error (fixed)\n", |
|
|
|
|
|
"for d in data:\n", |
|
|
|
|
|
" out = resnet18(d.to(\"cuda:0\").unsqueeze(0))\n", |
|
|
|
|
|
"print(out.shape)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"output_type": "stream", |
|
|
|
|
|
"text": [ |
|
|
|
|
|
"torch.Size([1, 1000])\n" |
|
|
|
|
|
], |
|
|
|
|
|
"name": "stdout" |
|
|
|
|
|
} |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "vqszlxEE0Bk0", |
|
|
|
|
|
"colab": { |
|
|
|
|
|
"base_uri": "https://localhost:8080/", |
|
|
|
|
|
"height": 346 |
|
|
|
|
|
}, |
|
|
|
|
|
"outputId": "a698b34d-00a8-4067-ddc5-180cb4c8eeaa" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 4. mismatched tensor type\n", |
|
|
|
|
|
"import torch.nn as nn\n", |
|
|
|
|
|
"L = nn.CrossEntropyLoss()\n", |
|
|
|
|
|
"outs = torch.randn(5,5)\n", |
|
|
|
|
|
"labels = torch.Tensor([1,2,3,4,0])\n", |
|
|
|
|
|
"lossval = L(outs,labels) # Calculate CrossEntropyLoss between outs and labels" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"output_type": "error", |
|
|
|
|
|
"ename": "RuntimeError", |
|
|
|
|
|
"evalue": "ignored", |
|
|
|
|
|
"traceback": [ |
|
|
|
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
|
|
|
|
|
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", |
|
|
|
|
|
"\u001b[0;32m<ipython-input-10-60a5d1aad216>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mlossval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mL\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Calculate CrossEntropyLoss between outs and labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 727\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 728\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 729\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input, target)\u001b[0m\n\u001b[1;32m 960\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 961\u001b[0m return F.cross_entropy(input, target, weight=self.weight,\n\u001b[0;32m--> 962\u001b[0;31m ignore_index=self.ignore_index, reduction=self.reduction)\n\u001b[0m\u001b[1;32m 963\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 964\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mcross_entropy\u001b[0;34m(input, target, weight, size_average, ignore_index, reduce, reduction)\u001b[0m\n\u001b[1;32m 2466\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msize_average\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mreduce\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2467\u001b[0m \u001b[0mreduction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_Reduction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlegacy_get_string\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msize_average\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2468\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnll_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlog_softmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreduction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2469\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2470\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mnll_loss\u001b[0;34m(input, target, weight, size_average, ignore_index, reduce, reduction)\u001b[0m\n\u001b[1;32m 2262\u001b[0m .format(input.size(0), target.size(0)))\n\u001b[1;32m 2263\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2264\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnll_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_Reduction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_enum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreduction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2265\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mdim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2266\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnll_loss2d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_Reduction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_enum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreduction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mignore_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
|
|
|
|
|
"\u001b[0;31mRuntimeError\u001b[0m: expected scalar type Long but found Float" |
|
|
|
|
|
] |
|
|
|
|
|
} |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "CZwgwup_1dgS", |
|
|
|
|
|
"colab": { |
|
|
|
|
|
"base_uri": "https://localhost:8080/" |
|
|
|
|
|
}, |
|
|
|
|
|
"outputId": "aaf1de76-7ef2-4ca4-b87d-8482a3117249" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"# 4. mismatched tensor type (fixed)\n", |
|
|
|
|
|
"labels = labels.long()\n", |
|
|
|
|
|
"lossval = L(outs,labels)\n", |
|
|
|
|
|
"print(lossval)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [ |
|
|
|
|
|
{ |
|
|
|
|
|
"output_type": "stream", |
|
|
|
|
|
"text": [ |
|
|
|
|
|
"tensor(2.6215)\n" |
|
|
|
|
|
], |
|
|
|
|
|
"name": "stdout" |
|
|
|
|
|
} |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "dSuNdA8F06dK" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"**3. More on dataset and dataloader**\r\n" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "in84z_xu1rE6" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"A dataset is a cluster of data in a organized way. A dataloader is a loader which can iterate through the data set." |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "34zfh-c22Qqs" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"Let a dataset be the English alphabets \"abcdefghijklmnopqrstuvwxyz\"" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "TaiHofty1qKA" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"dataset = \"abcdefghijklmnopqrstuvwxyz\"" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "h0jwhVa12h3a" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"A simple dataloader could be implemented with the python code \"for\"" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "bWC5Wwbv2egy" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"for datapoint in dataset:\r\n", |
|
|
|
|
|
" print(datapoint)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "n33VKzkG2y2U" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"When using the dataloader, we often like to shuffle the data. This is where torch.utils.data.DataLoader comes in handy. If each data is an index (0,1,2...) from the view of torch.utils.data.DataLoader, shuffling can simply be done by shuffling an index array. \r\n", |
|
|
|
|
|
"\r\n" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "9MXUUKQ65APf" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"torch.utils.data.DataLoader will need two imformation to fulfill its role. First, it needs to know the length of the data. Second, once torch.utils.data.DataLoader outputs the index of the shuffling results, the dataset needs to return the corresponding data." |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "BV5txsjK5j4j" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"Therefore, torch.utils.data.Dataset provides the imformation by two functions, `__len__()` and `__getitem__()` to support torch.utils.data.Dataloader" |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "A0IEkemJ5ajD" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"import torch\r\n", |
|
|
|
|
|
"import torch.utils.data \r\n", |
|
|
|
|
|
"class ExampleDataset(torch.utils.data.Dataset):\r\n", |
|
|
|
|
|
" def __init__(self):\r\n", |
|
|
|
|
|
" self.data = \"abcdefghijklmnopqrstuvwxyz\"\r\n", |
|
|
|
|
|
" \r\n", |
|
|
|
|
|
" def __getitem__(self,idx): # if the index is idx, what will be the data?\r\n", |
|
|
|
|
|
" return self.data[idx]\r\n", |
|
|
|
|
|
" \r\n", |
|
|
|
|
|
" def __len__(self): # What is the length of the dataset\r\n", |
|
|
|
|
|
" return len(self.data)\r\n", |
|
|
|
|
|
"\r\n", |
|
|
|
|
|
"dataset1 = ExampleDataset() # create the dataset\r\n", |
|
|
|
|
|
"dataloader = torch.utils.data.DataLoader(dataset = dataset1,shuffle = True,batch_size = 1)\r\n", |
|
|
|
|
|
"for datapoint in dataloader:\r\n", |
|
|
|
|
|
" print(datapoint)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "markdown", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "nTt-ZTid9S2n" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"A simple data augmentation technique can be done by changing the code in `__len__()` and `__getitem__()`. Suppose we want to double the length of the dataset by adding in the uppercase letters, using only the lowercase dataset, you can change the dataset to the following." |
|
|
|
|
|
] |
|
|
|
|
|
}, |
|
|
|
|
|
{ |
|
|
|
|
|
"cell_type": "code", |
|
|
|
|
|
"metadata": { |
|
|
|
|
|
"id": "7Wn3BA2j-NXl" |
|
|
|
|
|
}, |
|
|
|
|
|
"source": [ |
|
|
|
|
|
"import torch.utils.data \r\n", |
|
|
|
|
|
"class ExampleDataset(torch.utils.data.Dataset):\r\n", |
|
|
|
|
|
" def __init__(self):\r\n", |
|
|
|
|
|
" self.data = \"abcdefghijklmnopqrstuvwxyz\"\r\n", |
|
|
|
|
|
" \r\n", |
|
|
|
|
|
" def __getitem__(self,idx): # if the index is idx, what will be the data?\r\n", |
|
|
|
|
|
" if idx >= len(self.data): # if the index >= 26, return upper case letter\r\n", |
|
|
|
|
|
" return self.data[idx%26].upper()\r\n", |
|
|
|
|
|
" else: # if the index < 26, return lower case, return lower case letter\r\n", |
|
|
|
|
|
" return self.data[idx]\r\n", |
|
|
|
|
|
" \r\n", |
|
|
|
|
|
" def __len__(self): # What is the length of the dataset\r\n", |
|
|
|
|
|
" return 2 * len(self.data) # The length is now twice as large\r\n", |
|
|
|
|
|
"\r\n", |
|
|
|
|
|
"dataset1 = ExampleDataset() # create the dataset\r\n", |
|
|
|
|
|
"dataloader = torch.utils.data.DataLoader(dataset = dataset1,shuffle = True,batch_size = 1)\r\n", |
|
|
|
|
|
"for datapoint in dataloader:\r\n", |
|
|
|
|
|
" print(datapoint)" |
|
|
|
|
|
], |
|
|
|
|
|
"execution_count": null, |
|
|
|
|
|
"outputs": [] |
|
|
|
|
|
} |
|
|
|
|
|
] |
|
|
|
|
|
} |