Browse Source

上传了文档文件和若干 TODO

tags/v0.4.10
ChenXin 5 years ago
parent
commit
584f3a615f
4 changed files with 1626 additions and 107 deletions
  1. +3
    -0
      docs/source/user/task1.rst
  2. +51
    -62
      tutorials/fastnlp_10min_tutorial.ipynb
  3. +1571
    -44
      tutorials/fastnlp_1min_tutorial.ipynb
  4. +1
    -1
      tutorials/fastnlp_test_tutorial.ipynb

+ 3
- 0
docs/source/user/task1.rst View File

@@ -0,0 +1,3 @@
=====================
用 fastNLP 分类
=====================

+ 51
- 62
tutorials/fastnlp_10min_tutorial.ipynb View File

@@ -41,7 +41,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 1,
"metadata": {},
"outputs": [
{
@@ -63,7 +63,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -97,7 +97,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -107,7 +107,7 @@
"'label': 0 type=str}"
]
},
"execution_count": 8,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -128,7 +128,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -148,7 +148,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -168,7 +168,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -191,7 +191,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -221,7 +221,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -249,7 +249,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -263,7 +263,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -295,17 +295,17 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'raw_sentence': a welcome relief from baseball movies that try too hard to be mythic , this one is a sweet and modest and ultimately winning story . type=str,\n",
"'label': 3 type=int,\n",
"'words': [4, 1, 1, 18, 1, 1, 13, 1, 1, 1, 8, 26, 1, 5, 35, 1, 11, 4, 1, 10, 1, 10, 1, 1, 1, 2] type=list,\n",
"'seq_len': 26 type=int}\n"
"{'raw_sentence': the performances are an absolute joy . type=str,\n",
"'label': 4 type=int,\n",
"'words': [3, 1, 1, 26, 1, 1, 2] type=list,\n",
"'seq_len': 7 type=int}\n"
]
}
],
@@ -327,9 +327,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"batch_x has: {'words': tensor([[ 15, 72, 15, 73, 74, 7, 3, 75, 6, 3, 16, 16,\n",
" 76, 2],\n",
" [ 15, 72, 15, 73, 74, 7, 3, 75, 6, 3, 16, 16,\n",
" 76, 2]])}\n",
"batch_y has: {'label': tensor([ 1, 1])}\n"
]
}
],
"source": [
"# 如果你们需要做强化学习或者GAN之类的项目,你们也可以使用这些数据预处理的工具\n",
"from fastNLP.core.batch import Batch\n",
@@ -352,7 +364,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -360,7 +372,7 @@
"text/plain": [
"CNNText(\n",
" (embed): Embedding(\n",
" (embed): Embedding(59, 50, padding_idx=0)\n",
" 77, 50\n",
" (dropout): Dropout(p=0.0)\n",
" )\n",
" (conv_pool): ConvMaxpool(\n",
@@ -377,14 +389,14 @@
")"
]
},
"execution_count": 17,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from fastNLP.models import CNNText\n",
"model = CNNText(embed_num=len(vocab), embed_dim=50, num_classes=5, padding=2, dropout=0.1)\n",
"model = CNNText((len(vocab), 50), num_classes=5, padding=2, dropout=0.1)\n",
"model"
]
},
@@ -448,7 +460,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
@@ -485,7 +497,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -508,7 +520,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@@ -517,7 +529,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -525,48 +537,25 @@
"output_type": "stream",
"text": [
"input fields after batch(if batch size is 2):\n",
"\tword_seq: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 26]) \n",
"\tword_seq: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 11]) \n",
"target fields after batch(if batch size is 2):\n",
"\tlabel_seq: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
"\n",
"training epochs started 2019-01-12 17-07-51\n"
"\n"
]
},
{
"data": {
"text/plain": [
"HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=10), HTML(value='')), layout=Layout(display='…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Evaluation at Epoch 1/5. Step:2/10. AccuracyMetric: acc=0.425926\n",
"Evaluation at Epoch 2/5. Step:4/10. AccuracyMetric: acc=0.425926\n",
"Evaluation at Epoch 3/5. Step:6/10. AccuracyMetric: acc=0.611111\n",
"Evaluation at Epoch 4/5. Step:8/10. AccuracyMetric: acc=0.648148\n",
"Evaluation at Epoch 5/5. Step:10/10. AccuracyMetric: acc=0.703704\n",
"\n",
"In Epoch:5/Step:10, got best dev performance:AccuracyMetric: acc=0.703704\n",
"Reloaded the best model.\n"
"ename": "NameError",
"evalue": "\nProblems occurred when calling CNNText.forward(self, words, seq_len=None)\n\tmissing param: ['words']\n\tunused field: ['word_seq']\n\tSuggestion: You need to provide ['words'] in DataSet and set it as input. ",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-19-ff7d68caf88a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0msave_path\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m n_epochs=5)\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0moverfit_trainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/fdujyn/anaconda3/lib/python3.6/site-packages/fastNLP/core/trainer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, train_data, model, optimizer, loss, batch_size, sampler, update_every, n_epochs, print_every, dev_data, metrics, metric_key, validate_every, save_path, prefetch, use_tqdm, device, callbacks, check_code_level)\u001b[0m\n\u001b[1;32m 447\u001b[0m _check_code(dataset=train_data, model=model, losser=losser, metrics=metrics, dev_data=dev_data,\n\u001b[1;32m 448\u001b[0m \u001b[0mmetric_key\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetric_key\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcheck_level\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcheck_code_level\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m batch_size=min(batch_size, DEFAULT_CHECK_BATCH_SIZE))\n\u001b[0m\u001b[1;32m 450\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/fdujyn/anaconda3/lib/python3.6/site-packages/fastNLP/core/trainer.py\u001b[0m in \u001b[0;36m_check_code\u001b[0;34m(dataset, model, losser, metrics, batch_size, dev_data, metric_key, check_level)\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minfo_str\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 809\u001b[0m _check_forward_error(forward_func=model.forward, dataset=dataset,\n\u001b[0;32m--> 810\u001b[0;31m batch_x=batch_x, check_level=check_level)\n\u001b[0m\u001b[1;32m 811\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 812\u001b[0m \u001b[0mrefined_batch_x\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_build_args\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mbatch_x\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/fdujyn/anaconda3/lib/python3.6/site-packages/fastNLP/core/utils.py\u001b[0m in \u001b[0;36m_check_forward_error\u001b[0;34m(forward_func, batch_x, dataset, check_level)\u001b[0m\n\u001b[1;32m 594\u001b[0m \u001b[0msugg_str\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0msuggestions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 595\u001b[0m \u001b[0merr_str\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'\\n'\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'\\n'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merrs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'\\n\\tSuggestion: '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0msugg_str\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 596\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNameError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr_str\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 597\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_unused\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 598\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcheck_level\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mWARNING_CHECK_LEVEL\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: \nProblems occurred when calling CNNText.forward(self, words, seq_len=None)\n\tmissing param: ['words']\n\tunused field: ['word_seq']\n\tSuggestion: You need to provide ['words'] in DataSet and set it as input. "
]
},
{
"data": {
"text/plain": [
"{'best_eval': {'AccuracyMetric': {'acc': 0.703704}},\n",
" 'best_epoch': 5,\n",
" 'best_step': 10,\n",
" 'seconds': 0.62}"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [


+ 1571
- 44
tutorials/fastnlp_1min_tutorial.ipynb
File diff suppressed because it is too large
View File


+ 1
- 1
tutorials/fastnlp_test_tutorial.ipynb View File

@@ -89,7 +89,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.6.7"
}
},
"nbformat": 4,


Loading…
Cancel
Save