diff --git a/fastNLP/core/callback.py b/fastNLP/core/callback.py index c944ec96..f337975a 100644 --- a/fastNLP/core/callback.py +++ b/fastNLP/core/callback.py @@ -584,7 +584,9 @@ class TensorboardCallback(Callback): path = os.path.join(save_dir, 'tensorboard_logs_{}'.format(self.trainer.start_time)) if tensorboardX_flag: self._summary_writer = SummaryWriter(path) - + else: + self._summary_writer = None + def on_batch_begin(self, batch_x, batch_y, indices): if "model" in self.options and self.graph_added is False: # tensorboardX has a major bug here; drawing the model graph is not possible for now @@ -596,10 +598,10 @@ class TensorboardCallback(Callback): self.graph_added = True def on_backward_begin(self, loss): - if "loss" in self.options: + if "loss" in self.options and self._summary_writer: self._summary_writer.add_scalar("loss", loss.item(), global_step=self.trainer.step) - if "model" in self.options: + if "model" in self.options and self._summary_writer: for name, param in self.trainer.model.named_parameters(): if param.requires_grad: self._summary_writer.add_scalar(name + "_mean", param.mean(), global_step=self.trainer.step) @@ -608,15 +610,16 @@ class TensorboardCallback(Callback): global_step=self.trainer.step) def on_valid_end(self, eval_result, metric_key, optimizer, is_better_eval): - if "metric" in self.options: + if "metric" in self.options and self._summary_writer: for name, metric in eval_result.items(): for metric_key, metric_val in metric.items(): self._summary_writer.add_scalar("valid_{}_{}".format(name, metric_key), metric_val, global_step=self.trainer.step) def on_train_end(self): - self._summary_writer.close() - del self._summary_writer + if self._summary_writer: + self._summary_writer.close() + del self._summary_writer def on_exception(self, exception): if hasattr(self, "_summary_writer"): diff --git a/test/core/test_dataset.py b/test/core/test_dataset.py index 69548e73..0228f207 100644 --- a/test/core/test_dataset.py +++ b/test/core/test_dataset.py @@ -172,7 +172,7 @@ class TestDataSetMethods(unittest.TestCase): def split_sent(ins): return ins['raw_sentence'].split() csv_loader = CSVLoader(headers=['raw_sentence', 'label'],sep='\t') - dataset = csv_loader.load('../data_for_tests/tutorial_sample_dataset.csv') + dataset = csv_loader.load('test/data_for_tests/tutorial_sample_dataset.csv') dataset.drop(lambda x: len(x['raw_sentence'].split()) == 0, inplace=True) dataset.apply(split_sent, new_field_name='words', is_input=True) # print(dataset) diff --git a/test/test_tutorials.py b/test/test_tutorials.py index 4b1889d4..255b391e 100644 --- a/test/test_tutorials.py +++ b/test/test_tutorials.py @@ -10,7 +10,7 @@ from fastNLP.core.metrics import AccuracyMetric class TestTutorial(unittest.TestCase): def test_fastnlp_10min_tutorial(self): # read the csv data into a DataSet - sample_path = "data_for_tests/tutorial_sample_dataset.csv" + sample_path = "test/data_for_tests/tutorial_sample_dataset.csv" dataset = DataSet.read_csv(sample_path, headers=('raw_sentence', 'label'), sep='\t') print(len(dataset)) @@ -113,14 +113,14 @@ class TestTutorial(unittest.TestCase): def test_fastnlp_1min_tutorial(self): # tutorials/fastnlp_1min_tutorial.ipynb - data_path = "tutorials/sample_data/tutorial_sample_dataset.csv" + data_path = "test/data_for_tests/tutorial_sample_dataset.csv" ds = DataSet.read_csv(data_path, headers=('raw_sentence', 'label'), sep='\t') print(ds[1]) # convert the raw sentences to lowercase ds.apply(lambda x: x['raw_sentence'].lower(), new_field_name='raw_sentence') # convert label to int - ds.apply(lambda x: int(x['label']), new_field_name='label_seq', is_target=True) + 
ds.apply(lambda x: int(x['label']), new_field_name='target', is_target=True) def split_sent(ins): return ins['raw_sentence'].split() @@ -137,9 +137,9 @@ class TestTutorial(unittest.TestCase): train_data.apply(lambda x: [vocab.add(word) for word in x['words']]) # index the sentences with Vocabulary.to_index(word) - train_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='word_seq', + train_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='words', is_input=True) - dev_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='word_seq', + dev_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='words', is_input=True) from fastNLP.models import CNNText @@ -152,14 +152,14 @@ class TestTutorial(unittest.TestCase): dev_data=dev_data, loss=CrossEntropyLoss(), optimizer= Adam(), - metrics=AccuracyMetric(target='label_seq') + metrics=AccuracyMetric(target='target') ) trainer.train() print('Train finished!') def test_fastnlp_advanced_tutorial(self): import os - os.chdir("tutorials/fastnlp_advanced_tutorial") + os.chdir("test/tutorials/fastnlp_advanced_tutorial") from fastNLP import DataSet from fastNLP import Instance diff --git a/tutorials/fastnlp_advanced_tutorial/advance_tutorial.ipynb b/tutorials/fastnlp_advanced_tutorial/advance_tutorial.ipynb index 64eb3462..7e487933 100644 --- a/tutorials/fastnlp_advanced_tutorial/advance_tutorial.ipynb +++ b/tutorials/fastnlp_advanced_tutorial/advance_tutorial.ipynb @@ -170,11 +170,11 @@ { "data": { "text/plain": [ - "DataSet({'image': tensor([[ 4.7106e-01, -1.2246e+00, 3.1234e-01, -1.6781e+00, -8.7967e-01],\n", - " [ 1.1454e+00, 1.2236e-01, 3.0258e-01, -1.5454e+00, 8.9201e-01],\n", - " [-5.7143e-03, 3.9488e-01, 2.0287e-01, -1.5726e+00, 9.3171e-01],\n", - " [ 6.8914e-01, -2.6302e-01, -8.2694e-01, 9.5942e-01, -5.2589e-01],\n", - " [-5.7798e-03, -9.1621e-03, 1.0077e-03, 9.1716e-02, 1.0565e+00]]) type=torch.Tensor,\n", + "DataSet({'image': tensor([[ 0.3582, -1.0358, 1.4785, -1.5288, -0.9982],\n", + " [-0.3973, -0.4294, 0.9215, -1.9631, -1.6556],\n", + " [ 0.3313, -1.7714, 0.8729, 0.6976, -1.3172],\n", + " [-0.6403, 0.5023, -0.9919, 1.1178, -0.3710],\n", + " [-0.3692, 1.8631, -1.3646, -0.7290, -1.0774]]) type=torch.Tensor,\n", "'label': 0 type=int})" ] }, @@ -524,7 +524,11 @@ "outputs": [], "source": [ "# set the input and target fields\n", - "data_set.set_input(\"premise\", \"premise_len\", \"hypothesis\", \"hypothesis_len\")\n", + "data_set.rename_field(\"premise\",\"words1\")\n", + "data_set.rename_field(\"premise_len\",\"seq_len1\")\n", + "data_set.rename_field(\"hypothesis\",\"words2\")\n", + "data_set.rename_field(\"hypothesis_len\",\"seq_len2\")\n", + "data_set.set_input(\"words1\", \"seq_len1\", \"words2\", \"seq_len2\")\n", "data_set.set_target(\"truth\")" ] }, @@ -536,10 +540,10 @@ { "data": { "text/plain": [ - "{'premise': ['a', 'woman', 'is', 'walking', 'across', 'the', 'street', 'eating', 'a', 'banana', ',', 'while', 'a', 'man', 'is', 'following', 'with', 'his', 'briefcase', '.'] type=list,\n", - "'hypothesis': ['a', 'woman', 'eating', 'a', 'banana', 'crosses', 'a', 'street'] type=list,\n", - "'premise_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", - "'hypothesis_len': [1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", + "{'words1': ['a', 'woman', 'is', 'walking', 'across', 'the', 'street', 'eating', 'a', 'banana', ',', 'while', 'a', 'man', 'is', 'following', 'with', 'his', 'briefcase', '.'] type=list,\n", + "'seq_len1': [1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", + "'words2': ['a', 'woman', 'eating', 'a', 'banana', 'crosses', 'a', 'street'] type=list,\n", + "'seq_len2': [1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", "'label': 0 type=int}" ] }, @@ -613,7 +617,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "\n", "vocab = Vocabulary(max_size=10000, min_freq=2, unknown='<unk>', padding='<pad>')\n", "\n", "# build the vocabulary\n", - "train_data.apply(lambda x: [vocab.add(word) for word in x['premise']])\n", - "train_data.apply(lambda x: [vocab.add(word) for word in x['hypothesis']])\n", + "train_data.apply(lambda x: [vocab.add(word) for word in x['words1']])\n", + "train_data.apply(lambda x: [vocab.add(word) for word in x['words2']])\n", "vocab.build_vocab()" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "({'premise': [2, 10, 9, 2, 15, 115, 6, 11, 5, 132, 17, 2, 76, 9, 77, 55, 3] type=list,\n", - " 'hypothesis': [1, 2, 56, 17, 1, 4, 13, 49, 123, 12, 6, 11, 3] type=list,\n", - " 'premise_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", - " 'hypothesis_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", - " 'label': 0 type=int},\n", - " {'premise': [50, 124, 10, 7, 68, 91, 92, 38, 2, 55, 3] type=list,\n", - " 'hypothesis': [21, 10, 5, 2, 55, 7, 99, 64, 48, 1, 22, 1, 3] type=list,\n", - " 'premise_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", - " 'hypothesis_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", + "({'words1': [2, 9, 4, 2, 75, 85, 7, 86, 76, 77, 87, 88, 89, 2, 90, 3] type=list,\n", + " 'seq_len1': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", + " 'words2': [18, 9, 10, 1, 3] type=list,\n", + " 'seq_len2': [1, 1, 1, 1, 1] type=list,\n", " 'label': 1 type=int},\n", - " {'premise': [13, 24, 4, 14, 29, 5, 25, 4, 8, 39, 9, 14, 34, 4, 40, 41, 4, 16, 12, 2, 11, 4, 30, 28, 2, 42, 8, 2, 43, 44, 17, 2, 45, 35, 26, 31, 27, 5, 6, 32, 3] type=list,\n", - " 'hypothesis': [37, 49, 123, 30, 28, 2, 55, 12, 2, 11, 3] type=list,\n", - " 'premise_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", - " 'hypothesis_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", - " 'label': 0 type=int})" + " {'words1': [22, 32, 5, 110, 81, 111, 112, 5, 82, 3] type=list,\n", + " 'seq_len1': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", + " 'words2': [64, 32, 82, 133, 84, 3] type=list,\n", + " 'seq_len2': [1, 1, 1, 1, 1, 1] type=list,\n", + " 'label': 0 type=int},\n", + " {'words1': [2, 9, 97, 1, 20, 7, 54, 5, 1, 1, 70, 2, 11, 110, 2, 62, 3] type=list,\n", + " 'seq_len1': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", + " 'words2': [23, 1, 58, 10, 12, 1, 70, 133, 84, 3] type=list,\n", + " 'seq_len2': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", + " 'label': 1 type=int})" ] }, - "execution_count": 23, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# index the sentences with the vocabulary\n", - "train_data.apply(lambda x: [vocab.to_index(word) for word in x['premise']], new_field_name='premise')\n", - "train_data.apply(lambda x: [vocab.to_index(word) for word in x['hypothesis']], new_field_name='hypothesis')\n", - "dev_data.apply(lambda x: [vocab.to_index(word) for word in x['premise']], new_field_name='premise')\n", - "dev_data.apply(lambda x: [vocab.to_index(word) for word in 
x['hypothesis']], new_field_name='hypothesis')\n", - "test_data.apply(lambda x: [vocab.to_index(word) for word in x['premise']], new_field_name='premise')\n", - "test_data.apply(lambda x: [vocab.to_index(word) for word in x['hypothesis']], new_field_name='hypothesis')\n", + "train_data.apply(lambda x: [vocab.to_index(word) for word in x['words1']], new_field_name='words1')\n", + "train_data.apply(lambda x: [vocab.to_index(word) for word in x['words2']], new_field_name='words2')\n", + "dev_data.apply(lambda x: [vocab.to_index(word) for word in x['words1']], new_field_name='words1')\n", + "dev_data.apply(lambda x: [vocab.to_index(word) for word in x['words2']], new_field_name='words2')\n", + "test_data.apply(lambda x: [vocab.to_index(word) for word in x['words1']], new_field_name='words1')\n", + "test_data.apply(lambda x: [vocab.to_index(word) for word in x['words2']], new_field_name='words2')\n", "train_data[-1], dev_data[-1], test_data[-1]" ] }, @@ -679,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -703,35 +707,35 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "({'premise': [1037, 2158, 1998, 1037, 2450, 2892, 1996, 2395, 1999, 2392, 1997, 1037, 10733, 1998, 100, 4825, 1012] type=list,\n", - " 'hypothesis': [100, 1037, 3232, 1997, 7884, 1010, 2048, 2111, 3328, 2408, 1996, 2395, 1012] type=list,\n", - " 'premise_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", - " 'hypothesis_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", - " 'label': 0 type=int},\n", - " {'premise': [2019, 3080, 2158, 2003, 5948, 4589, 10869, 2012, 1037, 4825, 1012] type=list,\n", - " 'hypothesis': [100, 2158, 1999, 1037, 4825, 2003, 3403, 2005, 2010, 7954, 2000, 7180, 1012] type=list,\n", - " 'premise_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", - " 'hypothesis_len': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", - " 'label': 1 type=int})" + "({'words1': [1037, 2450, 1999, 1037, 2665, 6598, 1998, 7415, 2058, 2014, 2132, 2559, 2875, 1037, 3028, 1012] type=list,\n", + " 'seq_len1': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", + " 'words2': [100, 2450, 2003, 3147, 1012] type=list,\n", + " 'seq_len2': [1, 1, 1, 1, 1] type=list,\n", + " 'label': 1 type=int},\n", + " {'words1': [2048, 2308, 1010, 3173, 2833, 100, 16143, 1010, 8549, 1012] type=list,\n", + " 'seq_len1': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] type=list,\n", + " 'words2': [100, 2308, 8549, 2169, 2060, 1012] type=list,\n", + " 'seq_len2': [1, 1, 1, 1, 1, 1] type=list,\n", + " 'label': 0 type=int})" ] }, - "execution_count": 25, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# index the sentences with the vocabulary\n", - "train_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['premise']], new_field_name='premise')\n", - "train_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['hypothesis']], new_field_name='hypothesis')\n", - "dev_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['premise']], new_field_name='premise')\n", - "dev_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['hypothesis']], new_field_name='hypothesis')\n", + "train_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['words1']], new_field_name='words1')\n", + "train_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['words2']], new_field_name='words2')\n", + 
"dev_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['words1']], new_field_name='words1')\n", + "dev_data_2.apply(lambda x: [vocab_bert.to_index(word) for word in x['words2']], new_field_name='words2')\n", "train_data_2[-1], dev_data_2[-1]" ] }, @@ -747,7 +751,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -760,10 +764,10 @@ " 'num_classes': 3,\n", " 'gpu': True,\n", " 'batch_size': 32,\n", - " 'vocab_size': 156}" + " 'vocab_size': 143}" ] }, - "execution_count": 26, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -779,7 +783,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -788,21 +792,17 @@ "ESIM(\n", " (drop): Dropout(p=0.3)\n", " (embedding): Embedding(\n", - " (embed): Embedding(156, 300, padding_idx=0)\n", + " 143, 300\n", " (dropout): Dropout(p=0.3)\n", " )\n", - " (embedding_layer): Linear(\n", - " (linear): Linear(in_features=300, out_features=300, bias=True)\n", - " )\n", + " (embedding_layer): Linear(in_features=300, out_features=300, bias=True)\n", " (encoder): LSTM(\n", " (lstm): LSTM(300, 300, batch_first=True, bidirectional=True)\n", " )\n", - " (bi_attention): Bi_Attention()\n", + " (bi_attention): BiAttention()\n", " (mean_pooling): MeanPoolWithMask()\n", " (max_pooling): MaxPoolWithMask()\n", - " (inference_layer): Linear(\n", - " (linear): Linear(in_features=1200, out_features=300, bias=True)\n", - " )\n", + " (inference_layer): Linear(in_features=1200, out_features=300, bias=True)\n", " (decoder): LSTM(\n", " (lstm): LSTM(300, 300, batch_first=True, bidirectional=True)\n", " )\n", @@ -816,7 +816,7 @@ ")" ] }, - "execution_count": 27, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -824,49 +824,10 @@ "source": [ "# step 2:加载ESIM模型\n", "from fastNLP.models import ESIM\n", - "model = ESIM(**args.data)\n", + "model = ESIM(args[\"vocab_size\"], args[\"embed_dim\"], args[\"hidden_size\"], args[\"dropout\"], args[\"num_classes\"])\n", "model" ] }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "CNNText(\n", - " (embed): Embedding(\n", - " (embed): Embedding(156, 50, padding_idx=0)\n", - " (dropout): Dropout(p=0.0)\n", - " )\n", - " (conv_pool): ConvMaxpool(\n", - " (convs): ModuleList(\n", - " (0): Conv1d(50, 3, kernel_size=(3,), stride=(1,), padding=(2,))\n", - " (1): Conv1d(50, 4, kernel_size=(4,), stride=(1,), padding=(2,))\n", - " (2): Conv1d(50, 5, kernel_size=(5,), stride=(1,), padding=(2,))\n", - " )\n", - " )\n", - " (dropout): Dropout(p=0.1)\n", - " (fc): Linear(\n", - " (linear): Linear(in_features=12, out_features=5, bias=True)\n", - " )\n", - ")" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 另一个例子:加载CNN文本分类模型\n", - "from fastNLP.models import CNNText\n", - "cnn_text_model = CNNText(embed_num=len(vocab), embed_dim=50, num_classes=5, padding=2, dropout=0.1)\n", - "cnn_text_model" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1009,54 +970,25 @@ "name": "stdout", "output_type": "stream", "text": [ - "training epochs started 2019-04-14-23-22-28\n", - "[epoch: 1 step: 1] train loss: 1.51372 time: 0:00:00\n", - "[epoch: 1 step: 2] train loss: 1.26874 time: 0:00:00\n", - "[epoch: 1 step: 3] train loss: 1.49786 time: 0:00:00\n", - "[epoch: 1 step: 4] train loss: 1.37505 time: 0:00:00\n", - "Evaluation at Epoch 1/5. 
Step:4/20. AccuracyMetric: acc=0.344828\n", - "\n", - "[epoch: 2 step: 5] train loss: 1.21877 time: 0:00:00\n", - "[epoch: 2 step: 6] train loss: 1.14183 time: 0:00:00\n", - "[epoch: 2 step: 7] train loss: 1.15934 time: 0:00:00\n", - "[epoch: 2 step: 8] train loss: 1.55148 time: 0:00:00\n", - "Evaluation at Epoch 2/5. Step:8/20. AccuracyMetric: acc=0.344828\n", - "\n", - "[epoch: 3 step: 9] train loss: 1.1457 time: 0:00:00\n", - "[epoch: 3 step: 10] train loss: 1.0547 time: 0:00:00\n", - "[epoch: 3 step: 11] train loss: 1.40139 time: 0:00:00\n", - "[epoch: 3 step: 12] train loss: 0.551445 time: 0:00:00\n", - "Evaluation at Epoch 3/5. Step:12/20. AccuracyMetric: acc=0.275862\n", - "\n", - "[epoch: 4 step: 13] train loss: 1.07965 time: 0:00:00\n", - "[epoch: 4 step: 14] train loss: 1.04118 time: 0:00:00\n", - "[epoch: 4 step: 15] train loss: 1.11719 time: 0:00:00\n", - "[epoch: 4 step: 16] train loss: 1.09861 time: 0:00:00\n", - "Evaluation at Epoch 4/5. Step:16/20. AccuracyMetric: acc=0.275862\n", - "\n", - "[epoch: 5 step: 17] train loss: 1.10795 time: 0:00:00\n", - "[epoch: 5 step: 18] train loss: 1.26715 time: 0:00:00\n", - "[epoch: 5 step: 19] train loss: 1.19875 time: 0:00:00\n", - "[epoch: 5 step: 20] train loss: 1.09862 time: 0:00:00\n", - "Evaluation at Epoch 5/5. Step:20/20. AccuracyMetric: acc=0.37931\n", - "\n", - "\n", - "In Epoch:5/Step:20, got best dev performance:AccuracyMetric: acc=0.37931\n", - "Reloaded the best model.\n" + "training epochs started 2019-05-14-19-49-25\n" ] }, { - "data": { - "text/plain": [ - "{'best_eval': {'AccuracyMetric': {'acc': 0.37931}},\n", - " 'best_epoch': 5,\n", - " 'best_step': 20,\n", - " 'seconds': 0.5}" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" + "ename": "AssertionError", + "evalue": "seq_len can only have one dimension, got False.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0muse_tqdm\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m )\n\u001b[0;32m---> 19\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/Users/fdujyn/anaconda3/lib/python3.6/site-packages/fastNLP/core/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, load_best_model)\u001b[0m\n\u001b[1;32m 522\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_manager\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_train_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 524\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 525\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_manager\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_train_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 526\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mCallbackException\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/fdujyn/anaconda3/lib/python3.6/site-packages/fastNLP/core/trainer.py\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 573\u001b[0m \u001b[0;31m# negative sampling; replace unknown; re-weight batch_y\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 574\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_manager\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_x\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_y\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindices\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 575\u001b[0;31m \u001b[0mprediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_x\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 576\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 577\u001b[0m \u001b[0;31m# edit prediction\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/fdujyn/anaconda3/lib/python3.6/site-packages/fastNLP/core/trainer.py\u001b[0m in \u001b[0;36m_data_forward\u001b[0;34m(self, network, x)\u001b[0m\n\u001b[1;32m 661\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_data_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnetwork\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 662\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_build_args\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnetwork\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 663\u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnetwork\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 664\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 665\u001b[0m raise TypeError(\n", + "\u001b[0;32m/Users/fdujyn/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 489\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 490\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 491\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 492\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 493\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/fdujyn/anaconda3/lib/python3.6/site-packages/fastNLP/models/snli.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, words1, words2, seq_len1, seq_len2, target)\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mseq_len1\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m \u001b[0mseq_len1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mseq_len_to_mask\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseq_len1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 79\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0mseq_len1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mones\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpremise0\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpremise0\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/fdujyn/anaconda3/lib/python3.6/site-packages/fastNLP/core/utils.py\u001b[0m in \u001b[0;36mseq_len_to_mask\u001b[0;34m(seq_len)\u001b[0m\n\u001b[1;32m 626\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 627\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseq_len\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 628\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mseq_len\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdim\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m 
\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34mf\"seq_len can only have one dimension, got {seq_len.dim() == 1}.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 629\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mseq_len\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 630\u001b[0m \u001b[0mmax_len\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mseq_len\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAssertionError\u001b[0m: seq_len can only have one dimension, got False." + ] } ], "source": [ @@ -1073,7 +1005,6 @@ " print_every=-1,\n", " validate_every=-1,\n", " dev_data=dev_data,\n", - " use_cuda=True,\n", " optimizer=Adam(lr=1e-3, weight_decay=0),\n", " check_code_level=-1,\n", " metric_key='acc',\n", @@ -1178,7 +1109,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.6.7" } }, "nbformat": 4,
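Note on the TensorboardCallback hunks above: the fix makes the callback degrade gracefully when tensorboardX is not installed. The writer is set to None instead of being left unset, and every hook that logs checks it first, so training proceeds with logging silently disabled rather than crashing with an AttributeError. Below is a minimal standalone sketch of that guard pattern; the TBLogger class and its method names are hypothetical, for illustration only, while SummaryWriter(log_dir), add_scalar(tag, value, global_step=...), and close() are real tensorboardX calls.

# Optional-dependency guard: fall back to a no-op logger when tensorboardX is absent.
try:
    from tensorboardX import SummaryWriter
    tensorboardX_flag = True
except ImportError:
    tensorboardX_flag = False

class TBLogger:
    def __init__(self, log_dir):
        # None marks "logging unavailable"; every method below must check for it.
        self._writer = SummaryWriter(log_dir) if tensorboardX_flag else None

    def log_scalar(self, tag, value, step):
        if self._writer:  # skip silently when the dependency is missing
            self._writer.add_scalar(tag, value, global_step=step)

    def close(self):
        if self._writer:
            self._writer.close()
            self._writer = None

# Usage sketch:
#   logger = TBLogger("./logs")
#   logger.log_scalar("loss", 0.5, step=1)
#   logger.close()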