Browse Source

增加了结合 fitlog 的教程

tags/v0.4.10
ChenXin 5 years ago
parent
commit
34a638175f
5 changed files with 141 additions and 19 deletions
  1. BIN
      docs/source/figures/fitlogChart.png
  2. BIN
      docs/source/figures/fitlogTable.png
  3. +1
    -0
      docs/source/index.rst
  4. +118
    -1
      docs/source/user/with_fitlog.rst
  5. +22
    -18
      fastNLP/core/callback.py

BIN
docs/source/figures/fitlogChart.png View File

Before After
Width: 2556  |  Height: 1450  |  Size: 272 kB

BIN
docs/source/figures/fitlogTable.png View File

Before After
Width: 2552  |  Height: 858  |  Size: 168 kB

+ 1
- 0
docs/source/index.rst View File

@@ -55,6 +55,7 @@ fastNLP 在 :mod:`~fastNLP.models` 模块中内置了如 :class:`~fastNLP.models
安装指南 <user/installation> 安装指南 <user/installation>
快速入门 <user/quickstart> 快速入门 <user/quickstart>
详细指南 <user/tutorial_one> 详细指南 <user/tutorial_one>
科研指南 <user/with_fitlog>


API 文档 API 文档
------------- -------------


+ 118
- 1
docs/source/user/with_fitlog.rst View File

@@ -2,4 +2,121 @@
科研向导 科研向导
================= =================


本文介绍使用 fastNLP 和 fitlog 进行科学研究的方法
本文介绍使用 fastNLP 和 fitlog 结合进行科研的方法。

首先,我们需要安装 `fitlog <https://fitlog.readthedocs.io/>`_ 。你需要确认你的电脑中没有其它名为 `fitlog` 的命令。

我们从命令行中进入到一个文件夹,现在我们要在文件夹中创建我们的 fastNLP 项目。你可以在命令行输入 `fitlog init test1` ,
然后你会看到如下提示::

Initialized empty Git repository in /Users/fdujyn/workspaces/test1/.git/
Auto commit by fitlog
Initialized empty Git repository in /Users/fdujyn/workspaces/test1/.git/
Fitlog project test1 is initialized.

这表明你已经创建成功了项目文件夹,并且在项目文件夹中已经初始化了 Git。如果你不想初始化 Git,
可以参考文档 `命令行工具 <https://fitlog.readthedocs.io/zh/latest/user/command_line.html>`_ 。

现在我们进入你创建的项目文件夹 test1 中,可以看到有一个名为 logs 的文件夹,后面我们将会在里面存放你的实验记录。
同时也有一个名为 main.py 的文件,是我们推荐你使用的入口文件。文件的内容如下::

import fitlog

fitlog.commit(__file__) # auto commit your codes
fitlog.add_hyper_in_file (__file__) # record your hyperparameters

"""
Your training code here, you may use these functions to log your result:
fitlog.add_hyper()
fitlog.add_loss()
fitlog.add_metric()
fitlog.add_best_metric()
......
"""

fitlog.finish() # finish the logging

我们推荐你保留除注释外的四行代码,它们有助于你的实验,
它们的具体用处参见文档 `用户 API <https://fitlog.readthedocs.io/zh/latest/fitlog.html>`_ 。

我们假定你要进行前两个教程中的实验,并已经把数据复制到了项目根目录下的 tutorial_sample_dataset.csv 文件中。
现在我们编写如下的训练代码,使用 :class:`~fastNLP.core.callback.FitlogCallback` 进行实验记录保存::

import fitlog
from fastNLP import Vocabulary, Trainer, CrossEntropyLoss, AccuracyMetric
from fastNLP.io import CSVLoader
from fastNLP.models import CNNText
from fastNLP.core.callback import FitlogCallback

fitlog.commit(__file__) # auto commit your codes
fitlog.add_hyper_in_file (__file__) # record your hyperparameters

############hyper
word_embed = 50
dropout = 0.1
############hyper

loader = CSVLoader(headers=('raw_sentence', 'label'), sep='\t')
dataset = loader.load("tutorial_sample_dataset.csv")

dataset.apply(lambda x: x['raw_sentence'].lower(), new_field_name='sentence')
dataset.apply(lambda x: x['sentence'].split(), new_field_name='words', is_input=True)
dataset.apply(lambda x: int(x['label']), new_field_name='target', is_target=True)
vocab = Vocabulary(min_freq=2).from_dataset(dataset, field_name='words')
vocab.index_dataset(dataset, field_name='words',new_field_name='words')

model = CNNText((len(vocab),word_embed), num_classes=5, padding=2, dropout=dropout)

train_dev_data, test_data = dataset.split(0.1)
train_data, dev_data = train_dev_data.split(0.1)

trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data,
loss=CrossEntropyLoss(), metrics=AccuracyMetric(),
callbacks=[FitlogCallback(test_data)])
trainer.train()

fitlog.finish() # finish the logging

用命令行在项目目录下执行 `python main.py` 之后,输出结果如下::

Auto commit by fitlog
input fields after batch(if batch size is 2):
words: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 11])
target fields after batch(if batch size is 2):
target: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2])

training epochs started 2019-05-23-21-11-51
Evaluation at Epoch 1/10. Step:2/20. AccuracyMetric: acc=0.285714

Evaluation at Epoch 2/10. Step:4/20. AccuracyMetric: acc=0.285714

Evaluation at Epoch 3/10. Step:6/20. AccuracyMetric: acc=0.285714

Evaluation at Epoch 4/10. Step:8/20. AccuracyMetric: acc=0.428571

Evaluation at Epoch 5/10. Step:10/20. AccuracyMetric: acc=0.571429

Evaluation at Epoch 6/10. Step:12/20. AccuracyMetric: acc=0.571429

Evaluation at Epoch 7/10. Step:14/20. AccuracyMetric: acc=0.285714

Evaluation at Epoch 8/10. Step:16/20. AccuracyMetric: acc=0.142857

Evaluation at Epoch 9/10. Step:18/20. AccuracyMetric: acc=0.285714

Evaluation at Epoch 10/10. Step:20/20. AccuracyMetric: acc=0.571429


In Epoch:5/Step:10, got best dev performance:AccuracyMetric: acc=0.571429
Reloaded the best model.

现在,我们在项目目录下输入 `fitlog log logs` ,命令行会启动一个网页,默认 url 为 ``0.0.0.0:5000`` 。
我们在浏览器中打开网页,可以看到如下的统计表格:

.. image:: ../figures/fitlogTable.png

如果我们点击 action 中的最后一个按钮,可以看到详细的 loss 图:

.. image:: ../figures/fitlogChart.png

更多的教程还在编写中,敬请期待~

+ 22
- 18
fastNLP/core/callback.py View File

@@ -54,6 +54,7 @@ __all__ = [
"GradientClipCallback", "GradientClipCallback",
"EarlyStopCallback", "EarlyStopCallback",
"TensorboardCallback", "TensorboardCallback",
"FitlogCallback",
"LRScheduler", "LRScheduler",
"ControlC", "ControlC",
@@ -65,6 +66,7 @@ import os


import torch import torch
from copy import deepcopy from copy import deepcopy

try: try:
from tensorboardX import SummaryWriter from tensorboardX import SummaryWriter
@@ -81,6 +83,7 @@ try:
except: except:
pass pass



class Callback(object): class Callback(object):
""" """
别名::class:`fastNLP.Callback` :class:`fastNLP.core.callback.Callback` 别名::class:`fastNLP.Callback` :class:`fastNLP.core.callback.Callback`
@@ -431,14 +434,13 @@ class EarlyStopCallback(Callback):
else: else:
raise exception # 抛出陌生Error raise exception # 抛出陌生Error



class FitlogCallback(Callback): class FitlogCallback(Callback):
""" """
别名: :class:`fastNLP.FitlogCallback` :class:`fastNLP.core.callback.FitlogCallback`

该callback将loss和progress自动写入到fitlog中; 如果Trainer有dev的数据,将自动把dev的结果写入到log中; 同时还支持传入 该callback将loss和progress自动写入到fitlog中; 如果Trainer有dev的数据,将自动把dev的结果写入到log中; 同时还支持传入
一个(或多个)test数据集进行测试(只有在trainer具有dev时才能使用),每次在dev上evaluate之后会在这些数据集上验证一下。
并将验证结果写入到fitlog中。这些数据集的结果是根据dev上最好的结果报道的,即如果dev在第3个epoch取得了最佳,则
fitlog中记录的关于这些数据集的结果就是来自第三个epoch的结果。
一个(或多个)test数据集进行测试(只有在trainer具有dev时才能使用),每次在dev上evaluate之后会在这些数据集上验证一下。
并将验证结果写入到fitlog中。这些数据集的结果是根据dev上最好的结果报道的,即如果dev在第3个epoch取得了最佳,则
fitlog中记录的关于这些数据集的结果就是来自第三个epoch的结果。


:param DataSet,dict(DataSet) data: 传入DataSet对象,会使用多个Trainer中的metric对数据进行验证。如果需要传入多个 :param DataSet,dict(DataSet) data: 传入DataSet对象,会使用多个Trainer中的metric对数据进行验证。如果需要传入多个
DataSet请通过dict的方式传入,dict的key将作为对应dataset的name传递给fitlog。若tester不为None时,data需要通过 DataSet请通过dict的方式传入,dict的key将作为对应dataset的name传递给fitlog。若tester不为None时,data需要通过
@@ -447,7 +449,9 @@ class FitlogCallback(Callback):
:param int verbose: 是否在终端打印内容,0不打印 :param int verbose: 是否在终端打印内容,0不打印
:param bool log_exception: fitlog是否记录发生的exception信息 :param bool log_exception: fitlog是否记录发生的exception信息
""" """

# 还没有被导出到 fastNLP 层
# 别名: :class:`fastNLP.FitlogCallback` :class:`fastNLP.core.callback.FitlogCallback`
def __init__(self, data=None, tester=None, verbose=0, log_exception=False): def __init__(self, data=None, tester=None, verbose=0, log_exception=False):
super().__init__() super().__init__()
self.datasets = {} self.datasets = {}
@@ -460,7 +464,7 @@ class FitlogCallback(Callback):
assert 'test' not in data, "Cannot use `test` as DataSet key, when tester is passed." assert 'test' not in data, "Cannot use `test` as DataSet key, when tester is passed."
setattr(tester, 'verbose', 0) setattr(tester, 'verbose', 0)
self.testers['test'] = tester self.testers['test'] = tester
if isinstance(data, dict): if isinstance(data, dict):
for key, value in data.items(): for key, value in data.items():
assert isinstance(value, DataSet), f"Only DataSet object is allowed, not {type(value)}." assert isinstance(value, DataSet), f"Only DataSet object is allowed, not {type(value)}."
@@ -470,23 +474,23 @@ class FitlogCallback(Callback):
self.datasets['test'] = data self.datasets['test'] = data
else: else:
raise TypeError("data receives dict[DataSet] or DataSet object.") raise TypeError("data receives dict[DataSet] or DataSet object.")
self.verbose = verbose self.verbose = verbose
def on_train_begin(self): def on_train_begin(self):
if (len(self.datasets)>0 or len(self.testers)>0 ) and self.trainer.dev_data is None:
if (len(self.datasets) > 0 or len(self.testers) > 0) and self.trainer.dev_data is None:
raise RuntimeError("Trainer has no dev data, you cannot pass extra data to do evaluation.") raise RuntimeError("Trainer has no dev data, you cannot pass extra data to do evaluation.")
if len(self.datasets)>0:
if len(self.datasets) > 0:
for key, data in self.datasets.items(): for key, data in self.datasets.items():
tester = Tester(data=data, model=self.model, batch_size=self.batch_size, metrics=self.trainer.metrics, tester = Tester(data=data, model=self.model, batch_size=self.batch_size, metrics=self.trainer.metrics,
verbose=0) verbose=0)
self.testers[key] = tester self.testers[key] = tester
fitlog.add_progress(total_steps=self.n_steps) fitlog.add_progress(total_steps=self.n_steps)
def on_backward_begin(self, loss): def on_backward_begin(self, loss):
fitlog.add_loss(loss.item(), name='loss', step=self.step, epoch=self.epoch) fitlog.add_loss(loss.item(), name='loss', step=self.step, epoch=self.epoch)
def on_valid_end(self, eval_result, metric_key, optimizer, better_result): def on_valid_end(self, eval_result, metric_key, optimizer, better_result):
if better_result: if better_result:
eval_result = deepcopy(eval_result) eval_result = deepcopy(eval_result)
@@ -494,11 +498,11 @@ class FitlogCallback(Callback):
eval_result['epoch'] = self.epoch eval_result['epoch'] = self.epoch
fitlog.add_best_metric(eval_result) fitlog.add_best_metric(eval_result)
fitlog.add_metric(eval_result, step=self.step, epoch=self.epoch) fitlog.add_metric(eval_result, step=self.step, epoch=self.epoch)
if len(self.testers)>0:
if len(self.testers) > 0:
for key, tester in self.testers.items(): for key, tester in self.testers.items():
try: try:
eval_result = tester.test() eval_result = tester.test()
if self.verbose!=0:
if self.verbose != 0:
self.pbar.write("Evaluation on DataSet {}:".format(key)) self.pbar.write("Evaluation on DataSet {}:".format(key))
self.pbar.write(tester._format_eval_results(eval_result)) self.pbar.write(tester._format_eval_results(eval_result))
fitlog.add_metric(eval_result, name=key, step=self.step, epoch=self.epoch) fitlog.add_metric(eval_result, name=key, step=self.step, epoch=self.epoch)
@@ -506,10 +510,10 @@ class FitlogCallback(Callback):
fitlog.add_best_metric(eval_result, name=key) fitlog.add_best_metric(eval_result, name=key)
except Exception: except Exception:
self.pbar.write("Exception happens when evaluate on DataSet named `{}`.".format(key)) self.pbar.write("Exception happens when evaluate on DataSet named `{}`.".format(key))
def on_train_end(self): def on_train_end(self):
fitlog.finish() fitlog.finish()
def on_exception(self, exception): def on_exception(self, exception):
fitlog.finish(status=1) fitlog.finish(status=1)
if self._log_exception: if self._log_exception:


Loading…
Cancel
Save