
Changed the import order in the models section and the contents exposed via __all__

tags/v0.4.10
ChenXin 5 years ago
commit 9c07850019
35 changed files with 465 additions and 428 deletions
  1. docs/source/fastNLP.models.base_model.rst  (+0, -7)
  2. docs/source/fastNLP.models.bert.rst  (+0, -7)
  3. docs/source/fastNLP.models.enas_controller.rst  (+0, -7)
  4. docs/source/fastNLP.models.enas_model.rst  (+0, -7)
  5. docs/source/fastNLP.models.enas_trainer.rst  (+0, -7)
  6. docs/source/fastNLP.models.enas_utils.rst  (+0, -7)
  7. docs/source/fastNLP.models.rst  (+0, -6)
  8. fastNLP/core/batch.py  (+2, -2)
  9. fastNLP/core/callback.py  (+1, -0)
  10. fastNLP/core/dataset.py  (+2, -1)
  11. fastNLP/core/field.py  (+2, -2)
  12. fastNLP/core/losses.py  (+2, -2)
  13. fastNLP/core/metrics.py  (+2, -2)
  14. fastNLP/core/predictor.py  (+2, -2)
  15. fastNLP/core/sampler.py  (+2, -2)
  16. fastNLP/core/tester.py  (+1, -0)
  17. fastNLP/core/trainer.py  (+3, -3)
  18. fastNLP/core/utils.py  (+4, -5)
  19. fastNLP/io/__init__.py  (+5, -4)
  20. fastNLP/io/base_loader.py  (+13, -6)
  21. fastNLP/io/config_io.py  (+36, -28)
  22. fastNLP/io/dataset_loader.py  (+6, -5)
  23. fastNLP/io/embed_loader.py  (+30, -26)
  24. fastNLP/io/model_io.py  (+10, -5)
  25. fastNLP/models/__init__.py  (+18, -2)
  26. fastNLP/models/base_model.py  (+5, -5)
  27. fastNLP/models/biaffine_parser.py  (+88, -71)
  28. fastNLP/models/cnn_text_classification.py  (+9, -8)
  29. fastNLP/models/enas_controller.py  (+1, -0)
  30. fastNLP/models/enas_model.py  (+71, -68)
  31. fastNLP/models/enas_trainer.py  (+69, -72)
  32. fastNLP/models/enas_utils.py  (+0, -2)
  33. fastNLP/models/sequence_labeling.py  (+9, -1)
  34. fastNLP/models/snli.py  (+32, -29)
  35. fastNLP/models/star_transformer.py  (+40, -27)
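The change applied across these files follows one convention: standard-library imports first, then third-party packages, then package-relative imports, with a multi-line __all__ list placed after the imports instead of a one-line __all__ above them. The module below is only an illustrative sketch of that layout, not a file from this commit; ExampleLoader and example_module are made-up names, and the imports are shown purely to illustrate the grouping.

"""
example_module: layout sketch for the import / __all__ convention adopted here.
"""
import os  # standard-library imports first
from collections import defaultdict

import numpy as np  # third-party packages second
import torch

# package-relative imports would come last in a real fastNLP module, e.g.
# from .base_loader import BaseLoader

__all__ = [
    "ExampleLoader"
]


class ExampleLoader:
    """Hypothetical loader, present only so __all__ has something to export."""

    def load(self, path):
        # trivial body; real loaders in fastNLP.io return DataSet objects
        return os.path.basename(path)
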

docs/source/fastNLP.models.base_model.rst  (+0, -7)

@@ -1,7 +0,0 @@
-fastNLP.models.base\_model
-==========================
-
-.. automodule:: fastNLP.models.base_model
-    :members:
-    :undoc-members:
-    :show-inheritance:

docs/source/fastNLP.models.bert.rst  (+0, -7)

@@ -1,7 +0,0 @@
-fastNLP.models.bert
-===================
-
-.. automodule:: fastNLP.models.bert
-    :members:
-    :undoc-members:
-    :show-inheritance:

docs/source/fastNLP.models.enas_controller.rst  (+0, -7)

@@ -1,7 +0,0 @@
-fastNLP.models.enas\_controller
-===============================
-
-.. automodule:: fastNLP.models.enas_controller
-    :members:
-    :undoc-members:
-    :show-inheritance:

docs/source/fastNLP.models.enas_model.rst  (+0, -7)

@@ -1,7 +0,0 @@
-fastNLP.models.enas\_model
-==========================
-
-.. automodule:: fastNLP.models.enas_model
-    :members:
-    :undoc-members:
-    :show-inheritance:

docs/source/fastNLP.models.enas_trainer.rst  (+0, -7)

@@ -1,7 +0,0 @@
-fastNLP.models.enas\_trainer
-============================
-
-.. automodule:: fastNLP.models.enas_trainer
-    :members:
-    :undoc-members:
-    :show-inheritance:

docs/source/fastNLP.models.enas_utils.rst  (+0, -7)

@@ -1,7 +0,0 @@
-fastNLP.models.enas\_utils
-==========================
-
-.. automodule:: fastNLP.models.enas_utils
-    :members:
-    :undoc-members:
-    :show-inheritance:

docs/source/fastNLP.models.rst  (+0, -6)

@@ -12,14 +12,8 @@ fastNLP.models
.. toctree::
    :titlesonly:

-    fastNLP.models.base_model
-    fastNLP.models.bert
    fastNLP.models.biaffine_parser
    fastNLP.models.cnn_text_classification
-    fastNLP.models.enas_controller
-    fastNLP.models.enas_model
-    fastNLP.models.enas_trainer
-    fastNLP.models.enas_utils
    fastNLP.models.sequence_labeling
    fastNLP.models.snli
    fastNLP.models.star_transformer

fastNLP/core/batch.py  (+2, -2)

@@ -3,12 +3,12 @@ batch 模块实现了 fastNLP 所需的 Batch 类。

"""
import atexit
+from queue import Empty, Full

import numpy as np
import torch
import torch.multiprocessing as mp

-from queue import Empty, Full
-
from .sampler import RandomSampler

__all__ = [

fastNLP/core/callback.py  (+1, -0)

@@ -50,6 +50,7 @@ callback模块实现了 fastNLP 中的许多 callback 类,用于增强 :class:

"""
import os
+
import torch

try:

fastNLP/core/dataset.py  (+2, -1)

@@ -273,9 +273,10 @@

"""
import _pickle as pickle
-import numpy as np
import warnings

+import numpy as np
+
from .field import AutoPadder
from .field import FieldArray
from .instance import Instance

fastNLP/core/field.py  (+2, -2)

@@ -3,10 +3,10 @@ field模块实现了 FieldArray 和若干 Padder。 FieldArray 是 :class:`~fas
原理部分请参考 :doc:`fastNLP.core.dataset`

"""
-import numpy as np
-
from copy import deepcopy

+import numpy as np
+
__all__ = [
    "FieldArray",
    "Padder",

fastNLP/core/losses.py  (+2, -2)

@@ -3,11 +3,11 @@ losses 模块定义了 fastNLP 中所需的各种损失函数,一般做为 :cl

"""
import inspect
+from collections import defaultdict

import torch
import torch.nn.functional as F

-from collections import defaultdict
-
from .utils import _CheckError
from .utils import _CheckRes
from .utils import _build_args

fastNLP/core/metrics.py  (+2, -2)

@@ -3,11 +3,11 @@ metrics 模块实现了 fastNLP 所需的各种常用衡量指标,一般做为

"""
import inspect
+from collections import defaultdict

import numpy as np
import torch

-from collections import defaultdict
-
from .utils import _CheckError
from .utils import _CheckRes
from .utils import _build_args

fastNLP/core/predictor.py  (+2, -2)

@@ -2,10 +2,10 @@
..todo::
    检查这个类是否需要
"""
-import torch
-
from collections import defaultdict

+import torch
+
from . import Batch
from . import DataSet
from . import SequentialSampler

fastNLP/core/sampler.py  (+2, -2)

@@ -1,10 +1,10 @@
"""
sampler 子类实现了 fastNLP 所需的各种采样器。
"""
-import numpy as np
-
from itertools import chain

+import numpy as np
+
__all__ = [
    "Sampler",
    "BucketSampler",

fastNLP/core/tester.py  (+1, -0)

@@ -33,6 +33,7 @@ Tester在验证进行之前会调用model.eval()提示当前进入了evaluation

"""
import warnings
+
import torch
import torch.nn as nn

fastNLP/core/trainer.py  (+3, -3)

@@ -297,13 +297,13 @@ Example2.3
"""

import os
-import numpy as np
import time
+from datetime import datetime, timedelta

+import numpy as np
import torch
import torch.nn as nn

-from datetime import datetime, timedelta
-
try:
    from tqdm.auto import tqdm
except:

fastNLP/core/utils.py  (+4, -5)

@@ -3,14 +3,13 @@ utils模块实现了 fastNLP 内部和外部所需的很多工具。其中用户
"""
import _pickle
import inspect
-import numpy as np
import os
-import torch
-import torch.nn as nn
import warnings
+from collections import Counter, namedtuple

-from collections import Counter
-from collections import namedtuple
+import numpy as np
+import torch
+import torch.nn as nn

__all__ = [
    "cache_results",

fastNLP/io/__init__.py  (+5, -4)

@@ -9,6 +9,11 @@

这些类的使用方法如下:
"""
+from .embed_loader import EmbedLoader
+from .dataset_loader import DataSetLoader, CSVLoader, JsonLoader, ConllLoader, SNLILoader, SSTLoader, \
+    PeopleDailyCorpusLoader, Conll2003Loader
+from .model_io import ModelLoader, ModelSaver
+
__all__ = [
    'EmbedLoader',
@@ -24,7 +29,3 @@ __all__ = [
    'ModelLoader',
    'ModelSaver',
]
-from .embed_loader import EmbedLoader
-from .dataset_loader import DataSetLoader, CSVLoader, JsonLoader, ConllLoader, SNLILoader, SSTLoader, \
-    PeopleDailyCorpusLoader, Conll2003Loader
-from .model_io import ModelLoader as ModelLoader, ModelSaver as ModelSaver

fastNLP/io/base_loader.py  (+13, -6)

@@ -1,15 +1,20 @@
import _pickle as pickle
import os

+__all__ = [
+    "BaseLoader"
+]
+

class BaseLoader(object):
    """
    各个 Loader 的基类,提供了 API 的参考。

    """
    def __init__(self):
        super(BaseLoader, self).__init__()
    @staticmethod
    def load_lines(data_path):
        """
@@ -20,7 +25,7 @@ class BaseLoader(object):
        with open(data_path, "r", encoding="utf=8") as f:
            text = f.readlines()
        return [line.strip() for line in text]
    @classmethod
    def load(cls, data_path):
        """
@@ -31,7 +36,7 @@ class BaseLoader(object):
        with open(data_path, "r", encoding="utf-8") as f:
            text = f.readlines()
        return [[word for word in sent.strip()] for sent in text]
    @classmethod
    def load_with_cache(cls, data_path, cache_path):
        """缓存版的load
@@ -48,16 +53,18 @@ class BaseLoader(object):

class DataLoaderRegister:
    _readers = {}
    @classmethod
    def set_reader(cls, reader_cls, read_fn_name):
        # def wrapper(reader_cls):
        if read_fn_name in cls._readers:
-            raise KeyError('duplicate reader: {} and {} for read_func: {}'.format(cls._readers[read_fn_name], reader_cls, read_fn_name))
+            raise KeyError(
+                'duplicate reader: {} and {} for read_func: {}'.format(cls._readers[read_fn_name], reader_cls,
+                                                                       read_fn_name))
        if hasattr(reader_cls, 'load'):
            cls._readers[read_fn_name] = reader_cls().load
        return reader_cls
    @classmethod
    def get_reader(cls, read_fn_name):
        if read_fn_name in cls._readers:

fastNLP/io/config_io.py  (+36, -28)

@@ -1,14 +1,20 @@
"""
用于读入和处理和保存 config 文件
+.. todo::
+    这个模块中的类可能被抛弃?
"""
-__all__ = ["ConfigLoader","ConfigSection","ConfigSaver"]
import configparser
import json
import os

from .base_loader import BaseLoader

+__all__ = [
+    "ConfigLoader",
+    "ConfigSection",
+    "ConfigSaver"
+]


class ConfigLoader(BaseLoader):
    """
@@ -19,15 +25,16 @@ class ConfigLoader(BaseLoader):
    :param str data_path: 配置文件的路径

    """
    def __init__(self, data_path=None):
        super(ConfigLoader, self).__init__()
        if data_path is not None:
            self.config = self.parse(super(ConfigLoader, self).load(data_path))
    @staticmethod
    def parse(string):
        raise NotImplementedError
    @staticmethod
    def load_config(file_path, sections):
        """
@@ -81,10 +88,10 @@ class ConfigSection(object):
    ConfigSection是一个存储了一个section中所有键值对的数据结构,推荐使用此类的实例来配合 :meth:`ConfigLoader.load_config` 使用

    """
    def __init__(self):
        super(ConfigSection, self).__init__()
    def __getitem__(self, key):
        """
        :param key: str, the name of the attribute
@@ -97,7 +104,7 @@ class ConfigSection(object):
        if key in self.__dict__.keys():
            return getattr(self, key)
        raise AttributeError("do NOT have attribute %s" % key)
    def __setitem__(self, key, value):
        """
        :param key: str, the name of the attribute
@@ -112,14 +119,14 @@ class ConfigSection(object):
            raise AttributeError("attr %s except %s but got %s" %
                                 (key, str(type(getattr(self, key))), str(type(value))))
        setattr(self, key, value)
    def __contains__(self, item):
        """
        :param item: The key of item.
        :return: True if the key in self.__dict__.keys() else False.
        """
        return item in self.__dict__.keys()
    def __eq__(self, other):
        """Overwrite the == operator
@@ -131,15 +138,15 @@ class ConfigSection(object):
                return False
            if getattr(self, k) != getattr(self, k):
                return False
        for k in other.__dict__.keys():
            if k not in self.__dict__.keys():
                return False
            if getattr(self, k) != getattr(self, k):
                return False
        return True
    def __ne__(self, other):
        """Overwrite the != operator
@@ -147,7 +154,7 @@ class ConfigSection(object):
        :return:
        """
        return not self.__eq__(other)
    @property
    def data(self):
        return self.__dict__
@@ -162,11 +169,12 @@ class ConfigSaver(object):
    :param str file_path: 配置文件的路径

    """
    def __init__(self, file_path):
        self.file_path = file_path
        if not os.path.exists(self.file_path):
            raise FileNotFoundError("file {} NOT found!".__format__(self.file_path))
    def _get_section(self, sect_name):
        """
        This is the function to get the section with the section name.
@@ -177,7 +185,7 @@ class ConfigSaver(object):
        sect = ConfigSection()
        ConfigLoader().load_config(self.file_path, {sect_name: sect})
        return sect
    def _read_section(self):
        """
        This is the function to read sections from the config file.
@@ -187,16 +195,16 @@ class ConfigSaver(object):
            sect_key_list: A list of names in sect_list.
        """
        sect_name = None
        sect_list = {}
        sect_key_list = []
        single_section = {}
        single_section_key = []
        with open(self.file_path, 'r') as f:
            lines = f.readlines()
        for line in lines:
            if line.startswith('[') and line.endswith(']\n'):
                if sect_name is None:
@@ -208,29 +216,29 @@ class ConfigSaver(object):
                    sect_key_list.append(sect_name)
                sect_name = line[1: -2]
                continue
            if line.startswith('#'):
                single_section[line] = '#'
                single_section_key.append(line)
                continue
            if line.startswith('\n'):
                single_section_key.append('\n')
                continue
            if '=' not in line:
                raise RuntimeError("can NOT load config file {}".__format__(self.file_path))
            key = line.split('=', maxsplit=1)[0].strip()
            value = line.split('=', maxsplit=1)[1].strip() + '\n'
            single_section[key] = value
            single_section_key.append(key)
        if sect_name is not None:
            sect_list[sect_name] = single_section, single_section_key
            sect_key_list.append(sect_name)
        return sect_list, sect_key_list
    def _write_section(self, sect_list, sect_key_list):
        """
        This is the function to write config file with section list and name list.
@@ -252,7 +260,7 @@ class ConfigSaver(object):
                    continue
                f.write(key + ' = ' + single_section[key])
            f.write('\n')
    def save_config_file(self, section_name, section):
        """
        这个方法可以用来修改并保存配置文件中单独的一个 section
@@ -284,11 +292,11 @@ class ConfigSaver(object):
                    break
        if not change_file:
            return
        sect_list, sect_key_list = self._read_section()
        if section_name not in sect_key_list:
            raise AttributeError()
        sect, sect_key = sect_list[section_name]
        for k in section.__dict__.keys():
            if k not in sect_key:

fastNLP/io/dataset_loader.py  (+6, -5)

@@ -10,6 +10,12 @@ dataset_loader模块实现了许多 DataSetLoader, 用于读取不同格式的

    # ... do stuff
"""
+from nltk.tree import Tree
+
+from ..core.dataset import DataSet
+from ..core.instance import Instance
+from .file_reader import _read_csv, _read_json, _read_conll
+
__all__ = [
    'DataSetLoader',
    'CSVLoader',
@@ -20,11 +26,6 @@ __all__ = [
    'PeopleDailyCorpusLoader',
    'Conll2003Loader',
]
-from nltk.tree import Tree
-
-from ..core.dataset import DataSet
-from ..core.instance import Instance
-from .file_reader import _read_csv, _read_json, _read_conll


def _download_from_url(url, path):

fastNLP/io/embed_loader.py  (+30, -26)

@@ -1,11 +1,15 @@
import os
+import warnings

import numpy as np

from ..core.vocabulary import Vocabulary
from .base_loader import BaseLoader

-import warnings
+__all__ = [
+    "EmbedLoader"
+]


class EmbedLoader(BaseLoader):
    """
@@ -13,10 +17,10 @@ class EmbedLoader(BaseLoader):

    用于读取预训练的embedding, 读取结果可直接载入为模型参数。
    """
    def __init__(self):
        super(EmbedLoader, self).__init__()
    @staticmethod
    def load_with_vocab(embed_filepath, vocab, dtype=np.float32, normalize=True, error='ignore'):
        """
@@ -40,11 +44,11 @@ class EmbedLoader(BaseLoader):
            line = f.readline().strip()
            parts = line.split()
            start_idx = 0
-            if len(parts)==2:
+            if len(parts) == 2:
                dim = int(parts[1])
                start_idx += 1
            else:
-                dim = len(parts)-1
+                dim = len(parts) - 1
                f.seek(0)
            matrix = np.random.randn(len(vocab), dim).astype(dtype)
            for idx, line in enumerate(f, start_idx):
@@ -63,21 +67,21 @@ class EmbedLoader(BaseLoader):
            total_hits = sum(hit_flags)
            print("Found {} out of {} words in the pre-training embedding.".format(total_hits, len(vocab)))
            found_vectors = matrix[hit_flags]
-            if len(found_vectors)!=0:
+            if len(found_vectors) != 0:
                mean = np.mean(found_vectors, axis=0, keepdims=True)
                std = np.std(found_vectors, axis=0, keepdims=True)
                unfound_vec_num = len(vocab) - total_hits
-                r_vecs = np.random.randn(unfound_vec_num, dim).astype(dtype)*std + mean
-                matrix[hit_flags==False] = r_vecs
+                r_vecs = np.random.randn(unfound_vec_num, dim).astype(dtype) * std + mean
+                matrix[hit_flags == False] = r_vecs
            if normalize:
                matrix /= np.linalg.norm(matrix, axis=1, keepdims=True)
            return matrix
    @staticmethod
    def load_without_vocab(embed_filepath, dtype=np.float32, padding='<pad>', unknown='<unk>', normalize=True,
-                            error='ignore'):
+                           error='ignore'):
        """
        从embed_filepath中读取预训练的word vector。根据预训练的词表读取embedding并生成一个对应的Vocabulary。
@@ -96,35 +100,35 @@ class EmbedLoader(BaseLoader):
        vec_dict = {}
        found_unknown = False
        found_pad = False
        with open(embed_filepath, 'r', encoding='utf-8') as f:
            line = f.readline()
            start = 1
            dim = -1
-            if len(line.strip().split())!=2:
+            if len(line.strip().split()) != 2:
                f.seek(0)
                start = 0
            for idx, line in enumerate(f, start=start):
                try:
                    parts = line.strip().split()
                    word = parts[0]
-                    if dim==-1:
-                        dim = len(parts)-1
+                    if dim == -1:
+                        dim = len(parts) - 1
                    vec = np.fromstring(' '.join(parts[1:]), sep=' ', dtype=dtype, count=dim)
                    vec_dict[word] = vec
                    vocab.add_word(word)
-                    if unknown is not None and unknown==word:
+                    if unknown is not None and unknown == word:
                        found_unknown = True
-                    if found_pad is not None and padding==word:
+                    if found_pad is not None and padding == word:
                        found_pad = True
                except Exception as e:
-                    if error=='ignore':
+                    if error == 'ignore':
                        warnings.warn("Error occurred at the {} line.".format(idx))
                        pass
                    else:
                        print("Error occurred at the {} line.".format(idx))
                        raise e
-            if dim==-1:
+            if dim == -1:
                raise RuntimeError("{} is an empty file.".format(embed_filepath))
            matrix = np.random.randn(len(vocab), dim).astype(dtype)
            if (unknown is not None and not found_unknown) or (padding is not None and not found_pad):
@@ -133,19 +137,19 @@ class EmbedLoader(BaseLoader):
                    start_idx += 1
                if unknown is not None:
                    start_idx += 1
                mean = np.mean(matrix[start_idx:], axis=0, keepdims=True)
                std = np.std(matrix[start_idx:], axis=0, keepdims=True)
                if (unknown is not None and not found_unknown):
-                    matrix[start_idx-1] = np.random.randn(1, dim).astype(dtype)*std + mean
+                    matrix[start_idx - 1] = np.random.randn(1, dim).astype(dtype) * std + mean
                if (padding is not None and not found_pad):
-                    matrix[0] = np.random.randn(1, dim).astype(dtype)*std + mean
+                    matrix[0] = np.random.randn(1, dim).astype(dtype) * std + mean
            for key, vec in vec_dict.items():
                index = vocab.to_index(key)
                matrix[index] = vec
            if normalize:
                matrix /= np.linalg.norm(matrix, axis=1, keepdims=True)
            return matrix, vocab

fastNLP/io/model_io.py  (+10, -5)

@@ -5,6 +5,11 @@ import torch

from .base_loader import BaseLoader

+__all__ = [
+    "ModelLoader",
+    "ModelSaver"
+]


class ModelLoader(BaseLoader):
    """
@@ -12,10 +17,10 @@ class ModelLoader(BaseLoader):

    用于读取模型
    """
    def __init__(self):
        super(ModelLoader, self).__init__()
    @staticmethod
    def load_pytorch(empty_model, model_path):
        """
@@ -25,7 +30,7 @@ class ModelLoader(BaseLoader):
        :param str model_path: 模型保存的路径
        """
        empty_model.load_state_dict(torch.load(model_path))
    @staticmethod
    def load_pytorch_model(model_path):
        """
@@ -48,14 +53,14 @@ class ModelSaver(object):
        saver.save_pytorch(model)

    """
    def __init__(self, save_path):
        """

        :param save_path: 模型保存的路径
        """
        self.save_path = save_path
    def save_pytorch(self, model, param_only=True):
        """
        把 PyTorch 模型存入 ".pkl" 文件

fastNLP/models/__init__.py  (+18, -2)

@@ -7,7 +7,6 @@ fastNLP 在 :mod:`~fastNLP.models` 模块中内置了如 :class:`~fastNLP.models


"""
-__all__ = ["CNNText", "SeqLabeling", "ESIM", "STSeqLabel", "AdvSeqLabel", "STNLICls", "STSeqCls"]
from .base_model import BaseModel
from .bert import BertForMultipleChoice, BertForQuestionAnswering, BertForSequenceClassification, \
    BertForTokenClassification
@@ -15,4 +14,21 @@ from .biaffine_parser import BiaffineParser, GraphParser
from .cnn_text_classification import CNNText
from .sequence_labeling import SeqLabeling, AdvSeqLabel
from .snli import ESIM
-from .star_transformer import STSeqCls, STNLICls, STSeqLabel
+from .star_transformer import StarTransEnc, STSeqCls, STNLICls, STSeqLabel
+
+__all__ = [
+    "CNNText",
+    "SeqLabeling",
+    "AdvSeqLabel",
+    "ESIM",
+    "StarTransEnc",
+    "STSeqLabel",
+    "STNLICls",
+    "STSeqCls",
+    "BiaffineParser",
+    "GraphParser"
+]
fastNLP/models/base_model.py  (+5, -5)

@@ -6,13 +6,13 @@ from ..modules.decoder.MLP import MLP
class BaseModel(torch.nn.Module):
    """Base PyTorch model for all models.
    """
    def __init__(self):
        super(BaseModel, self).__init__()
    def fit(self, train_data, dev_data=None, **train_args):
        pass
    def predict(self, *args, **kwargs):
        raise NotImplementedError

@@ -21,9 +21,9 @@ class NaiveClassifier(BaseModel):
    def __init__(self, in_feature_dim, out_feature_dim):
        super(NaiveClassifier, self).__init__()
        self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])
    def forward(self, x):
        return {"predict": torch.sigmoid(self.mlp(x))}
    def predict(self, x):
        return {"predict": torch.sigmoid(self.mlp(x)) > 0.5}

fastNLP/models/biaffine_parser.py  (+88, -71)

@@ -1,11 +1,12 @@
-"""Biaffine Dependency Parser 的 Pytorch 实现.
-"""
-from collections import defaultdict
+"""
+Biaffine Dependency Parser 的 Pytorch 实现.
+"""
import numpy as np
import torch
-from torch import nn
-from torch.nn import functional as F
+import torch.nn as nn
+import torch.nn.functional as F

+from collections import defaultdict

from ..core.const import Const as C
from ..core.losses import LossFunc
@@ -18,6 +19,12 @@ from ..modules.utils import get_embeddings
from .base_model import BaseModel
from ..core.utils import seq_len_to_mask

+__all__ = [
+    "BiaffineParser",
+    "GraphParser"
+]
+

def _mst(scores):
    """
    with some modification to support parser output for MST decoding
@@ -44,7 +51,7 @@ def _mst(scores):
            scores[roots, new_heads] / root_scores)]
        heads[roots] = new_heads
        heads[new_root] = 0
    edges = defaultdict(set)
    vertices = set((0,))
    for dep, head in enumerate(heads[tokens]):
@@ -73,7 +80,7 @@ def _mst(scores):
            heads[changed_cycle] = new_head
            edges[new_head].add(changed_cycle)
            edges[old_head].remove(changed_cycle)

    return heads


@@ -88,7 +95,7 @@ def _find_cycle(vertices, edges):
    _lowlinks = {}
    _onstack = defaultdict(lambda: False)
    _SCCs = []
    def _strongconnect(v):
        nonlocal _index
        _indices[v] = _index
@@ -96,28 +103,28 @@ def _find_cycle(vertices, edges):
        _index += 1
        _stack.append(v)
        _onstack[v] = True

        for w in edges[v]:
            if w not in _indices:
                _strongconnect(w)
                _lowlinks[v] = min(_lowlinks[v], _lowlinks[w])
            elif _onstack[w]:
                _lowlinks[v] = min(_lowlinks[v], _indices[w])

        if _lowlinks[v] == _indices[v]:
            SCC = set()
            while True:
                w = _stack.pop()
                _onstack[w] = False
                SCC.add(w)
-                if not(w != v):
+                if not (w != v):
                    break
            _SCCs.append(SCC)

    for v in vertices:
        if v not in _indices:
            _strongconnect(v)

    return [SCC for SCC in _SCCs if len(SCC) > 1]


@@ -125,9 +132,10 @@ class GraphParser(BaseModel):
    """
    基于图的parser base class, 支持贪婪解码和最大生成树解码
    """
    def __init__(self):
        super(GraphParser, self).__init__()
    @staticmethod
    def greedy_decoder(arc_matrix, mask=None):
        """
@@ -146,7 +154,7 @@ class GraphParser(BaseModel):
        if mask is not None:
            heads *= mask.long()
        return heads
    @staticmethod
    def mst_decoder(arc_matrix, mask=None):
        """
@@ -176,6 +184,7 @@ class ArcBiaffine(nn.Module):
    :param hidden_size: 输入的特征维度
    :param bias: 是否使用bias. Default: ``True``
    """
    def __init__(self, hidden_size, bias=True):
        super(ArcBiaffine, self).__init__()
        self.U = nn.Parameter(torch.Tensor(hidden_size, hidden_size), requires_grad=True)
@@ -185,7 +194,7 @@ class ArcBiaffine(nn.Module):
        else:
            self.register_parameter("bias", None)
        initial_parameter(self)
    def forward(self, head, dep):
        """
@@ -209,11 +218,12 @@ class LabelBilinear(nn.Module):
    :param num_label: 边类别的个数
    :param bias: 是否使用bias. Default: ``True``
    """
    def __init__(self, in1_features, in2_features, num_label, bias=True):
        super(LabelBilinear, self).__init__()
        self.bilinear = nn.Bilinear(in1_features, in2_features, num_label, bias=bias)
        self.lin = nn.Linear(in1_features + in2_features, num_label, bias=False)
    def forward(self, x1, x2):
        """
@@ -225,13 +235,13 @@ class LabelBilinear(nn.Module):
        output += self.lin(torch.cat([x1, x2], dim=2))
        return output


class BiaffineParser(GraphParser):
    """
    别名::class:`fastNLP.models.BiaffineParser` :class:`fastNLP.models.baffine_parser.BiaffineParser`

    Biaffine Dependency Parser 实现.
-    论文参考 ` Deep Biaffine Attention for Neural Dependency Parsing (Dozat and Manning, 2016)
-    <https://arxiv.org/abs/1611.01734>`_ .
+    论文参考 `Deep Biaffine Attention for Neural Dependency Parsing (Dozat and Manning, 2016) <https://arxiv.org/abs/1611.01734>`_ .

    :param init_embed: 单词词典, 可以是 tuple, 包括(num_embedings, embedding_dim), 即
        embedding的大小和每个词的维度. 也可以传入 nn.Embedding 对象,
@@ -248,18 +258,19 @@ class BiaffineParser(GraphParser):
    :param use_greedy_infer: 是否在inference时使用贪心算法.
        若 ``False`` , 使用更加精确但相对缓慢的MST算法. Default: ``False``
    """
    def __init__(self,
                 init_embed,
                 pos_vocab_size,
                 pos_emb_dim,
                 num_label,
                 rnn_layers=1,
                 rnn_hidden_size=200,
                 arc_mlp_size=100,
                 label_mlp_size=100,
                 dropout=0.3,
                 encoder='lstm',
                 use_greedy_infer=False):
        super(BiaffineParser, self).__init__()
        rnn_out_size = 2 * rnn_hidden_size
        word_hid_dim = pos_hid_dim = rnn_hidden_size
@@ -295,20 +306,20 @@ class BiaffineParser(GraphParser):
            if (d_k * n_head) != rnn_out_size:
                raise ValueError('unsupported rnn_out_size: {} for transformer'.format(rnn_out_size))
            self.position_emb = nn.Embedding(num_embeddings=self.max_len,
-                                             embedding_dim=rnn_out_size,)
+                                             embedding_dim=rnn_out_size, )
            self.encoder = TransformerEncoder(num_layers=rnn_layers,
                                              model_size=rnn_out_size,
                                              inner_size=1024,
                                              key_size=d_k,
                                              value_size=d_v,
                                              num_head=n_head,
-                                              dropout=dropout,)
+                                              dropout=dropout, )
        else:
            raise ValueError('unsupported encoder type: {}'.format(encoder))
        self.mlp = nn.Sequential(nn.Linear(rnn_out_size, arc_mlp_size * 2 + label_mlp_size * 2),
                                 nn.ELU(),
-                                 TimestepDropout(p=dropout),)
+                                 TimestepDropout(p=dropout), )
        self.arc_mlp_size = arc_mlp_size
        self.label_mlp_size = label_mlp_size
        self.arc_predictor = ArcBiaffine(arc_mlp_size, bias=True)
@@ -316,7 +327,7 @@ class BiaffineParser(GraphParser):
        self.use_greedy_infer = use_greedy_infer
        self.reset_parameters()
        self.dropout = dropout
    def reset_parameters(self):
        for m in self.modules():
            if isinstance(m, nn.Embedding):
@@ -327,7 +338,7 @@ class BiaffineParser(GraphParser):
            else:
                for p in m.parameters():
                    nn.init.normal_(p, 0, 0.1)
    def forward(self, words1, words2, seq_len, target1=None):
        """模型forward阶段
@@ -337,50 +348,52 @@ class BiaffineParser(GraphParser):
        :param target1: [batch_size, seq_len] 输入真实标注的heads, 仅在训练阶段有效,
            用于训练label分类器. 若为 ``None`` , 使用预测的heads输入到label分类器
            Default: ``None``
-        :return dict: parsing结果::
+        :return dict: parsing
+            结果::
+
+                pred1: [batch_size, seq_len, seq_len] 边预测logits
+                pred2: [batch_size, seq_len, num_label] label预测logits
+                pred3: [batch_size, seq_len] heads的预测结果, 在 ``target1=None`` 时预测

-            pred1: [batch_size, seq_len, seq_len] 边预测logits
-            pred2: [batch_size, seq_len, num_label] label预测logits
-            pred3: [batch_size, seq_len] heads的预测结果, 在 ``target1=None`` 时预测
        """
        # prepare embeddings
        batch_size, length = words1.shape
        # print('forward {} {}'.format(batch_size, seq_len))
        # get sequence mask
        mask = seq_len_to_mask(seq_len).long()
-        word = self.word_embedding(words1) # [N,L] -> [N,L,C_0]
-        pos = self.pos_embedding(words2)   # [N,L] -> [N,L,C_1]
+        word = self.word_embedding(words1)  # [N,L] -> [N,L,C_0]
+        pos = self.pos_embedding(words2)  # [N,L] -> [N,L,C_1]
        word, pos = self.word_fc(word), self.pos_fc(pos)
        word, pos = self.word_norm(word), self.pos_norm(pos)
-        x = torch.cat([word, pos], dim=2) # -> [N,L,C]
+        x = torch.cat([word, pos], dim=2)  # -> [N,L,C]
        # encoder, extract features
        if self.encoder_name.endswith('lstm'):
            sort_lens, sort_idx = torch.sort(seq_len, dim=0, descending=True)
            x = x[sort_idx]
            x = nn.utils.rnn.pack_padded_sequence(x, sort_lens, batch_first=True)
-            feat, _ = self.encoder(x) # -> [N,L,C]
+            feat, _ = self.encoder(x)  # -> [N,L,C]
            feat, _ = nn.utils.rnn.pad_packed_sequence(feat, batch_first=True)
            _, unsort_idx = torch.sort(sort_idx, dim=0, descending=False)
            feat = feat[unsort_idx]
        else:
-            seq_range = torch.arange(length, dtype=torch.long, device=x.device)[None,:]
+            seq_range = torch.arange(length, dtype=torch.long, device=x.device)[None, :]
            x = x + self.position_emb(seq_range)
            feat = self.encoder(x, mask.float())
        # for arc biaffine
        # mlp, reduce dim
        feat = self.mlp(feat)
        arc_sz, label_sz = self.arc_mlp_size, self.label_mlp_size
-        arc_dep, arc_head = feat[:,:,:arc_sz], feat[:,:,arc_sz:2*arc_sz]
-        label_dep, label_head = feat[:,:,2*arc_sz:2*arc_sz+label_sz], feat[:,:,2*arc_sz+label_sz:]
+        arc_dep, arc_head = feat[:, :, :arc_sz], feat[:, :, arc_sz:2 * arc_sz]
+        label_dep, label_head = feat[:, :, 2 * arc_sz:2 * arc_sz + label_sz], feat[:, :, 2 * arc_sz + label_sz:]
        # biaffine arc classifier
-        arc_pred = self.arc_predictor(arc_head, arc_dep) # [N, L, L]
+        arc_pred = self.arc_predictor(arc_head, arc_dep)  # [N, L, L]
        # use gold or predicted arc to predict label
        if target1 is None or not self.training:
            # use greedy decoding in training
@@ -390,22 +403,22 @@ class BiaffineParser(GraphParser):
                heads = self.mst_decoder(arc_pred, mask)
            head_pred = heads
        else:
-            assert self.training # must be training mode
+            assert self.training  # must be training mode
            if target1 is None:
                heads = self.greedy_decoder(arc_pred, mask)
                head_pred = heads
            else:
                head_pred = None
                heads = target1
        batch_range = torch.arange(start=0, end=batch_size, dtype=torch.long, device=words1.device).unsqueeze(1)
        label_head = label_head[batch_range, heads].contiguous()
-        label_pred = self.label_predictor(label_head, label_dep) # [N, L, num_label]
+        label_pred = self.label_predictor(label_head, label_dep)  # [N, L, num_label]
        res_dict = {C.OUTPUTS(0): arc_pred, C.OUTPUTS(1): label_pred}
        if head_pred is not None:
            res_dict[C.OUTPUTS(2)] = head_pred
        return res_dict
    @staticmethod
    def loss(pred1, pred2, target1, target2, seq_len):
        """
@@ -418,7 +431,7 @@ class BiaffineParser(GraphParser):
        :param seq_len: [batch_size, seq_len] 真实目标的长度
        :return loss: scalar
        """
        batch_size, length, _ = pred1.shape
        mask = seq_len_to_mask(seq_len)
        flip_mask = (mask == 0)
@@ -430,24 +443,26 @@ class BiaffineParser(GraphParser):
        child_index = torch.arange(length, device=arc_logits.device, dtype=torch.long).unsqueeze(0)
        arc_loss = arc_logits[batch_index, child_index, target1]
        label_loss = label_logits[batch_index, child_index, target2]
        byte_mask = flip_mask.byte()
        arc_loss.masked_fill_(byte_mask, 0)
        label_loss.masked_fill_(byte_mask, 0)
        arc_nll = -arc_loss.mean()
        label_nll = -label_loss.mean()
        return arc_nll + label_nll
    def predict(self, words1, words2, seq_len):
        """模型预测API

        :param words1: [batch_size, seq_len] 输入word序列
        :param words2: [batch_size, seq_len] 输入pos序列
        :param seq_len: [batch_size, seq_len] 输入序列长度
-        :return dict: parsing结果::
+        :return dict: parsing
+            结果::
+
+                pred1: [batch_size, seq_len] heads的预测结果
+                pred2: [batch_size, seq_len, num_label] label预测logits

-            pred1: [batch_size, seq_len] heads的预测结果
-            pred2: [batch_size, seq_len, num_label] label预测logits
        """
        res = self(words1, words2, seq_len)
        output = {}
@@ -470,6 +485,7 @@ class ParserLoss(LossFunc):
    :param seq_len: [batch_size, seq_len] 真实目标的长度
    :return loss: scalar
    """
    def __init__(self, pred1=None, pred2=None,
                 target1=None, target2=None,
                 seq_len=None):
@@ -497,9 +513,10 @@ class ParserMetric(MetricBase):
        UAS: 不带label时, 边预测的准确率
        LAS: 同时预测边和label的准确率
    """
    def __init__(self, pred1=None, pred2=None,
                 target1=None, target2=None, seq_len=None):
        super().__init__()
        self._init_param_map(pred1=pred1, pred2=pred2,
                             target1=target1, target2=target2,
@@ -507,13 +524,13 @@ class ParserMetric(MetricBase):
        self.num_arc = 0
        self.num_label = 0
        self.num_sample = 0
    def get_metric(self, reset=True):
-        res = {'UAS': self.num_arc*1.0 / self.num_sample, 'LAS': self.num_label*1.0 / self.num_sample}
+        res = {'UAS': self.num_arc * 1.0 / self.num_sample, 'LAS': self.num_label * 1.0 / self.num_sample}
        if reset:
            self.num_sample = self.num_label = self.num_arc = 0
        return res
    def evaluate(self, pred1, pred2, target1, target2, seq_len=None):
        """Evaluate the performance of prediction.
        """
@@ -522,7 +539,7 @@ class ParserMetric(MetricBase):
        else:
            seq_mask = seq_len_to_mask(seq_len.long()).long()
        # mask out <root> tag
-        seq_mask[:,0] = 0
+        seq_mask[:, 0] = 0
        head_pred_correct = (pred1 == target1).long() * seq_mask
        label_pred_correct = (pred2 == target2).long() * head_pred_correct
        self.num_arc += head_pred_correct.sum().item()

fastNLP/models/cnn_text_classification.py  (+9, -8)

@@ -1,12 +1,13 @@
-# python: 3.6
-# encoding: utf-8
-
import torch
import torch.nn as nn
-from ..core.const import Const as C

+from ..core.const import Const as C
from ..modules import encoder

+__all__ = [
+    "CNNText"
+]


class CNNText(torch.nn.Module):
    """
@@ -23,7 +24,7 @@ class CNNText(torch.nn.Module):
    :param int padding: 对句子前后的pad的大小, 用0填充。
    :param float dropout: Dropout的大小
    """
    def __init__(self, init_embed,
                 num_classes,
                 kernel_nums=(3, 4, 5),
@@ -31,7 +32,7 @@ class CNNText(torch.nn.Module):
                 padding=0,
                 dropout=0.5):
        super(CNNText, self).__init__()
        # no support for pre-trained embedding currently
        self.embed = encoder.Embedding(init_embed)
        self.conv_pool = encoder.ConvMaxpool(
@@ -41,7 +42,7 @@ class CNNText(torch.nn.Module):
            padding=padding)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(sum(kernel_nums), num_classes)
    def forward(self, words, seq_len=None):
        """
@@ -54,7 +55,7 @@ class CNNText(torch.nn.Module):
        x = self.dropout(x)
        x = self.fc(x)  # [N,C] -> [N, N_class]
        return {C.OUTPUT: x}
    def predict(self, words, seq_len=None):
        """
        :param torch.LongTensor words: [batch_size, seq_len],句子中word的index

fastNLP/models/enas_controller.py  (+1, -0)

@@ -5,6 +5,7 @@ import os

import torch
import torch.nn.functional as F
+
from . import enas_utils as utils
from .enas_utils import Node

+ 71
- 68
fastNLP/models/enas_model.py View File

@@ -1,17 +1,19 @@
# Code Modified from https://github.com/carpedm20/ENAS-pytorch
"""Module containing the shared RNN model."""
import numpy as np
"""
Module containing the shared RNN model.
Code Modified from https://github.com/carpedm20/ENAS-pytorch
"""
import collections import collections


import numpy as np
import torch import torch
from torch import nn
import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch.autograd import Variable from torch.autograd import Variable


from . import enas_utils as utils from . import enas_utils as utils
from .base_model import BaseModel from .base_model import BaseModel



def _get_dropped_weights(w_raw, dropout_p, is_training): def _get_dropped_weights(w_raw, dropout_p, is_training):
"""Drops out weights to implement DropConnect. """Drops out weights to implement DropConnect.


@@ -35,12 +37,13 @@ def _get_dropped_weights(w_raw, dropout_p, is_training):
The above TODO is the reason for the hacky check for `torch.nn.Parameter`. The above TODO is the reason for the hacky check for `torch.nn.Parameter`.
""" """
dropped_w = F.dropout(w_raw, p=dropout_p, training=is_training) dropped_w = F.dropout(w_raw, p=dropout_p, training=is_training)
if isinstance(dropped_w, torch.nn.Parameter): if isinstance(dropped_w, torch.nn.Parameter):
dropped_w = dropped_w.clone() dropped_w = dropped_w.clone()
return dropped_w return dropped_w



class EmbeddingDropout(torch.nn.Embedding): class EmbeddingDropout(torch.nn.Embedding):
"""Class for dropping out embeddings by zero'ing out parameters in the """Class for dropping out embeddings by zero'ing out parameters in the
embedding matrix. embedding matrix.
@@ -53,6 +56,7 @@ class EmbeddingDropout(torch.nn.Embedding):
See 'A Theoretically Grounded Application of Dropout in Recurrent Neural See 'A Theoretically Grounded Application of Dropout in Recurrent Neural
Networks', (Gal and Ghahramani, 2016). Networks', (Gal and Ghahramani, 2016).
""" """
def __init__(self, def __init__(self,
num_embeddings, num_embeddings,
embedding_dim, embedding_dim,
@@ -83,14 +87,14 @@ class EmbeddingDropout(torch.nn.Embedding):
assert (dropout >= 0.0) and (dropout < 1.0), ('Dropout must be >= 0.0 ' assert (dropout >= 0.0) and (dropout < 1.0), ('Dropout must be >= 0.0 '
'and < 1.0') 'and < 1.0')
self.scale = scale self.scale = scale
def forward(self, inputs): # pylint:disable=arguments-differ def forward(self, inputs): # pylint:disable=arguments-differ
"""Embeds `inputs` with the dropped out embedding weight matrix.""" """Embeds `inputs` with the dropped out embedding weight matrix."""
if self.training: if self.training:
dropout = self.dropout dropout = self.dropout
else: else:
dropout = 0 dropout = 0
if dropout: if dropout:
mask = self.weight.data.new(self.weight.size(0), 1) mask = self.weight.data.new(self.weight.size(0), 1)
mask.bernoulli_(1 - dropout) mask.bernoulli_(1 - dropout)
@@ -101,7 +105,7 @@ class EmbeddingDropout(torch.nn.Embedding):
masked_weight = self.weight masked_weight = self.weight
if self.scale and self.scale != 1: if self.scale and self.scale != 1:
masked_weight = masked_weight * self.scale masked_weight = masked_weight * self.scale
return F.embedding(inputs, return F.embedding(inputs,
masked_weight, masked_weight,
max_norm=self.max_norm, max_norm=self.max_norm,
@@ -114,7 +118,7 @@ class LockedDropout(nn.Module):
# code from https://github.com/salesforce/awd-lstm-lm/blob/master/locked_dropout.py # code from https://github.com/salesforce/awd-lstm-lm/blob/master/locked_dropout.py
def __init__(self): def __init__(self):
super().__init__() super().__init__()
def forward(self, x, dropout=0.5): def forward(self, x, dropout=0.5):
if not self.training or not dropout: if not self.training or not dropout:
return x return x
@@ -126,11 +130,12 @@ class LockedDropout(nn.Module):

class ENASModel(BaseModel):
    """Shared RNN model."""
    def __init__(self, embed_num, num_classes, num_blocks=4, cuda=False, shared_hid=1000, shared_embed=1000):
        super(ENASModel, self).__init__()
        self.use_cuda = cuda
        self.shared_hid = shared_hid
        self.num_blocks = num_blocks
        self.decoder = nn.Linear(self.shared_hid, num_classes)
@@ -139,16 +144,16 @@ class ENASModel(BaseModel):
                               dropout=0.1)
        self.lockdrop = LockedDropout()
        self.dag = None
        # Tie weights
        # self.decoder.weight = self.encoder.weight
        # Since W^{x, c} and W^{h, c} are always summed, there
        # is no point duplicating their bias offset parameter. Likewise for
        # W^{x, h} and W^{h, h}.
        self.w_xc = nn.Linear(shared_embed, self.shared_hid)
        self.w_xh = nn.Linear(shared_embed, self.shared_hid)
        # The raw weights are stored here because the hidden-to-hidden weights
        # are weight dropped on the forward pass.
        self.w_hc_raw = torch.nn.Parameter(
@@ -157,10 +162,10 @@ class ENASModel(BaseModel):
            torch.Tensor(self.shared_hid, self.shared_hid))
        self.w_hc = None
        self.w_hh = None
        self.w_h = collections.defaultdict(dict)
        self.w_c = collections.defaultdict(dict)
        for idx in range(self.num_blocks):
            for jdx in range(idx + 1, self.num_blocks):
                self.w_h[idx][jdx] = nn.Linear(self.shared_hid,
@@ -169,48 +174,47 @@ class ENASModel(BaseModel):
                self.w_c[idx][jdx] = nn.Linear(self.shared_hid,
                                               self.shared_hid,
                                               bias=False)
        self._w_h = nn.ModuleList([self.w_h[idx][jdx]
                                   for idx in self.w_h
                                   for jdx in self.w_h[idx]])
        self._w_c = nn.ModuleList([self.w_c[idx][jdx]
                                   for idx in self.w_c
                                   for jdx in self.w_c[idx]])
        self.batch_norm = None
        # if args.mode == 'train':
        # self.batch_norm = nn.BatchNorm1d(self.shared_hid)
        # else:
        # self.batch_norm = None
        self.reset_parameters()
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)
    def setDAG(self, dag):
        if self.dag is None:
            self.dag = dag
    def forward(self, word_seq, hidden=None):
        inputs = torch.transpose(word_seq, 0, 1)
        time_steps = inputs.size(0)
        batch_size = inputs.size(1)

        self.w_hh = _get_dropped_weights(self.w_hh_raw,
                                         0.5,
                                         self.training)
        self.w_hc = _get_dropped_weights(self.w_hc_raw,
                                         0.5,
                                         self.training)
        # hidden = self.static_init_hidden[batch_size] if hidden is None else hidden
        hidden = self.static_init_hidden[batch_size]
        embed = self.encoder(inputs)
        embed = self.lockdrop(embed, 0.65 if self.training else 0)
        # The norm of hidden states are clipped here because
        # otherwise ENAS is especially prone to exploding activations on the
        # forward pass. This could probably be fixed in a more elegant way, but
@@ -226,7 +230,7 @@ class ENASModel(BaseModel):
        for step in range(time_steps):
            x_t = embed[step]
            logit, hidden = self.cell(x_t, hidden, self.dag)
            hidden_norms = hidden.norm(dim=-1)
            max_norm = 25.0
            if hidden_norms.data.max() > max_norm:
@@ -237,60 +241,60 @@ class ENASModel(BaseModel):
                # because the PyTorch slicing and slice assignment is too
                # flaky.
                hidden_norms = hidden_norms.data.cpu().numpy()
                clipped_num += 1
                if hidden_norms.max() > max_clipped_norm:
                    max_clipped_norm = hidden_norms.max()
                clip_select = hidden_norms > max_norm
                clip_norms = hidden_norms[clip_select]
                mask = np.ones(hidden.size())
                normalizer = max_norm/clip_norms
                normalizer = max_norm / clip_norms
                normalizer = normalizer[:, np.newaxis]
                mask[clip_select] = normalizer
                if self.use_cuda:
                    hidden *= torch.autograd.Variable(
                        torch.FloatTensor(mask).cuda(), requires_grad=False)
                else:
                    hidden *= torch.autograd.Variable(
                            torch.FloatTensor(mask), requires_grad=False)
                        torch.FloatTensor(mask), requires_grad=False)
            logits.append(logit)
            h1tohT.append(hidden)
        h1tohT = torch.stack(h1tohT)
        output = torch.stack(logits)
        raw_output = output
        output = self.lockdrop(output, 0.4 if self.training else 0)
        #Pooling
        # Pooling
        output = torch.mean(output, 0)
        decoded = self.decoder(output)
        extra_out = {'dropped': decoded,
                     'hiddens': h1tohT,
                     'raw': raw_output}
        return {'pred': decoded, 'hidden': hidden, 'extra_out': extra_out}
    def cell(self, x, h_prev, dag):
        """Computes a single pass through the discovered RNN cell."""
        c = {}
        h = {}
        f = {}
        f[0] = self.get_f(dag[-1][0].name)
        c[0] = torch.sigmoid(self.w_xc(x) + F.linear(h_prev, self.w_hc, None))
        h[0] = (c[0]*f[0](self.w_xh(x) + F.linear(h_prev, self.w_hh, None)) +
                (1 - c[0])*h_prev)
        h[0] = (c[0] * f[0](self.w_xh(x) + F.linear(h_prev, self.w_hh, None)) +
                (1 - c[0]) * h_prev)
        leaf_node_ids = []
        q = collections.deque()
        q.append(0)
        # Computes connections from the parent nodes `node_id`
        # to their child nodes `next_id` recursively, skipping leaf nodes. A
        # leaf node is a node whose id == `self.num_blocks`.
@@ -306,10 +310,10 @@ class ENASModel(BaseModel):
        while True:
            if len(q) == 0:
                break
            node_id = q.popleft()
            nodes = dag[node_id]
            for next_node in nodes:
                next_id = next_node.id
                if next_id == self.num_blocks:
@@ -317,38 +321,38 @@ class ENASModel(BaseModel):
                    assert len(nodes) == 1, ('parent of leaf node should have '
                                             'only one child')
                    continue
                w_h = self.w_h[node_id][next_id]
                w_c = self.w_c[node_id][next_id]
                f[next_id] = self.get_f(next_node.name)
                c[next_id] = torch.sigmoid(w_c(h[node_id]))
                h[next_id] = (c[next_id]*f[next_id](w_h(h[node_id])) +
                              (1 - c[next_id])*h[node_id])
                h[next_id] = (c[next_id] * f[next_id](w_h(h[node_id])) +
                              (1 - c[next_id]) * h[node_id])
                q.append(next_id)
        # Instead of averaging loose ends, perhaps there should
        # be a set of separate unshared weights for each "loose" connection
        # between each node in a cell and the output.
        #
        # As it stands, all weights W^h_{ij} are doing double duty by
        # connecting both from i to j, as well as from i to the output.
        # average all the loose ends
        leaf_nodes = [h[node_id] for node_id in leaf_node_ids]
        output = torch.mean(torch.stack(leaf_nodes, 2), -1)
        # stabilizing the Updates of omega
        if self.batch_norm is not None:
            output = self.batch_norm(output)
        return output, h[self.num_blocks - 1]
    def init_hidden(self, batch_size):
        zeros = torch.zeros(batch_size, self.shared_hid)
        return utils.get_variable(zeros, self.use_cuda, requires_grad=False)
    def get_f(self, name):
        name = name.lower()
        if name == 'relu':
@@ -360,22 +364,21 @@ class ENASModel(BaseModel):
        elif name == 'sigmoid':
            f = torch.sigmoid
        return f

    @property
    def num_parameters(self):
        def size(p):
            return np.prod(p.size())
        return sum([size(param) for param in self.parameters()])

    def reset_parameters(self):
        init_range = 0.025
        # init_range = 0.025 if self.args.mode == 'train' else 0.04
        for param in self.parameters():
            param.data.uniform_(-init_range, init_range)
        self.decoder.bias.data.fill_(0)
    def predict(self, word_seq):
        """




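The forward pass above rescales hidden states whose L2 norm exceeds max_norm (25.0) to keep ENAS from blowing up on the forward pass. A rough standalone sketch of that rescaling, assuming a [batch, hidden] tensor; the function name is illustrative and this is not the exact fastNLP code:

    import torch

    def clip_hidden_norm(hidden, max_norm=25.0):
        # hidden: [batch, hidden_size]; rescale rows whose L2 norm exceeds max_norm
        norms = hidden.norm(dim=-1, keepdim=True)  # [batch, 1]
        scale = torch.where(norms > max_norm, max_norm / norms, torch.ones_like(norms))
        return hidden * scale

    h = torch.randn(4, 8) * 100
    print(clip_hidden_norm(h).norm(dim=-1))  # every row norm is now <= 25.0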
+ 69
- 72
fastNLP/models/enas_trainer.py View File

@@ -1,12 +1,12 @@
# Code Modified from https://github.com/carpedm20/ENAS-pytorch

import time
from datetime import datetime
from datetime import timedelta

import math
import numpy as np
import time
import torch
import math

from datetime import datetime, timedelta

from torch.optim import Adam

try:
    from tqdm.auto import tqdm
@@ -21,8 +21,6 @@ from ..core.utils import _move_dict_value_to_device
from . import enas_utils as utils
from ..core.utils import _build_args

from torch.optim import Adam

def _get_no_grad_ctx_mgr():
    """Returns a the `torch.no_grad` context manager for PyTorch version >=
@@ -33,6 +31,7 @@ def _get_no_grad_ctx_mgr():

class ENASTrainer(Trainer):
    """A class to wrap training code."""
    def __init__(self, train_data, model, controller, **kwargs):
        """Constructor for training algorithm.
        :param DataSet train_data: the training data
@@ -45,19 +44,19 @@ class ENASTrainer(Trainer):
        self.controller_step = 0
        self.shared_step = 0
        self.max_length = 35
        self.shared = model
        self.controller = controller
        self.shared_optim = Adam(
            self.shared.parameters(),
            lr=20.0,
            weight_decay=1e-7)
        self.controller_optim = Adam(
            self.controller.parameters(),
            lr=3.5e-4)
    def train(self, load_best_model=True):
        """
        :param bool load_best_model: 该参数只有在初始化提供了dev_data的情况下有效,如果True, trainer将在返回之前重新加载dev表现
@@ -82,21 +81,22 @@ class ENASTrainer(Trainer):
            self.model = self.model.cuda()
        self._model_device = self.model.parameters().__next__().device
        self._mode(self.model, is_test=False)
        self.start_time = str(datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
        start_time = time.time()
        print("training epochs started " + self.start_time, flush=True)
        try:
            self.callback_manager.on_train_begin()
            self._train()
            self.callback_manager.on_train_end()
        except (CallbackException, KeyboardInterrupt) as e:
            self.callback_manager.on_exception(e)
        if self.dev_data is not None:
            print("\nIn Epoch:{}/Step:{}, got best dev performance:".format(self.best_dev_epoch, self.best_dev_step) +
                  self.tester._format_eval_results(self.best_dev_perf),)
            print(
                "\nIn Epoch:{}/Step:{}, got best dev performance:".format(self.best_dev_epoch, self.best_dev_step) +
                self.tester._format_eval_results(self.best_dev_perf), )
            results['best_eval'] = self.best_dev_perf
            results['best_epoch'] = self.best_dev_epoch
            results['best_step'] = self.best_dev_step
@@ -110,9 +110,9 @@ class ENASTrainer(Trainer):
        finally:
            pass
        results['seconds'] = round(time.time() - start_time, 2)
        return results
    def _train(self):
        if not self.use_tqdm:
            from fastNLP.core.utils import _pseudo_tqdm as inner_tqdm
@@ -126,21 +126,21 @@ class ENASTrainer(Trainer):
        avg_loss = 0
        data_iterator = Batch(self.train_data, batch_size=self.batch_size, sampler=self.sampler, as_numpy=False,
                              prefetch=self.prefetch)
        for epoch in range(1, self.n_epochs+1):
        for epoch in range(1, self.n_epochs + 1):
            pbar.set_description_str(desc="Epoch {}/{}".format(epoch, self.n_epochs))
            last_stage = (epoch > self.n_epochs + 1 - self.final_epochs)
            if epoch == self.n_epochs + 1 - self.final_epochs:
                print('Entering the final stage. (Only train the selected structure)')
            # early stopping
            self.callback_manager.on_epoch_begin()
            # 1. Training the shared parameters omega of the child models
            self.train_shared(pbar)
            # 2. Training the controller parameters theta
            if not last_stage:
                self.train_controller()
            if ((self.validate_every > 0 and self.step % self.validate_every == 0) or
                (self.validate_every < 0 and self.step % len(data_iterator) == 0)) \
                    and self.dev_data is not None:
@@ -149,16 +149,15 @@ class ENASTrainer(Trainer):
                eval_res = self._do_validation(epoch=epoch, step=self.step)
                eval_str = "Evaluation at Epoch {}/{}. Step:{}/{}. ".format(epoch, self.n_epochs, self.step,
                                                                            total_steps) + \
                    self.tester._format_eval_results(eval_res)
                           self.tester._format_eval_results(eval_res)
                pbar.write(eval_str)
            # lr decay; early stopping
            self.callback_manager.on_epoch_end()
        # =============== epochs end =================== #
        pbar.close()
        # ============ tqdm end ============== #

    def get_loss(self, inputs, targets, hidden, dags):
        """Computes the loss for the same batch for M models.

@@ -167,7 +166,7 @@ class ENASTrainer(Trainer):
        """
        if not isinstance(dags, list):
            dags = [dags]
        loss = 0
        for dag in dags:
            self.shared.setDAG(dag)
@@ -175,14 +174,14 @@ class ENASTrainer(Trainer):
            inputs['hidden'] = hidden
            result = self.shared(**inputs)
            output, hidden, extra_out = result['pred'], result['hidden'], result['extra_out']
            self.callback_manager.on_loss_begin(targets, result)
            sample_loss = self._compute_loss(result, targets)
            loss += sample_loss
        assert len(dags) == 1, 'there are multiple `hidden` for multple `dags`'
        return loss, hidden, extra_out
    def train_shared(self, pbar=None, max_step=None, dag=None):
        """Train the language model for 400 steps of minibatches of 64
        examples.
@@ -200,9 +199,9 @@ class ENASTrainer(Trainer):
        model = self.shared
        model.train()
        self.controller.eval()
        hidden = self.shared.init_hidden(self.batch_size)
        abs_max_grad = 0
        abs_max_hidden_norm = 0
        step = 0
@@ -211,15 +210,15 @@ class ENASTrainer(Trainer):
        train_idx = 0
        avg_loss = 0
        data_iterator = Batch(self.train_data, batch_size=self.batch_size, sampler=self.sampler, as_numpy=False,
                                prefetch=self.prefetch)
                              prefetch=self.prefetch)
        for batch_x, batch_y in data_iterator:
            _move_dict_value_to_device(batch_x, batch_y, device=self._model_device)
            indices = data_iterator.get_batch_indices()
            # negative sampling; replace unknown; re-weight batch_y
            self.callback_manager.on_batch_begin(batch_x, batch_y, indices)
            # prediction = self._data_forward(self.model, batch_x)
            dags = self.controller.sample(1)
            inputs, targets = batch_x, batch_y
            # self.callback_manager.on_loss_begin(batch_y, prediction)
@@ -228,18 +227,18 @@ class ENASTrainer(Trainer):
                                                         hidden,
                                                         dags)
            hidden.detach_()
            avg_loss += loss.item()
            # Is loss NaN or inf? requires_grad = False
            self.callback_manager.on_backward_begin(loss)
            self._grad_backward(loss)
            self.callback_manager.on_backward_end()
            self._update()
            self.callback_manager.on_step_end()
            if (self.step+1) % self.print_every == 0:
            if (self.step + 1) % self.print_every == 0:
                if self.use_tqdm:
                    print_output = "loss:{0:<6.5f}".format(avg_loss / self.print_every)
                    pbar.update(self.print_every)
@@ -255,30 +254,29 @@ class ENASTrainer(Trainer):
            self.shared_step += 1
            self.callback_manager.on_batch_end()
        # ================= mini-batch end ==================== #

    def get_reward(self, dag, entropies, hidden, valid_idx=0):
        """Computes the perplexity of a single sampled model on a minibatch of
        validation data.
        """
        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()
        data_iterator = Batch(self.dev_data, batch_size=self.batch_size, sampler=self.sampler, as_numpy=False,
                                prefetch=self.prefetch)
                              prefetch=self.prefetch)
        for inputs, targets in data_iterator:
            valid_loss, hidden, _ = self.get_loss(inputs, targets, hidden, dag)
            valid_loss = utils.to_item(valid_loss.data)
        valid_ppl = math.exp(valid_loss)
        R = 80 / valid_ppl
        rewards = R + 1e-4 * entropies
        return rewards, hidden
    def train_controller(self):
        """Fixes the shared parameters and updates the controller parameters.

@@ -296,13 +294,13 @@ class ENASTrainer(Trainer):
        # Why can't we call shared.eval() here? Leads to loss
        # being uniformly zero for the controller.
        # self.shared.eval()
        avg_reward_base = None
        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []
        hidden = self.shared.init_hidden(self.batch_size)
        total_loss = 0
        valid_idx = 0
@@ -310,7 +308,7 @@ class ENASTrainer(Trainer):
            # sample models
            dags, log_probs, entropies = self.controller.sample(
                with_details=True)
            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            # No gradients should be backpropagated to the
@@ -320,40 +318,39 @@ class ENASTrainer(Trainer):
                                              np_entropies,
                                              hidden,
                                              valid_idx)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)
            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = 0.95
                baseline = decay * baseline + (1 - decay) * rewards
            adv = rewards - baseline
            adv_history.extend(adv)
            # policy loss
            loss = -log_probs*utils.get_variable(adv,
                                                 'cuda' in self.device,
                                                 requires_grad=False)
            loss = -log_probs * utils.get_variable(adv,
                                                   'cuda' in self.device,
                                                   requires_grad=False)
            loss = loss.sum()  # or loss.mean()
            # update
            self.controller_optim.zero_grad()
            loss.backward()
            self.controller_optim.step()
            total_loss += utils.to_item(loss.data)
            if ((step % 50) == 0) and (step > 0):
                reward_history, adv_history, entropy_history = [], [], []
                total_loss = 0
            self.controller_step += 1
            # prev_valid_idx = valid_idx
            # valid_idx = ((valid_idx + self.max_length) %
@@ -362,16 +359,16 @@ class ENASTrainer(Trainer):
        # # validation data, we reset the hidden states.
        # if prev_valid_idx > valid_idx:
        #     hidden = self.shared.init_hidden(self.batch_size)
    def derive(self, sample_num=10, valid_idx=0):
        """We are always deriving based on the very first batch
        of validation data? This seems wrong...
        """
        hidden = self.shared.init_hidden(self.batch_size)
        dags, _, entropies = self.controller.sample(sample_num,
                                                    with_details=True)
        max_R = 0
        best_dag = None
        for dag in dags:
@@ -379,5 +376,5 @@ class ENASTrainer(Trainer):
            if R.max() > max_R:
                max_R = R.max()
                best_dag = dag
        self.model.setDAG(best_dag)

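train_controller above follows the usual REINFORCE recipe: rewards come from get_reward, a moving-average baseline (decay 0.95) reduces variance, and the policy loss is -log_prob * advantage summed over the sampled DAGs. A compact sketch of just that update, with random tensors standing in for the controller outputs; this is illustrative, not the ENASTrainer code:

    import torch

    decay = 0.95
    baseline = None
    for step in range(3):
        log_probs = torch.randn(5, requires_grad=True)  # stand-in for controller log-probs
        rewards = torch.rand(5)                         # stand-in for get_reward output
        baseline = rewards if baseline is None else decay * baseline + (1 - decay) * rewards
        adv = rewards - baseline                        # advantage w.r.t. the moving average
        loss = (-log_probs * adv.detach()).sum()        # policy-gradient loss, as in the diff
        loss.backward()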
+ 0
- 2
fastNLP/models/enas_utils.py View File

@@ -1,12 +1,10 @@
# Code Modified from https://github.com/carpedm20/ENAS-pytorch

from __future__ import print_function

from collections import defaultdict
import collections

import numpy as np

import torch
from torch.autograd import Variable




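enas_model.py above caches initial hidden states per batch size via utils.keydefaultdict(self.init_hidden). Assuming it has the usual semantics of a defaultdict whose factory is called with the missing key (as in the upstream ENAS code), a minimal sketch:

    import collections

    class keydefaultdict(collections.defaultdict):
        # like defaultdict, but the factory receives the missing key
        def __missing__(self, key):
            if self.default_factory is None:
                raise KeyError(key)
            self[key] = value = self.default_factory(key)
            return value

    hidden_cache = keydefaultdict(lambda batch_size: [0.0] * batch_size)
    print(hidden_cache[3])  # [0.0, 0.0, 0.0], built lazily per batch size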
+ 9
- 1
fastNLP/models/sequence_labeling.py View File

@@ -1,11 +1,19 @@
"""
本模块实现了两种序列标注模型
"""
import torch
import torch.nn as nn

from .base_model import BaseModel
from ..modules import decoder, encoder
from ..modules.decoder.CRF import allowed_transitions
from ..core.utils import seq_len_to_mask
from ..core.const import Const as C
from torch import nn

__all__ = [
    "SeqLabeling",
    "AdvSeqLabel"
]


class SeqLabeling(BaseModel):


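The hunk above adds a module docstring and an __all__ list to sequence_labeling.py. For reference, __all__ only affects wildcard imports; a tiny illustration with a hypothetical module name:

    # hypothetical my_models.py
    __all__ = ["SeqLabeling", "AdvSeqLabel"]

    class SeqLabeling: ...
    class AdvSeqLabel: ...
    class _Helper: ...  # not listed, so not exported by a wildcard import

    # elsewhere:
    # from my_models import *        -> brings in SeqLabeling and AdvSeqLabel only
    # from my_models import _Helper  -> still works as an explicit import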
+ 32
- 29
fastNLP/models/snli.py View File

@@ -8,6 +8,9 @@ from ..modules import encoder as Encoder
from ..modules import aggregator as Aggregator
from ..core.utils import seq_len_to_mask

__all__ = [
    "ESIM"
]

my_inf = 10e12

@@ -26,7 +29,7 @@ class ESIM(BaseModel):
    :param int num_classes: 标签数目,默认为3
    :param numpy.array init_embedding: 初始词嵌入矩阵,形状为(vocab_size, embed_dim),默认为None,即随机初始化词嵌入矩阵
    """
    def __init__(self, vocab_size, embed_dim, hidden_size, dropout=0.0, num_classes=3, init_embedding=None):
        super(ESIM, self).__init__()
@@ -35,35 +38,36 @@ class ESIM(BaseModel):
        self.hidden_size = hidden_size
        self.dropout = dropout
        self.n_labels = num_classes
        self.drop = nn.Dropout(self.dropout)
        self.embedding = Encoder.Embedding(
            (self.vocab_size, self.embed_dim), dropout=self.dropout,
        )
        self.embedding_layer = nn.Linear(self.embed_dim, self.hidden_size)
        self.encoder = Encoder.LSTM(
            input_size=self.embed_dim, hidden_size=self.hidden_size, num_layers=1, bias=True,
            batch_first=True, bidirectional=True
        )
        self.bi_attention = Aggregator.BiAttention()
        self.mean_pooling = Aggregator.AvgPoolWithMask()
        self.max_pooling = Aggregator.MaxPoolWithMask()
        self.inference_layer = nn.Linear(self.hidden_size * 4, self.hidden_size)
        self.decoder = Encoder.LSTM(
            input_size=self.hidden_size, hidden_size=self.hidden_size, num_layers=1, bias=True,
            batch_first=True, bidirectional=True
        )
        self.output = Decoder.MLP([4 * self.hidden_size, self.hidden_size, self.n_labels], 'tanh', dropout=self.dropout)
    def forward(self, words1, words2, seq_len1=None, seq_len2=None, target=None):
        """ Forward function
        :param torch.Tensor words1: [batch size(B), premise seq len(PL)] premise的token表示
        :param torch.Tensor words2: [B, hypothesis seq len(HL)] hypothesis的token表示
        :param torch.LongTensor seq_len1: [B] premise的长度
@@ -71,10 +75,10 @@ class ESIM(BaseModel):
        :param torch.LongTensor target: [B] 真实目标值
        :return: dict prediction: [B, n_labels(N)] 预测结果
        """
        premise0 = self.embedding_layer(self.embedding(words1))
        hypothesis0 = self.embedding_layer(self.embedding(words2))
        if seq_len1 is not None:
            seq_len1 = seq_len_to_mask(seq_len1)
        else:
@@ -85,55 +89,55 @@ class ESIM(BaseModel):
        else:
            seq_len2 = torch.ones(hypothesis0.size(0), hypothesis0.size(1))
            seq_len2 = (seq_len2.long()).to(device=hypothesis0.device)
        _BP, _PSL, _HP = premise0.size()
        _BH, _HSL, _HH = hypothesis0.size()
        _BPL, _PLL = seq_len1.size()
        _HPL, _HLL = seq_len2.size()
        assert _BP == _BH and _BPL == _HPL and _BP == _BPL
        assert _HP == _HH
        assert _PSL == _PLL and _HSL == _HLL
        B, PL, H = premise0.size()
        B, HL, H = hypothesis0.size()
        a0 = self.encoder(self.drop(premise0))  # a0: [B, PL, H * 2]
        b0 = self.encoder(self.drop(hypothesis0))  # b0: [B, HL, H * 2]
        a = torch.mean(a0.view(B, PL, -1, H), dim=2)  # a: [B, PL, H]
        b = torch.mean(b0.view(B, HL, -1, H), dim=2)  # b: [B, HL, H]
        ai, bi = self.bi_attention(a, b, seq_len1, seq_len2)
        ma = torch.cat((a, ai, a - ai, a * ai), dim=2)  # ma: [B, PL, 4 * H]
        mb = torch.cat((b, bi, b - bi, b * bi), dim=2)  # mb: [B, HL, 4 * H]
        f_ma = self.inference_layer(ma)
        f_mb = self.inference_layer(mb)
        vat = self.decoder(self.drop(f_ma))
        vbt = self.decoder(self.drop(f_mb))
        va = torch.mean(vat.view(B, PL, -1, H), dim=2)  # va: [B, PL, H]
        vb = torch.mean(vbt.view(B, HL, -1, H), dim=2)  # vb: [B, HL, H]
        va_ave = self.mean_pooling(va, seq_len1, dim=1)  # va_ave: [B, H]
        va_max, va_arg_max = self.max_pooling(va, seq_len1, dim=1)  # va_max: [B, H]
        vb_ave = self.mean_pooling(vb, seq_len2, dim=1)  # vb_ave: [B, H]
        vb_max, vb_arg_max = self.max_pooling(vb, seq_len2, dim=1)  # vb_max: [B, H]
        v = torch.cat((va_ave, va_max, vb_ave, vb_max), dim=1)  # v: [B, 4 * H]
        prediction = torch.tanh(self.output(v))  # prediction: [B, N]
        if target is not None:
            func = nn.CrossEntropyLoss()
            loss = func(prediction, target)
            return {Const.OUTPUT: prediction, Const.LOSS: loss}
        return {Const.OUTPUT: prediction}
    def predict(self, words1, words2, seq_len1=None, seq_len2=None, target=None):
        """ Predict function

@@ -146,4 +150,3 @@ class ESIM(BaseModel):
        """
        prediction = self.forward(words1, words2, seq_len1, seq_len2)[Const.OUTPUT]
        return {Const.OUTPUT: torch.argmax(prediction, dim=-1)}


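In ESIM.forward above, each sequence is enhanced by concatenating the encoded states with the attended states plus their difference and element-wise product ([a, ai, a - ai, a * ai]), giving 4*H-dimensional local inference features. A small shape check of that step, with random tensors in place of the encoder/attention outputs:

    import torch

    B, PL, H = 2, 7, 5
    a = torch.randn(B, PL, H)   # stand-in for the encoded premise
    ai = torch.randn(B, PL, H)  # stand-in for the attended hypothesis (bi_attention output)
    ma = torch.cat((a, ai, a - ai, a * ai), dim=2)
    print(ma.shape)             # torch.Size([2, 7, 20]) -> [B, PL, 4 * H]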
+ 40
- 27
fastNLP/models/star_transformer.py View File

@@ -1,17 +1,25 @@
"""Star-Transformer 的 一个 Pytorch 实现.
"""
Star-Transformer 的 Pytorch 实现。
"""
import torch
from torch import nn

from ..modules.encoder.star_transformer import StarTransformer
from ..core.utils import seq_len_to_mask
from ..modules.utils import get_embeddings
from ..core.const import Const

import torch
from torch import nn
__all__ = [
    "StarTransEnc",
    "STNLICls",
    "STSeqCls",
    "STSeqLabel",
]


class StarTransEnc(nn.Module):
    """
    别名::class:`fastNLP.models.StarTransEnc` :class:`fastNLP.models.start_transformer.StarTransEnc`
    别名::class:`fastNLP.models.StarTransEnc` :class:`fastNLP.models.star_transformer.StarTransEnc`

    带word embedding的Star-Transformer Encoder

@@ -28,6 +36,7 @@ class StarTransEnc(nn.Module):
    :param emb_dropout: 词嵌入的dropout概率.
    :param dropout: 模型除词嵌入外的dropout概率.
    """
    def __init__(self, init_embed,
                 hidden_size,
                 num_layers,
@@ -47,7 +56,7 @@ class StarTransEnc(nn.Module):
                                           head_dim=head_dim,
                                           dropout=dropout,
                                           max_len=max_len)
    def forward(self, x, mask):
        """
        :param FloatTensor data: [batch, length, hidden] 输入的序列
@@ -72,7 +81,7 @@ class _Cls(nn.Module):
            nn.Dropout(dropout),
            nn.Linear(hid_dim, num_cls),
        )
    def forward(self, x):
        h = self.fc(x)
        return h
@@ -83,20 +92,21 @@ class _NLICls(nn.Module):
        super(_NLICls, self).__init__()
        self.fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(in_dim*4, hid_dim),  #4
            nn.Linear(in_dim * 4, hid_dim),  # 4
            nn.LeakyReLU(),
            nn.Dropout(dropout),
            nn.Linear(hid_dim, num_cls),
        )
    def forward(self, x1, x2):
        x = torch.cat([x1, x2, torch.abs(x1-x2), x1*x2], 1)
        x = torch.cat([x1, x2, torch.abs(x1 - x2), x1 * x2], 1)
        h = self.fc(x)
        return h


class STSeqLabel(nn.Module):
    """
    别名::class:`fastNLP.models.STSeqLabel` :class:`fastNLP.models.start_transformer.STSeqLabel`
    别名::class:`fastNLP.models.STSeqLabel` :class:`fastNLP.models.star_transformer.STSeqLabel`

    用于序列标注的Star-Transformer模型

@@ -112,6 +122,7 @@ class STSeqLabel(nn.Module):
    :param emb_dropout: 词嵌入的dropout概率. Default: 0.1
    :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1
    """
    def __init__(self, init_embed, num_cls,
                 hidden_size=300,
                 num_layers=4,
@@ -120,7 +131,7 @@ class STSeqLabel(nn.Module):
                 max_len=512,
                 cls_hidden_size=600,
                 emb_dropout=0.1,
                 dropout=0.1,):
                 dropout=0.1, ):
        super(STSeqLabel, self).__init__()
        self.enc = StarTransEnc(init_embed=init_embed,
                                hidden_size=hidden_size,
@@ -131,7 +142,7 @@ class STSeqLabel(nn.Module):
                                emb_dropout=emb_dropout,
                                dropout=dropout)
        self.cls = _Cls(hidden_size, num_cls, cls_hidden_size)
    def forward(self, words, seq_len):
        """

@@ -142,9 +153,9 @@ class STSeqLabel(nn.Module):
        mask = seq_len_to_mask(seq_len)
        nodes, _ = self.enc(words, mask)
        output = self.cls(nodes)
        output = output.transpose(1,2) # make hidden to be dim 1
        return {Const.OUTPUT: output} # [bsz, n_cls, seq_len]
        output = output.transpose(1, 2)  # make hidden to be dim 1
        return {Const.OUTPUT: output}  # [bsz, n_cls, seq_len]
    def predict(self, words, seq_len):
        """

@@ -159,7 +170,7 @@ class STSeqLabel(nn.Module):

class STSeqCls(nn.Module):
    """
    别名::class:`fastNLP.models.STSeqCls` :class:`fastNLP.models.start_transformer.STSeqCls`
    别名::class:`fastNLP.models.STSeqCls` :class:`fastNLP.models.star_transformer.STSeqCls`

    用于分类任务的Star-Transformer

@@ -175,7 +186,7 @@ class STSeqCls(nn.Module):
    :param emb_dropout: 词嵌入的dropout概率. Default: 0.1
    :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1
    """
    def __init__(self, init_embed, num_cls,
                 hidden_size=300,
                 num_layers=4,
@@ -184,7 +195,7 @@ class STSeqCls(nn.Module):
                 max_len=512,
                 cls_hidden_size=600,
                 emb_dropout=0.1,
                 dropout=0.1,):
                 dropout=0.1, ):
        super(STSeqCls, self).__init__()
        self.enc = StarTransEnc(init_embed=init_embed,
                                hidden_size=hidden_size,
@@ -195,7 +206,7 @@ class STSeqCls(nn.Module):
                                emb_dropout=emb_dropout,
                                dropout=dropout)
        self.cls = _Cls(hidden_size, num_cls, cls_hidden_size)
    def forward(self, words, seq_len):
        """

@@ -206,9 +217,9 @@ class STSeqCls(nn.Module):
        mask = seq_len_to_mask(seq_len)
        nodes, relay = self.enc(words, mask)
        y = 0.5 * (relay + nodes.max(1)[0])
        output = self.cls(y) # [bsz, n_cls]
        output = self.cls(y)  # [bsz, n_cls]
        return {Const.OUTPUT: output}
    def predict(self, words, seq_len):
        """

@@ -223,7 +234,7 @@ class STNLICls(nn.Module):

class STNLICls(nn.Module):
    """
    别名::class:`fastNLP.models.STNLICls` :class:`fastNLP.models.start_transformer.STNLICls`
    别名::class:`fastNLP.models.STNLICls` :class:`fastNLP.models.star_transformer.STNLICls`
    用于自然语言推断(NLI)的Star-Transformer

@@ -239,7 +250,7 @@ class STNLICls(nn.Module):
    :param emb_dropout: 词嵌入的dropout概率. Default: 0.1
    :param dropout: 模型除词嵌入外的dropout概率. Default: 0.1
    """
    def __init__(self, init_embed, num_cls,
                 hidden_size=300,
                 num_layers=4,
@@ -248,7 +259,7 @@ class STNLICls(nn.Module):
                 max_len=512,
                 cls_hidden_size=600,
                 emb_dropout=0.1,
                 dropout=0.1,):
                 dropout=0.1, ):
        super(STNLICls, self).__init__()
        self.enc = StarTransEnc(init_embed=init_embed,
                                hidden_size=hidden_size,
@@ -259,7 +270,7 @@ class STNLICls(nn.Module):
                                emb_dropout=emb_dropout,
                                dropout=dropout)
        self.cls = _NLICls(hidden_size, num_cls, cls_hidden_size)
    def forward(self, words1, words2, seq_len1, seq_len2):
        """

@@ -271,14 +282,16 @@ class STNLICls(nn.Module):
        """
        mask1 = seq_len_to_mask(seq_len1)
        mask2 = seq_len_to_mask(seq_len2)
        def enc(seq, mask):
            nodes, relay = self.enc(seq, mask)
            return 0.5 * (relay + nodes.max(1)[0])
        y1 = enc(words1, mask1)
        y2 = enc(words2, mask2)
        output = self.cls(y1, y2) # [bsz, n_cls]
        output = self.cls(y1, y2)  # [bsz, n_cls]
        return {Const.OUTPUT: output}
    def predict(self, words1, words2, seq_len1, seq_len2):
        """


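STSeqCls and STNLICls above both pool the encoder output as 0.5 * (relay + nodes.max(1)[0]), i.e. the average of the relay node and the max-pooled satellite nodes. A quick shape sketch with random tensors standing in for the encoder outputs; not part of fastNLP itself:

    import torch

    bsz, seq_len, hidden = 2, 6, 8
    nodes = torch.randn(bsz, seq_len, hidden)  # per-token satellite nodes
    relay = torch.randn(bsz, hidden)           # global relay node
    y = 0.5 * (relay + nodes.max(1)[0])        # [bsz, hidden], fed to the classifier head
    print(y.shape)                             # torch.Size([2, 8])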