From d0354d8e2883b4433378c4a6a247972de664a06d Mon Sep 17 00:00:00 2001
From: ChenXin <will131@foxmail.com>
Date: Tue, 20 Aug 2019 00:00:31 +0800
Subject: [PATCH] fix missing exports (LRFinder, AdamW), add __all__ to core/field.py, reformat field.py

---
 fastNLP/__init__.py      |   5 +-
 fastNLP/core/__init__.py |   2 +-
 fastNLP/core/field.py    | 191 +++++++++++++++++++++------------------
 3 files changed, 108 insertions(+), 90 deletions(-)

diff --git a/fastNLP/__init__.py b/fastNLP/__init__.py
index a6767088..879fd644 100644
--- a/fastNLP/__init__.py
+++ b/fastNLP/__init__.py
@@ -14,6 +14,7 @@ __all__ = [
     "Instance",
     "FieldArray",
     
+    
     "DataSetIter",
     "BatchIter",
     "TorchLoaderIter",
@@ -31,6 +32,7 @@ __all__ = [
     "TensorboardCallback",
     "LRScheduler",
     "ControlC",
+    "LRFinder",
     
     "Padder",
     "AutoPadder",
@@ -43,7 +45,8 @@ __all__ = [
     "Optimizer",
     "SGD",
     "Adam",
-    
+    "AdamW",
+
     "Sampler",
     "SequentialSampler",
     "BucketSampler",
diff --git a/fastNLP/core/__init__.py b/fastNLP/core/__init__.py
index acf0efc4..4a43b73d 100644
--- a/fastNLP/core/__init__.py
+++ b/fastNLP/core/__init__.py
@@ -22,7 +22,7 @@ from .field import FieldArray, Padder, AutoPadder, EngChar2DPadder
 from .instance import Instance
 from .losses import LossFunc, CrossEntropyLoss, L1Loss, BCELoss, NLLLoss, LossInForward
 from .metrics import AccuracyMetric, SpanFPreRecMetric, ExtractiveQAMetric
-from .optimizer import Optimizer, SGD, Adam
+from .optimizer import Optimizer, SGD, Adam, AdamW
 from .sampler import SequentialSampler, BucketSampler, RandomSampler, Sampler
 from .tester import Tester
 from .trainer import Trainer
diff --git a/fastNLP/core/field.py b/fastNLP/core/field.py
index 65bd9be4..26d22ada 100644
--- a/fastNLP/core/field.py
+++ b/fastNLP/core/field.py
@@ -1,4 +1,8 @@
-
+__all__ = [
+    "Padder",
+    "AutoPadder",
+    "EngChar2DPadder",
+]
 
 from numbers import Number
 import torch
@@ -9,24 +13,27 @@ from copy import deepcopy
 from collections import Counter
 from .utils import _is_iterable
 
+
 class SetInputOrTargetException(Exception):
     def __init__(self, msg, index=None, field_name=None):
         super().__init__(msg)
         self.msg = msg
         self.index = index  # 标示在哪个数据遭遇到问题了
-        self.field_name = field_name # 标示当前field的名称
+        self.field_name = field_name  # 标示当前field的名称
+
 
 class AppendToTargetOrInputException(Exception):
     def __init__(self, msg, index=None, field_name=None):
         super().__init__(msg)
         self.msg = msg
         self.index = index  # 标示在哪个数据遭遇到问题了
-        self.field_name = field_name # 标示当前field的名称
+        self.field_name = field_name  # 标示当前field的名称
+
 
 class FieldArray:
     def __init__(self, name, content, is_target=False, is_input=False, padder=None, ignore_type=False,
                  use_1st_ins_infer_dim_type=True):
-        if len(content)==0:
+        if len(content) == 0:
             raise RuntimeError("Empty fieldarray is not allowed.")
         _content = content
         try:
@@ -43,34 +50,34 @@ class FieldArray:
         self._use_1st_ins_infer_dim_type = bool(use_1st_ins_infer_dim_type)
         self._is_input = False
         self._is_target = False
-
+        
         if is_input:
             self.is_input = is_input
         if is_target:
             self.is_target = is_target
-
+        
         if padder is None:
             padder = AutoPadder(pad_val=0)
         else:
             assert isinstance(padder, Padder), "padder must be of type fastNLP.Padder."
             padder = deepcopy(padder)
         self.set_padder(padder)
-
+    
     @property
     def ignore_type(self):
         return self._ignore_type
-
+    
     @ignore_type.setter
     def ignore_type(self, value):
         if value:
             self._cell_ndim = None
             self.dtype = None
         self._ignore_type = value
-
+    
     @property
     def is_input(self):
         return self._is_input
-
+    
     @is_input.setter
     def is_input(self, value):
         """
@@ -85,11 +92,11 @@ class FieldArray:
             self.dtype = None
             self._cell_ndim = None
         self._is_input = value
-
+    
     @property
     def is_target(self):
         return self._is_target
-
+    
     @is_target.setter
     def is_target(self, value):
         """
@@ -103,7 +110,7 @@ class FieldArray:
             self.dtype = None
             self._cell_ndim = None
         self._is_target = value
-
+    
     def _check_dtype_and_ndim(self, only_check_1st_ins_dim_type=True):
         """
         检查当前content所有的element是否是同一个类型，且是否每个元素具有相同的维度。通过的话，设置_cell_ndim与_ele_type属性；没有
@@ -120,35 +127,37 @@ class FieldArray:
                 for cell in self.content[1:]:
                     index += 1
                     type_i, dim_i = _get_ele_type_and_dim(cell)
-                    if type_i!=type_0:
-                        raise SetInputOrTargetException("Type:{} in index {} is different from the first element with type:{}."
-                                                        ".".format(type_i, index, type_0))
-                    if dim_0!=dim_i:
-                        raise SetInputOrTargetException("Dimension:{} in index {} is different from the first element with "
-                                                        "dimension:{}.".format(dim_i, index, dim_0))
+                    if type_i != type_0:
+                        raise SetInputOrTargetException(
+                            "Type:{} in index {} is different from the first element with type:{}."
+                            ".".format(type_i, index, type_0))
+                    if dim_0 != dim_i:
+                        raise SetInputOrTargetException(
+                            "Dimension:{} in index {} is different from the first element with "
+                            "dimension:{}.".format(dim_i, index, dim_0))
             self._cell_ndim = dim_0
             self.dtype = type_0
         except SetInputOrTargetException as e:
             e.index = index
             raise e
-
-    def append(self, val:Any):
+    
+    def append(self, val: Any):
         """
         :param val: 把该val append到fieldarray。
         :return:
         """
         if (self._is_target or self._is_input) and self._ignore_type is False and not self._use_1st_ins_infer_dim_type:
             type_, dim_ = _get_ele_type_and_dim(val)
-            if self.dtype!=type_:
+            if self.dtype != type_:
                 raise AppendToTargetOrInputException(f"Value(type:{type_}) are of different types with "
                                                      f"previous values(type:{self.dtype}).")
-            if self._cell_ndim!=dim_:
+            if self._cell_ndim != dim_:
                 raise AppendToTargetOrInputException(f"Value(dim:{dim_}) are of different dimensions with "
                                                      f"previous values(dim:{self._cell_ndim}).")
             self.content.append(val)
         else:
             self.content.append(val)
-
+    
     def pop(self, index):
         """
         删除该field中index处的元素
@@ -156,22 +165,22 @@ class FieldArray:
         :return:
         """
         self.content.pop(index)
-
+    
     def __getitem__(self, indices):
         return self.get(indices, pad=False)
-
+    
     def __setitem__(self, idx, val):
         assert isinstance(idx, int)
         if (self._is_target or self._is_input) and self.ignore_type is False:  # 需要检测类型
             type_, dim_ = _get_ele_type_and_dim(val)
-            if self.dtype!=type_:
+            if self.dtype != type_:
                 raise RuntimeError(f"Value(type:{type_}) are of different types with "
-                                                     f"other values(type:{self.dtype}).")
-            if self._cell_ndim!=dim_:
+                                   f"other values(type:{self.dtype}).")
+            if self._cell_ndim != dim_:
                 raise RuntimeError(f"Value(dim:{dim_}) are of different dimensions with "
-                                                     f"previous values(dim:{self._cell_ndim}).")
+                                   f"previous values(dim:{self._cell_ndim}).")
         self.content[idx] = val
-
+    
     def get(self, indices, pad=True):
         """
         根据给定的indices返回内容
@@ -184,16 +193,16 @@ class FieldArray:
             return self.content[indices]
         if self.is_input is False and self.is_target is False:
             raise RuntimeError("Please specify either is_input or is_target to True for {}".format(self.name))
-
+        
         contents = [self.content[i] for i in indices]
         if self.padder is None or pad is False:
             return np.array(contents)
         else:
             return self.pad(contents)
-
+    
     def pad(self, contents):
         return self.padder(contents, field_name=self.name, field_ele_dtype=self.dtype, dim=self._cell_ndim)
-
+    
     def set_padder(self, padder):
         """
         设置padder，在这个field进行pad的时候用这个padder进行pad，如果为None则不进行pad。
@@ -205,7 +214,7 @@ class FieldArray:
             self.padder = deepcopy(padder)
         else:
             self.padder = None
-
+    
     def set_pad_val(self, pad_val):
         """
         修改padder的pad_val.
@@ -215,7 +224,7 @@ class FieldArray:
         if self.padder is not None:
             self.padder.set_pad_val(pad_val)
         return self
-
+    
     def __len__(self):
         """
         Returns the size of FieldArray.
@@ -223,7 +232,7 @@ class FieldArray:
         :return int length:
         """
         return len(self.content)
-
+    
     def to(self, other):
         """
         将other的属性复制给本FieldArray(other必须为FieldArray类型).
@@ -233,15 +242,15 @@ class FieldArray:
         :return: :class:`~fastNLP.FieldArray`
         """
         assert isinstance(other, FieldArray), "Only supports fastNLP.FieldArray type, not {}.".format(type(other))
-
+        
         self.ignore_type = other.ignore_type
         self.is_input = other.is_input
         self.is_target = other.is_target
         self.padder = other.padder
-
+        
         return self
-
-    def split(self, sep:str=None, inplace:bool=True):
+    
+    def split(self, sep: str = None, inplace: bool = True):
         """
         依次对自身的元素使用.split()方法，应该只有当本field的元素为str时，该方法才有用。将返回值
 
@@ -257,8 +266,8 @@ class FieldArray:
                 print(f"Exception happens when process value in index {index}.")
                 raise e
         return self._after_process(new_contents, inplace=inplace)
-
-    def int(self, inplace:bool=True):
+    
+    def int(self, inplace: bool = True):
         """
         将本field中的值调用int(cell). 支持field中内容为以下两种情况(1)['1', '2', ...](即field中每个值为str的)，
             (2) [['1', '2', ..], ['3', ..], ...](即field中每个值为一个list，list中的值会被依次转换。)
@@ -277,7 +286,7 @@ class FieldArray:
                 print(f"Exception happens when process value in index {index}.")
                 print(e)
         return self._after_process(new_contents, inplace=inplace)
-
+    
     def float(self, inplace=True):
         """
         将本field中的值调用float(cell). 支持field中内容为以下两种情况(1)['1', '2', ...](即field中每个值为str的)，
@@ -297,7 +306,7 @@ class FieldArray:
                 print(f"Exception happens when process value in index {index}.")
                 raise e
         return self._after_process(new_contents, inplace=inplace)
-
+    
     def bool(self, inplace=True):
         """
         将本field中的值调用bool(cell). 支持field中内容为以下两种情况(1)['1', '2', ...](即field中每个值为str的)，
@@ -316,9 +325,9 @@ class FieldArray:
             except Exception as e:
                 print(f"Exception happens when process value in index {index}.")
                 raise e
-
+        
         return self._after_process(new_contents, inplace=inplace)
-
+    
     def lower(self, inplace=True):
         """
         将本field中的值调用cell.lower(). 支持field中内容为以下两种情况(1)['1', '2', ...](即field中每个值为str的)，
@@ -338,7 +347,7 @@ class FieldArray:
                 print(f"Exception happens when process value in index {index}.")
                 raise e
         return self._after_process(new_contents, inplace=inplace)
-
+    
     def upper(self, inplace=True):
         """
         将本field中的值调用cell.lower(). 支持field中内容为以下两种情况(1)['1', '2', ...](即field中每个值为str的)，
@@ -358,7 +367,7 @@ class FieldArray:
                 print(f"Exception happens when process value in index {index}.")
                 raise e
         return self._after_process(new_contents, inplace=inplace)
-
+    
     def value_count(self):
         """
         返回该field下不同value的数量。多用于统计label数量
@@ -366,17 +375,18 @@ class FieldArray:
         :return: Counter, key是label，value是出现次数
         """
         count = Counter()
-
+        
         def cum(cell):
             if _is_iterable(cell) and not isinstance(cell, str):
                 for cell_ in cell:
                     cum(cell_)
             else:
                 count[cell] += 1
+        
         for cell in self.content:
             cum(cell)
         return count
-
+    
     def _after_process(self, new_contents, inplace):
         """
         当调用处理函数之后，决定是否要替换field。
@@ -398,7 +408,7 @@ class FieldArray:
             return new_contents
 
 
-def _get_ele_type_and_dim(cell:Any, dim=0):
+def _get_ele_type_and_dim(cell: Any, dim=0):
     """
     识别cell的类别与dimension的数量
 
@@ -414,13 +424,13 @@ def _get_ele_type_and_dim(cell:Any, dim=0):
     elif isinstance(cell, list):
         dim += 1
         res = [_get_ele_type_and_dim(cell_i, dim) for cell_i in cell]
-        types = set([i for i,j in res])
-        dims = set([j for i,j in res])
-        if len(types)>1:
+        types = set([i for i, j in res])
+        dims = set([j for i, j in res])
+        if len(types) > 1:
             raise SetInputOrTargetException("Mixed types detected: {}.".format(list(types)))
-        elif len(types)==0:
+        elif len(types) == 0:
             raise SetInputOrTargetException("Empty value encountered.")
-        if len(dims)>1:
+        if len(dims) > 1:
             raise SetInputOrTargetException("Mixed dimension detected: {}.".format(list(dims)))
         return types.pop(), dims.pop()
     elif isinstance(cell, torch.Tensor):
@@ -431,16 +441,16 @@ def _get_ele_type_and_dim(cell:Any, dim=0):
         # 否则需要继续往下iterate
         dim += 1
         res = [_get_ele_type_and_dim(cell_i, dim) for cell_i in cell]
-        types = set([i for i,j in res])
-        dims = set([j for i,j in res])
-        if len(types)>1:
+        types = set([i for i, j in res])
+        dims = set([j for i, j in res])
+        if len(types) > 1:
             raise SetInputOrTargetException("Mixed types detected: {}.".format(list(types)))
-        elif len(types)==0:
+        elif len(types) == 0:
             raise SetInputOrTargetException("Empty value encountered.")
-        if len(dims)>1:
+        if len(dims) > 1:
             raise SetInputOrTargetException("Mixed dimension detected: {}.".format(list(dims)))
         return types.pop(), dims.pop()
-    else: # 包含tuple, set, dict以及其它的类型
+    else:  # 包含tuple, set, dict以及其它的类型
         raise SetInputOrTargetException(f"Cannot process type:{type(cell)}.")
 
 
@@ -462,15 +472,15 @@ class Padder:
         :return: np.array([padded_element])
 
     """
-
+    
     def __init__(self, pad_val=0, **kwargs):
         self.pad_val = pad_val
-
+    
     def set_pad_val(self, pad_val):
         self.pad_val = pad_val
-
+    
     @abstractmethod
-    def __call__(self, contents, field_name, field_ele_dtype, dim:int):
+    def __call__(self, contents, field_name, field_ele_dtype, dim: int):
         """
         传入的是List内容。假设有以下的DataSet。
 
@@ -537,23 +547,24 @@ class AutoPadder(Padder):
 
     3 其它情况不进行处理，返回一个np.array类型。
     """
+    
     def __init__(self, pad_val=0):
         super().__init__(pad_val=pad_val)
-
+    
     def __call__(self, contents, field_name, field_ele_dtype, dim):
         if field_ele_dtype:
-            if dim>3:
+            if dim > 3:
                 return np.array(contents)
             if isinstance(field_ele_dtype, type) and \
                     (issubclass(field_ele_dtype, np.number) or issubclass(field_ele_dtype, Number)):
-                if dim==0:
+                if dim == 0:
                     array = np.array(contents, dtype=field_ele_dtype)
-                elif dim==1:
+                elif dim == 1:
                     max_len = max(map(len, contents))
                     array = np.full((len(contents), max_len), self.pad_val, dtype=field_ele_dtype)
                     for i, content_i in enumerate(contents):
                         array[i, :len(content_i)] = content_i
-                elif dim==2:
+                elif dim == 2:
                     max_len = max(map(len, contents))
                     max_word_len = max([max([len(content_ii) for content_ii in content_i]) for
                                         content_i in contents])
@@ -563,20 +574,21 @@ class AutoPadder(Padder):
                             array[i, j, :len(content_ii)] = content_ii
                 else:
                     shape = np.shape(contents)
-                    if len(shape)==4: # 说明各dimension是相同的大小
+                    if len(shape) == 4:  # 说明各dimension是相同的大小
                         array = np.array(contents, dtype=field_ele_dtype)
                     else:
-                        raise RuntimeError(f"Field:{field_name} has 3 dimensions, every sample should have the same shape.")
+                        raise RuntimeError(
+                            f"Field:{field_name} has 3 dimensions, every sample should have the same shape.")
                 return array
             elif str(field_ele_dtype).startswith('torch'):
-                if dim==0:
+                if dim == 0:
                     tensor = torch.tensor(contents).to(field_ele_dtype)
-                elif dim==1:
+                elif dim == 1:
                     max_len = max(map(len, contents))
                     tensor = torch.full((len(contents), max_len), fill_value=self.pad_val, dtype=field_ele_dtype)
                     for i, content_i in enumerate(contents):
                         tensor[i, :len(content_i)] = torch.tensor(content_i)
-                elif dim==2:
+                elif dim == 2:
                     max_len = max(map(len, contents))
                     max_word_len = max([max([len(content_ii) for content_ii in content_i]) for
                                         content_i in contents])
@@ -587,15 +599,18 @@ class AutoPadder(Padder):
                             tensor[i, j, :len(content_ii)] = torch.tensor(content_ii)
                 else:
                     shapes = set([np.shape(content_i) for content_i in contents])
-                    if len(shapes)>1:
-                        raise RuntimeError(f"Field:{field_name} has 3 dimensions, every sample should have the same shape.")
+                    if len(shapes) > 1:
+                        raise RuntimeError(
+                            f"Field:{field_name} has 3 dimensions, every sample should have the same shape.")
                     shape = shapes.pop()
-                    if len(shape)==3:
-                        tensor = torch.full([len(contents)]+list(shape), fill_value=self.pad_val, dtype=field_ele_dtype)
+                    if len(shape) == 3:
+                        tensor = torch.full([len(contents)] + list(shape), fill_value=self.pad_val,
+                                            dtype=field_ele_dtype)
                         for i, content_i in enumerate(contents):
                             tensor[i] = torch.tensor(content_i, dtype=field_ele_dtype)
                     else:
-                        raise RuntimeError(f"Field:{field_name} has 3 dimensions, every sample should have the same shape.")
+                        raise RuntimeError(
+                            f"Field:{field_name} has 3 dimensions, every sample should have the same shape.")
                 return tensor
             else:
                 return np.array(contents)  # 不进行任何操作
@@ -626,7 +641,7 @@ class EngChar2DPadder(Padder):
         dataset.set_padder('chars', padder)  # chars这个field的设置为了EnChar2DPadder
 
     """
-
+    
     def __init__(self, pad_val=0, pad_length=0):
         """
         :param pad_val: int, pad的位置使用该index
@@ -634,9 +649,9 @@ class EngChar2DPadder(Padder):
             都pad或截取到该长度.
         """
         super().__init__(pad_val=pad_val)
-
+        
         self.pad_length = pad_length
-
+    
     def __call__(self, contents, field_name, field_ele_dtype, dim):
         """
         期望输入类似于
@@ -655,7 +670,7 @@ class EngChar2DPadder(Padder):
             raise TypeError('dtype of Field:{} should be np.int64 or np.float64 to do 2D padding, get {}.'.format(
                 field_name, field_ele_dtype
             ))
-        assert dim==2, f"Field:{field_name} has {dim}, EngChar2DPadder only supports input with 2 dimensions."
+        assert dim == 2, f"Field:{field_name} has {dim}, EngChar2DPadder only supports input with 2 dimensions."
         if self.pad_length < 1:
             max_char_length = max([max(len(char_lst) for char_lst in word_lst) for word_lst in contents])
         else:
@@ -663,12 +678,12 @@ class EngChar2DPadder(Padder):
         max_sent_length = max(len(word_lst) for word_lst in contents)
         batch_size = len(contents)
         dtype = type(contents[0][0][0])
-
+        
         padded_array = np.full((batch_size, max_sent_length, max_char_length), fill_value=self.pad_val,
                                dtype=dtype)
         for b_idx, word_lst in enumerate(contents):
             for c_idx, char_lst in enumerate(word_lst):
                 chars = char_lst[:max_char_length]
                 padded_array[b_idx, c_idx, :len(chars)] = chars
-
+        
         return padded_array