
Merge pull request #106 from FFTYYY/master

update loss & a small change in requirements
tags/v0.2.0
Yige XU · 6 years ago
parent commit abf840c376
4 changed files with 509 additions and 56 deletions
1. README.md (+1, -1)
2. fastNLP/core/loss.py (+192, -54)
3. requirements.txt (+1, -1)
4. test/core/test_loss.py (+315, -0)

README.md (+1, -1)

@@ -14,7 +14,7 @@ fastNLP is a modular Natural Language Processing system based on PyTorch, for fa
 ## Requirements
 
 - numpy>=1.14.2
-- torch==0.4.0
+- torch>=0.4.0
 - torchvision>=0.1.8
 - tensorboardX



fastNLP/core/loss.py (+192, -54)

@@ -1,58 +1,196 @@
 import torch
 
+
+def squash(predict, truth, **kwargs):
+    '''Reshape tensors so that they fit the loss functions in PyTorch.
+
+    :param predict: Tensor, model output
+    :param truth: Tensor, truth from dataset
+    :param **kwargs: extra arguments
+    :return predict, truth: predict & truth after processing
+    '''
+    return predict.view(-1, predict.size()[-1]), truth.view(-1,)
+
+
+def unpad(predict, truth, **kwargs):
+    '''Process padded sequence output to get the true loss,
+    using the pack_padded_sequence() method.
+    This method includes the effect of squash().
+
+    :param predict: Tensor, [batch_size, max_len, tag_size]
+    :param truth: Tensor, [batch_size, max_len]
+    :param **kwargs: extra arguments; kwargs["lens"] is expected to exist
+        kwargs["lens"]: list or LongTensor, [batch_size],
+        whose i-th element is the true length of the i-th sequence
+    :return predict, truth: predict & truth after processing
+    '''
+    if kwargs.get("lens") is None:
+        return predict, truth
+    lens = torch.LongTensor(kwargs["lens"])
+    lens, idx = torch.sort(lens, descending=True)
+    predict = torch.nn.utils.rnn.pack_padded_sequence(predict[idx], lens, batch_first=True).data
+    truth = torch.nn.utils.rnn.pack_padded_sequence(truth[idx], lens, batch_first=True).data
+    return predict, truth
+
+
+def unpad_mask(predict, truth, **kwargs):
+    '''Process padded sequence output to get the true loss,
+    using the mask() method.
+    This method includes the effect of squash().
+
+    :param predict: Tensor, [batch_size, max_len, tag_size]
+    :param truth: Tensor, [batch_size, max_len]
+    :param **kwargs: extra arguments; kwargs["lens"] is expected to exist
+        kwargs["lens"]: list or LongTensor, [batch_size],
+        whose i-th element is the true length of the i-th sequence
+    :return predict, truth: predict & truth after processing
+    '''
+    if kwargs.get("lens") is None:
+        return predict, truth
+    mas = make_mask(kwargs["lens"], truth.size()[1])
+    return mask(predict, truth, mask=mas)
+
+
+def mask(predict, truth, **kwargs):
+    '''Select specific elements from a Tensor.
+    This method includes the effect of squash().
+
+    :param predict: Tensor, [batch_size, max_len, tag_size]
+    :param truth: Tensor, [batch_size, max_len]
+    :param **kwargs: extra arguments; kwargs["mask"] is expected to exist
+        kwargs["mask"]: ByteTensor, [batch_size, max_len],
+        the mask Tensor; the positions that are 1 will be selected
+    :return predict, truth: predict & truth after processing
+    '''
+    if kwargs.get("mask") is None:
+        return predict, truth
+    mask = kwargs["mask"]
+    predict, truth = squash(predict, truth)
+    mask = mask.view(-1,)
+
+    predict = torch.masked_select(predict.permute(1, 0), mask).view(predict.size()[-1], -1).permute(1, 0)
+    truth = torch.masked_select(truth, mask)
+
+    return predict, truth
+
+
+def make_mask(lens, tar_len):
+    '''Generate a mask that selects [:lens[i]] for the i-th element,
+    borrowed from fastNLP.models.sequence_modeling.seq_mask.
+
+    :param lens: list or LongTensor, [batch_size]
+    :param tar_len: int
+    :return mask: ByteTensor
+    '''
+    lens = torch.LongTensor(lens)
+    mask = [torch.ge(lens, i + 1) for i in range(tar_len)]
+    mask = torch.stack(mask, 1)
+    return mask
+
+
+# map strings to functions, just for more elegant use
+method_dict = {
+    "squash": squash,
+    "unpad": unpad,
+    "unpad_mask": unpad_mask,
+    "mask": mask,
+}
+
+loss_function_name = {
+    "L1Loss".lower(): torch.nn.L1Loss,
+    "BCELoss".lower(): torch.nn.BCELoss,
+    "MSELoss".lower(): torch.nn.MSELoss,
+    "NLLLoss".lower(): torch.nn.NLLLoss,
+    "KLDivLoss".lower(): torch.nn.KLDivLoss,
+    "NLLLoss2dLoss".lower(): torch.nn.NLLLoss2d,  # every name should end with "loss"
+    "SmoothL1Loss".lower(): torch.nn.SmoothL1Loss,
+    "SoftMarginLoss".lower(): torch.nn.SoftMarginLoss,
+    "PoissonNLLLoss".lower(): torch.nn.PoissonNLLLoss,
+    "MultiMarginLoss".lower(): torch.nn.MultiMarginLoss,
+    "CrossEntropyLoss".lower(): torch.nn.CrossEntropyLoss,
+    "BCEWithLogitsLoss".lower(): torch.nn.BCEWithLogitsLoss,
+    "MarginRankingLoss".lower(): torch.nn.MarginRankingLoss,
+    "TripletMarginLoss".lower(): torch.nn.TripletMarginLoss,
+    "HingeEmbeddingLoss".lower(): torch.nn.HingeEmbeddingLoss,
+    "CosineEmbeddingLoss".lower(): torch.nn.CosineEmbeddingLoss,
+    "MultiLabelMarginLoss".lower(): torch.nn.MultiLabelMarginLoss,
+    "MultiLabelSoftMarginLoss".lower(): torch.nn.MultiLabelSoftMarginLoss,
+}
+
 class Loss(object):
-    """Loss function of the algorithm,
-    either the wrapper of a loss function from the framework, or a user-defined loss (needs pytorch autograd support)
-    """
-
-    def __init__(self, args):
-        """
-        :param args: None or str, the name of a loss function.
-        """
-        if args is None:
-            # this is useful when Trainer.__init__ performs type check
-            self._loss = None
-        elif isinstance(args, str):
-            self._loss = self._borrow_from_pytorch(args)
-        else:
-            raise NotImplementedError
-
-    def get(self):
-        """
-        :return self._loss: the loss function
-        """
-        return self._loss
-
-    @staticmethod
-    def _borrow_from_pytorch(loss_name):
-        """Given the name of a loss function, return it from PyTorch.
-
-        :param loss_name: str, the name of a loss function
-
-            - cross_entropy: combines log softmax and nll loss in a single function
-            - nll: negative log likelihood
-
-        :return loss: a PyTorch loss
-        """
-
-        class InnerCrossEntropy:
-            """A simple wrapper to guarantee input shapes."""
-
-            def __init__(self):
-                self.f = torch.nn.CrossEntropyLoss()
-
-            def __call__(self, predict, truth):
-                truth = truth.view(-1, )
-                return self.f(predict, truth)
-
-        if loss_name == "cross_entropy":
-            return InnerCrossEntropy()
-        elif loss_name == 'nll':
-            return torch.nn.NLLLoss()
-        else:
-            raise NotImplementedError
+    '''A Loss object is a callable object that represents a loss function.
+    '''
+
+    def __init__(self, loss_name, pre_pro=[squash], **kwargs):
+        '''
+        :param loss_name: str or None, the name of a loss function
+        :param pre_pro: list of functions or str, methods that reshape the parameters before the loss is computed;
+            strings are automatically translated to the pre-defined functions
+        :param **kwargs: kwargs for the torch loss function
+
+        pre_pro functions should take three arguments: predict, truth, **kwargs.
+        predict and truth are the required parameters of a loss function;
+        kwargs holds the extra parameters passed in when the loss function is called.
+        pre_pro functions should return two objects: predict and truth after processing, respectively.
+        '''
+
+        if loss_name is None:
+            # this is useful when Trainer.__init__ performs type check
+            self._loss = None
+        else:
+            if not isinstance(loss_name, str):
+                raise NotImplementedError
+            else:
+                self._loss = self._get_loss(loss_name, **kwargs)
+
+        self.pre_pro = [f if callable(f) else method_dict.get(f) for f in pre_pro]
+
+    def add_pre_pro(self, func):
+        '''Add a pre_pro function.
+
+        :param func: a function or str, a method that reshapes the parameters before the loss is computed;
+            strings are automatically translated to the pre-defined functions
+        '''
+        if not callable(func):
+            func = method_dict.get(func)
+            if func is None:
+                return
+        self.pre_pro.append(func)
+
+    @staticmethod
+    def _get_loss(loss_name, **kwargs):
+        '''Get a loss function from torch.
+
+        :param loss_name: str, the name of a loss function
+        :param **kwargs: kwargs for the torch loss function
+        :return: a callable loss function object
+        '''
+        loss_name = loss_name.strip().lower()
+        loss_name = "".join(loss_name.split("_"))
+
+        if len(loss_name) < 4 or loss_name[-4:] != "loss":
+            loss_name += "loss"
+        return loss_function_name[loss_name](**kwargs)
+
+    def get(self):
+        '''This method exists just to keep some existing code running without errors.
+        '''
+        return self
+
+    def __call__(self, predict, truth, **kwargs):
+        '''Call the loss function; predict and truth will first be processed
+        by the pre_pro methods in the order in which they were added.
+
+        :param predict: Tensor, model output
+        :param truth: Tensor, truth from dataset
+        :param **kwargs: extra arguments, passed to the pre_pro functions;
+            for example, if unpad_mask() is used in pre_pro, a kwarg named lens is required
+        '''
+        for f in self.pre_pro:
+            if f is None:
+                continue
+            predict, truth = f(predict, truth, **kwargs)
+
+        return self._loss(predict, truth)
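
Taken together, the new interface composes the reshaping helpers with a wrapped torch loss. A minimal usage sketch of the merged API (the tensor values below are illustrative, not taken from this PR):

import torch
from fastNLP.core.loss import Loss

# log-probabilities for a padded batch: [batch_size=2, max_len=3, tag_size=3]
predict = torch.log(torch.Tensor([
    [[.3, .4, .3], [.5, .3, .2], [.3, .6, .1]],
    [[.2, .2, .6], [.7, .2, .1], [.1, .1, .8]],  # last step of this sequence is padding
]))
truth = torch.LongTensor([[1, 0, 2], [2, 0, 0]])

# "unpad" drops the padded positions (via pack_padded_sequence) before NLLLoss runs
loss_func = Loss("nll", pre_pro=["unpad"])
print(loss_func(predict, truth, lens=[3, 2]))

Because _get_loss() lower-cases the name, strips underscores, and appends "loss" when it is missing, "nll", "NLLLoss", and "nll_loss" all resolve to the same torch.nn.NLLLoss entry in loss_function_name.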

requirements.txt (+1, -1)

@@ -1,4 +1,4 @@
 numpy>=1.14.2
-torch==0.4.0
+torch>=0.4.0
 torchvision>=0.1.8
 tensorboardX

test/core/test_loss.py (+315, -0)

@@ -0,0 +1,315 @@
import os
import unittest

from fastNLP.core.dataset import DataSet
from fastNLP.core.metrics import SeqLabelEvaluator
from fastNLP.core.field import TextField, LabelField
from fastNLP.core.instance import Instance
from fastNLP.core.optimizer import Optimizer
from fastNLP.core.trainer import SeqLabelTrainer
from fastNLP.models.sequence_modeling import SeqLabeling

import fastNLP.core.loss as loss
import math
import torch as tc
import pdb

class TestLoss(unittest.TestCase):

    def test_case_1(self):
        # verify how NLLLoss works

        print("----------------------------------")

        loss_func = loss.Loss("nll")

        # pdb.set_trace()

        y = tc.Tensor(
            [
                [.3, .4, .3],
                [.5, .3, .2],
                [.3, .6, .1],
            ]
        )

        gy = tc.LongTensor(
            [
                0,
                1,
                2,
            ]
        )

        y = tc.log(y)
        los = loss_func(y, gy)

        r = -math.log(.3) - math.log(.3) - math.log(.1)
        r /= 3
        print("loss = %f" % (los))
        print("r = %f" % (r))

        self.assertEqual(int(los * 1000), int(r * 1000))

    def test_case_2(self):
        # verify the correctness of squash()
        print("----------------------------------")

        log = math.log

        loss_func = loss.Loss("nll")

        # pdb.set_trace()

        y = tc.Tensor(
            [
                [[.3, .4, .3], [.3, .4, .3]],
                [[.5, .3, .2], [.1, .2, .7]],
                [[.3, .6, .1], [.2, .1, .7]],
            ]
        )

        gy = tc.LongTensor(
            [
                [0, 2],
                [1, 2],
                [2, 1],
            ]
        )

        # pdb.set_trace()

        y = tc.log(y)
        los = loss_func(y, gy)
        print("loss = %f" % (los))

        r = -log(.3) - log(.3) - log(.1) - log(.3) - log(.7) - log(.1)
        r /= 6
        print("r = %f" % (r))

        self.assertEqual(int(los * 1000), int(r * 1000))

    def test_case_3(self):
        # verify the correctness of pack_padded_sequence()
        print("----------------------------------")

        log = math.log

        loss_func = loss.Loss("nll")

        # pdb.set_trace()

        y = tc.Tensor(
            [
                [[.3, .4, .3], [.3, .2, .5], [.4, .5, .1]],
                [[.5, .3, .2], [.1, .2, .7], [.0, .0, .0]],
                [[.3, .6, .1], [.0, .0, .0], [.0, .0, .0]],
            ]
        )

        gy = tc.LongTensor(
            [
                [0, 2, 1],
                [1, 2, 0],
                [2, 0, 0],
            ]
        )

        lens = [3, 2, 1]

        # pdb.set_trace()

        y = tc.log(y)

        yy = tc.nn.utils.rnn.pack_padded_sequence(y, lens, batch_first=True).data
        gyy = tc.nn.utils.rnn.pack_padded_sequence(gy, lens, batch_first=True).data
        los = loss_func(yy, gyy)
        print("loss = %f" % (los))

        r = -log(.3) - log(.5) - log(.5) - log(.3) - log(.7) - log(.1)
        r /= 6
        print("r = %f" % (r))

        self.assertEqual(int(los * 1000), int(r * 1000))

    def test_case_4(self):
        # verify the correctness of unpad()
        print("----------------------------------")

        log = math.log

        # pdb.set_trace()

        y = tc.Tensor(
            [
                [[.3, .4, .3], [.3, .2, .5], [.4, .5, .1], [.6, .3, .1]],
                [[.5, .3, .2], [.1, .2, .7], [.0, .0, .0], [.0, .0, .0]],
                [[.3, .6, .1], [.0, .0, .0], [.0, .0, .0], [.0, .0, .0]],
            ]
        )

        gy = tc.LongTensor(
            [
                [0, 2, 1, 2],
                [1, 2, 0, 0],
                [2, 0, 0, 0],
            ]
        )

        lens = [4, 2, 1]

        # pdb.set_trace()

        y = tc.log(y)

        loss_func = loss.Loss("nll", pre_pro=["unpad"])
        los = loss_func(y, gy, lens=lens)
        print("loss = %f" % (los))

        r = -log(.1) - log(.3) - log(.5) - log(.5) - log(.3) - log(.7) - log(.1)
        r /= 7
        print("r = %f" % (r))

        self.assertEqual(int(los * 1000), int(r * 1000))

    def test_case_5(self):
        # verify the correctness of mask() and make_mask()
        print("----------------------------------")

        log = math.log

        # pdb.set_trace()

        y = tc.Tensor(
            [
                [[.5, .3, .2], [.1, .2, .7], [.0, .0, .0], [.0, .0, .0]],
                [[.5, .4, .1], [.3, .2, .5], [.4, .5, .1], [.6, .1, .3]],
                [[.3, .6, .1], [.3, .2, .5], [.0, .0, .0], [.0, .0, .0]],
            ]
        )

        gy = tc.LongTensor(
            [
                [1, 2, 0, 0],
                [0, 2, 1, 2],
                [2, 1, 0, 0],
            ]
        )

        mask = tc.ByteTensor(
            [
                [1, 1, 0, 0],
                [1, 1, 1, 1],
                [1, 1, 0, 0],
            ]
        )

        y = tc.log(y)

        lens = [2, 4, 2]

        loss_func = loss.Loss("nll", pre_pro=["mask"])
        los = loss_func(y, gy, mask=mask)
        print("loss = %f" % (los))

        los2 = loss_func(y, gy, mask=loss.make_mask(lens, gy.size()[-1]))
        print("loss2 = %f" % (los2))

        r = -log(.3) - log(.7) - log(.5) - log(.5) - log(.5) - log(.3) - log(.1) - log(.2)
        r /= 8
        print("r = %f" % (r))

        self.assertEqual(int(los * 1000), int(r * 1000))
        self.assertEqual(int(los2 * 1000), int(r * 1000))

    def test_case_6(self):
        # verify the correctness of unpad_mask()
        print("----------------------------------")

        log = math.log

        # pdb.set_trace()

        y = tc.Tensor(
            [
                [[.3, .4, .3], [.3, .2, .5], [.4, .5, .1], [.6, .3, .1]],
                [[.5, .3, .2], [.1, .2, .7], [.0, .0, .0], [.0, .0, .0]],
                [[.3, .6, .1], [.0, .0, .0], [.0, .0, .0], [.0, .0, .0]],
            ]
        )

        gy = tc.LongTensor(
            [
                [0, 2, 1, 2],
                [1, 2, 0, 0],
                [2, 0, 0, 0],
            ]
        )

        lens = [4, 2, 1]

        # pdb.set_trace()

        y = tc.log(y)

        loss_func = loss.Loss("nll", pre_pro=["unpad_mask"])
        los = loss_func(y, gy, lens=lens)
        print("loss = %f" % (los))

        r = -log(.1) - log(.3) - log(.5) - log(.5) - log(.3) - log(.7) - log(.1)
        r /= 7
        print("r = %f" % (r))

        self.assertEqual(int(los * 1000), int(r * 1000))

    def test_case_7(self):
        # verify add_pre_pro() and extra loss kwargs (class weights)
        print("----------------------------------")

        log = math.log

        # pdb.set_trace()

        y = tc.Tensor(
            [
                [[.3, .4, .3], [.3, .2, .5], [.4, .5, .1], [.6, .3, .1]],
                [[.5, .3, .2], [.1, .2, .7], [.0, .0, .0], [.0, .0, .0]],
                [[.3, .6, .1], [.0, .0, .0], [.0, .0, .0], [.0, .0, .0]],
            ]
        )

        gy = tc.LongTensor(
            [
                [0, 2, 1, 2],
                [1, 2, 0, 0],
                [2, 0, 0, 0],
            ]
        )

        lens = [4, 2, 1]

        # pdb.set_trace()

        y = tc.log(y)

        loss_func = loss.Loss("nll", pre_pro=[], weight=tc.Tensor([1, 1, 0]))
        loss_func.add_pre_pro("unpad_mask")
        los = loss_func(y, gy, lens=lens)
        print("loss = %f" % (los))

        r = -log(.3) - log(.5) - log(.3)
        r /= 3
        print("r = %f" % (r))
        self.assertEqual(int(los * 1000), int(r * 1000))

if __name__ == "__main__":
    unittest.main()
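
Since the file ends with a unittest.main() guard, the suite can be run directly from the repository root (assuming fastNLP is importable from there):

python test/core/test_loss.py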
