Merge pull request #106 from FFTYYY/master

update loss & a small change in requirements
6 years ago · abf840c376
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ fastNLP is a modular Natural Language Processing system based on PyTorch, for fa
 ## Requirements

 - numpy>=1.14.2
 - torch==0.4.0
 - torch>=0.4.0
 - torchvision>=0.1.8
 - tensorboardX

--- a/fastNLP/core/loss.py
+++ b/fastNLP/core/loss.py
@@ -1,58 +1,196 @@
 import torch

 def squash(predict , truth , **kwargs):
 	'''To reshape tensors in order to fit Loss functions in pytorch

 	:param predict	: Tensor, model output
 	:param truth	: Tensor, truth from dataset
 	:param **kwargs : extra arguments

 	:return predict , truth: predict & truth after processing
 	'''
 	return predict.view(-1 , predict.size()[-1]) , truth.view(-1,)

 def unpad(predict , truth , **kwargs):
 	'''To process padded sequence output to get true loss
 	Using pack_padded_sequence() method
 	This method contains squash()

 	:param predict	: Tensor, [batch_size , max_len , tag_size]
 	:param truth	: Tensor, [batch_size , max_len]
 	:param **kwargs : extra arguments, kwargs["lens"] is expected to be exsist
 		kwargs["lens"] : list or LongTensor, [batch_size]
 					  the i-th element is true lengths of i-th sequence  
 	
 	:return predict , truth: predict & truth after processing
 	'''
 	if kwargs.get("lens") is None:
 		return predict , truth
 	lens = torch.LongTensor(kwargs["lens"])
 	lens , idx = torch.sort(lens , descending = True)
 	predict = torch.nn.utils.rnn.pack_padded_sequence(predict[idx] , lens , batch_first = True).data
 	truth = torch.nn.utils.rnn.pack_padded_sequence(truth[idx] , lens , batch_first = True).data
 	return predict , truth

 def unpad_mask(predict , truth , **kwargs):
 	'''To process padded sequence output to get true loss
 	Using mask() method
 	This method contains squash()

 	:param predict	: Tensor, [batch_size , max_len , tag_size]
 	:param truth	: Tensor, [batch_size , max_len]
 	:param **kwargs : extra arguments, kwargs["lens"] is expected to be exsist
 		kwargs["lens"] : list or LongTensor, [batch_size]
 					  the i-th element is true lengths of i-th sequence  
 	
 	:return predict , truth: predict & truth after processing
 	'''
 	if kwargs.get("lens") is None:
 		return predict , truth
 	mas = make_mask(kwargs["lens"] , truth.size()[1])
 	return mask(predict , truth , mask = mas)

 def mask(predict , truth , **kwargs):
 	'''To select specific elements from Tensor
 	This method contains squash()

 	:param predict	: Tensor, [batch_size , max_len , tag_size]
 	:param truth	: Tensor, [batch_size , max_len]
 	:param **kwargs : extra arguments, kwargs["mask"] is expected to be exsist
 		kwargs["mask"] : ByteTensor, [batch_size , max_len]
 					  the mask Tensor , the position that is 1 will be selected
 	
 	:return predict , truth: predict & truth after processing
 	'''
 	if kwargs.get("mask") is None:
 		return predict , truth
 	mask = kwargs["mask"]
 	
 	predict , truth = squash(predict , truth)
 	mask = mask.view(-1,)

 	predict = torch.masked_select(predict.permute(1,0) , mask).view(predict.size()[-1] , -1).permute(1,0)
 	truth = torch.masked_select(truth , mask)

 	return predict , truth

 def make_mask(lens , tar_len):
 	'''to generate a mask that select [:lens[i]] for i-th element
 	embezzle from fastNLP.models.sequence_modeling.seq_mask

 	:param lens		: list or LongTensor, [batch_size]
 	:param tar_len	: int
 	
 	:return mask 	: ByteTensor
 	'''
 	lens = torch.LongTensor(lens)
 	mask = [torch.ge(lens, i + 1) for i in range(tar_len)]
 	mask = torch.stack(mask, 1)
 	return mask

 #map string to function. Just for more elegant using
 method_dict = {
 	"squash" : squash,
 	"unpad" : unpad,
 	"unpad_mask" : unpad_mask,
 	"mask" : mask,
 }

 loss_function_name = {
 	"L1Loss".lower() : torch.nn.L1Loss,
 	"BCELoss".lower() : torch.nn.BCELoss,
 	"MSELoss".lower() : torch.nn.MSELoss,
 	"NLLLoss".lower() : torch.nn.NLLLoss,
 	"KLDivLoss".lower() : torch.nn.KLDivLoss,
 	"NLLLoss2dLoss".lower() : torch.nn.NLLLoss2d,	#every name should end with "loss"
 	"SmoothL1Loss".lower() : torch.nn.SmoothL1Loss,
 	"SoftMarginLoss".lower() : torch.nn.SoftMarginLoss,
 	"PoissonNLLLoss".lower() : torch.nn.PoissonNLLLoss,
 	"MultiMarginLoss".lower() : torch.nn.MultiMarginLoss,
 	"CrossEntropyLoss".lower() : torch.nn.CrossEntropyLoss,
 	"BCEWithLogitsLoss".lower() : torch.nn.BCEWithLogitsLoss,
 	"MarginRankingLoss".lower() : torch.nn.MarginRankingLoss,
 	"TripletMarginLoss".lower() : torch.nn.TripletMarginLoss,
 	"HingeEmbeddingLoss".lower() : torch.nn.HingeEmbeddingLoss,
 	"CosineEmbeddingLoss".lower() : torch.nn.CosineEmbeddingLoss,
 	"MultiLabelMarginLoss".lower() : torch.nn.MultiLabelMarginLoss,
 	"MultiLabelSoftMarginLoss".lower() : torch.nn.MultiLabelSoftMarginLoss,
 }

 class Loss(object):
    """Loss function of the algorithm,
    either the wrapper of a loss function from framework, or a user-defined loss (need pytorch auto_grad support)

    """

    def __init__(self, args):
        """

        :param args: None or str, the name of a loss function.

        """
        if args is None:
            # this is useful when Trainer.__init__ performs type check
            self._loss = None
        elif isinstance(args, str):
            self._loss = self._borrow_from_pytorch(args)
        else:
            raise NotImplementedError

    def get(self):
        """

        :return self._loss: the loss function
        """
        return self._loss

    @staticmethod
    def _borrow_from_pytorch(loss_name):
        """Given a name of a loss function, return it from PyTorch.

        :param loss_name: str, the name of a loss function

                - cross_entropy: combines log softmax and nll loss in a single function.
                - nll: negative log likelihood

        :return loss: a PyTorch loss
        """

        class InnerCrossEntropy:
            """A simple wrapper to guarantee input shapes."""

            def __init__(self):
                self.f = torch.nn.CrossEntropyLoss()

            def __call__(self, predict, truth):
                truth = truth.view(-1, )
                return self.f(predict, truth)

        if loss_name == "cross_entropy":
            return InnerCrossEntropy()
        elif loss_name == 'nll':
            return torch.nn.NLLLoss()
        else:
            raise NotImplementedError
 	'''a Loss object is a callable object represents loss functions
 	'''

 	def __init__(self , loss_name , pre_pro = [squash], **kwargs):
 		'''

 		:param loss_name: str or None , the name of loss function
 		:param pre_pro 	: list of function or str, methods to reform parameters before calculating loss
 			the strings will be auto translated to pre-defined functions
 		:param **kwargs: kwargs for torch loss function

 		pre_pro funcsions should have three arguments: predict, truth, **arg
 			predict and truth is the necessary parameters in loss function
 			kwargs is the extra parameters passed-in when calling loss function
 		pre_pro functions should return two objects, respectively predict and truth that after processed

 		'''

 		if loss_name is None:
 			# this is useful when Trainer.__init__ performs type check
 			self._loss = None
 		else:
 			if not isinstance(loss_name, str):
 				raise NotImplementedError
 			else:
 				self._loss = self._get_loss(loss_name , **kwargs)

 		self.pre_pro = [f if callable(f) else method_dict.get(f) for f in pre_pro]

 	def add_pre_pro(self , func):
 		'''add a pre_pro function

 		:param func: a function or str, methods to reform parameters before calculating loss
 			the strings will be auto translated to pre-defined functions
 		'''
 		if not callable(func):
 			func = method_dict.get(func)
 			if func is None:
 				return
 		self.pre_pro.append(func)

 	@staticmethod
 	def _get_loss(loss_name , **kwargs):
 		'''Get loss function from torch

 		:param loss_name: str, the name of loss function
 		:param **kwargs: kwargs for torch loss function
 		:return: A callable loss function object
 		'''
 		loss_name = loss_name.strip().lower()
 		loss_name = "".join(loss_name.split("_"))

 		if len(loss_name) < 4 or loss_name[-4 : ] != "loss":
 			loss_name += "loss"
 		return loss_function_name[loss_name](**kwargs)

 	def get(self):
 		'''This method exists just for make some existing codes run error-freely
 		'''
 		return self

 	def __call__(self , predict , truth , **kwargs):
 		'''call a loss function
 		predict and truth will be processed by pre_pro methods in order of addition 

 		:param predict	: Tensor, model output
 		:param truth 	: Tensor, truth from dataset 
 		:param **kwargs : extra arguments, pass to pre_pro functions
 			for example, if used unpad_mask() in pre_pro, there should be a kwarg named lens
 		'''
 		for f in self.pre_pro:
 			if f is None:
 				continue
 			predict , truth = f(predict , truth , **kwargs)

 		return self._loss(predict , truth)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 numpy>=1.14.2
 torch==0.4.0
 torch>=0.4.0
 torchvision>=0.1.8
 tensorboardX
--- a/test/core/test_loss.py
+++ b/test/core/test_loss.py
@@ -0,0 +1,315 @@
 import os
 import unittest

 from fastNLP.core.dataset import DataSet
 from fastNLP.core.metrics import SeqLabelEvaluator
 from fastNLP.core.field import TextField, LabelField
 from fastNLP.core.instance import Instance
 from fastNLP.core.optimizer import Optimizer
 from fastNLP.core.trainer import SeqLabelTrainer
 from fastNLP.models.sequence_modeling import SeqLabeling

 import fastNLP.core.loss as loss
 import math
 import torch as tc
 import pdb

 class TestLoss(unittest.TestCase):

 	def test_case_1(self):
 		#验证nllloss的原理

 		print (".----------------------------------")

 		loss_func = loss.Loss("nll")

 		#pdb.set_trace()

 		y = tc.Tensor(
 				[
 					[.3,.4,.3],
 					[.5,.3,.2],
 					[.3,.6,.1],
 				]
 			)

 		gy = tc.LongTensor(
 				[
 					0,
 					1,
 					2,
 				]
 			)


 		y = tc.log(y)
 		los = loss_func(y , gy)

 		r = -math.log(.3) - math.log(.3) - math.log(.1)
 		r /= 3
 		print ("loss = %f" % (los))
 		print ("r = %f" % (r))

 		self.assertEqual(int(los * 1000), int(r * 1000))

 	def test_case_2(self):
 		#验证squash()的正确性
 		print ("----------------------------------")

 		log = math.log

 		loss_func = loss.Loss("nll")

 		#pdb.set_trace()

 		y = tc.Tensor(
 				[
 					[[.3,.4,.3],[.3,.4,.3],],
 					[[.5,.3,.2],[.1,.2,.7],],
 					[[.3,.6,.1],[.2,.1,.7],],
 				]
 			)

 		gy = tc.LongTensor(
 				[
 					[0,2],
 					[1,2],
 					[2,1],
 				]
 			)


 		#pdb.set_trace()

 		y = tc.log(y)
 		los = loss_func(y , gy)
 		print ("loss = %f" % (los))

 		r = -log(.3) - log(.3) - log(.1) - log(.3) - log(.7) - log(.1)
 		r /= 6
 		print ("r = %f" % (r))

 		self.assertEqual(int(los * 1000), int(r * 1000))

 	def test_case_3(self):
 		#验证pack_padded_sequence()的正确性
 		print ("----------------------------------")

 		log = math.log

 		loss_func = loss.Loss("nll")

 		#pdb.set_trace()

 		y = tc.Tensor(
 				[
 					[[.3,.4,.3],[.3,.2,.5],[.4,.5,.1,],],
 					[[.5,.3,.2],[.1,.2,.7],[.0,.0,.0,],],
 					[[.3,.6,.1],[.0,.0,.0],[.0,.0,.0,],],
 				]
 			)

 		gy = tc.LongTensor(
 				[
 					[0,2,1,],
 					[1,2,0,],
 					[2,0,0,],
 				]
 			)

 		lens = [3,2,1]

 		#pdb.set_trace()

 		y = tc.log(y)

 		yy = tc.nn.utils.rnn.pack_padded_sequence(y , lens , batch_first = True).data
 		gyy = tc.nn.utils.rnn.pack_padded_sequence(gy , lens , batch_first = True).data
 		los = loss_func(yy , gyy)
 		print ("loss = %f" % (los))


 		r = -log(.3) - log(.5) - log(.5) - log(.3) - log(.7) - log(.1)
 		r /= 6
 		print ("r = %f" % (r))

 		self.assertEqual(int(los * 1000), int(r * 1000))

 	def test_case_4(self):
 		#验证unpad()的正确性
 		print ("----------------------------------")

 		log = math.log

 		#pdb.set_trace()

 		y = tc.Tensor(
 				[
 					[[.3,.4,.3],[.3,.2,.5],[.4,.5,.1,],[.6,.3,.1,],],
 					[[.5,.3,.2],[.1,.2,.7],[.0,.0,.0,],[.0,.0,.0,],],
 					[[.3,.6,.1],[.0,.0,.0],[.0,.0,.0,],[.0,.0,.0,],],
 				]
 			)

 		gy = tc.LongTensor(
 				[
 					[0,2,1,2,],
 					[1,2,0,0,],
 					[2,0,0,0,],
 				]
 			)

 		lens = [4,2,1]

 		#pdb.set_trace()

 		y = tc.log(y)

 		loss_func = loss.Loss("nll" , pre_pro = ["unpad"])
 		los = loss_func(y , gy , lens = lens)
 		print ("loss = %f" % (los))


 		r = -log(.1) -log(.3) - log(.5) - log(.5) - log(.3) - log(.7) - log(.1)
 		r /= 7
 		print ("r = %f" % (r))


 		self.assertEqual(int(los * 1000), int(r * 1000))

 	def test_case_5(self):
 		#验证mask()和make_mask()的正确性
 		print ("----------------------------------")

 		log = math.log

 		#pdb.set_trace()

 		y = tc.Tensor(
 				[
 					[[.5,.3,.2],[.1,.2,.7],[.0,.0,.0,],[.0,.0,.0,],],
 					[[.5,.4,.1],[.3,.2,.5],[.4,.5,.1,],[.6,.1,.3,],],
 					[[.3,.6,.1],[.3,.2,.5],[.0,.0,.0,],[.0,.0,.0,],],
 				]
 			)

 		gy = tc.LongTensor(
 				[
 					[1,2,0,0,],
 					[0,2,1,2,],
 					[2,1,0,0,],
 				]
 			)

 		mask = tc.ByteTensor(
 				[
 					[1,1,0,0,],
 					[1,1,1,1,],
 					[1,1,0,0,],
 				]
 			)

 		y = tc.log(y)

 		lens = [2,4,2]

 		loss_func = loss.Loss("nll" , pre_pro = ["mask"])
 		los = loss_func(y , gy , mask = mask)
 		print ("loss = %f" % (los))

 		los2 = loss_func(y , gy , mask = loss.make_mask(lens,gy.size()[-1]))
 		print ("loss2 = %f" % (los2))


 		r = -log(.3) -log(.7) - log(.5) - log(.5) - log(.5) - log(.3) - log(.1) - log(.2)
 		r /= 8
 		print ("r = %f" % (r))


 		self.assertEqual(int(los * 1000), int(r * 1000))
 		self.assertEqual(int(los2 * 1000), int(r * 1000))

 	def test_case_6(self):
 		#验证unpad_mask()的正确性
 		print ("----------------------------------")

 		log = math.log

 		#pdb.set_trace()

 		y = tc.Tensor(
 				[
 					[[.3,.4,.3],[.3,.2,.5],[.4,.5,.1,],[.6,.3,.1,],],
 					[[.5,.3,.2],[.1,.2,.7],[.0,.0,.0,],[.0,.0,.0,],],
 					[[.3,.6,.1],[.0,.0,.0],[.0,.0,.0,],[.0,.0,.0,],],
 				]
 			)

 		gy = tc.LongTensor(
 				[
 					[0,2,1,2,],
 					[1,2,0,0,],
 					[2,0,0,0,],
 				]
 			)

 		lens = [4,2,1]

 		#pdb.set_trace()

 		y = tc.log(y)

 		loss_func = loss.Loss("nll" , pre_pro = ["unpad_mask"])
 		los = loss_func(y , gy , lens = lens)
 		print ("loss = %f" % (los))


 		r = -log(.1) -log(.3) - log(.5) - log(.5) - log(.3) - log(.7) - log(.1)
 		r /= 7
 		print ("r = %f" % (r))

 		self.assertEqual(int(los * 1000), int(r * 1000))

 	def test_case_7(self):
 		#验证一些其他东西
 		print ("----------------------------------")

 		log = math.log

 		#pdb.set_trace()

 		y = tc.Tensor(
 				[
 					[[.3,.4,.3],[.3,.2,.5],[.4,.5,.1,],[.6,.3,.1,],],
 					[[.5,.3,.2],[.1,.2,.7],[.0,.0,.0,],[.0,.0,.0,],],
 					[[.3,.6,.1],[.0,.0,.0],[.0,.0,.0,],[.0,.0,.0,],],
 				]
 			)

 		gy = tc.LongTensor(
 				[
 					[0,2,1,2,],
 					[1,2,0,0,],
 					[2,0,0,0,],
 				]
 			)

 		lens = [4,2,1]

 		#pdb.set_trace()

 		y = tc.log(y)

 		loss_func = loss.Loss("nll" , pre_pro = [] , weight = tc.Tensor([1,1,0]))
 		loss_func.add_pre_pro("unpad_mask")
 		los = loss_func(y , gy , lens = lens)
 		print ("loss = %f" % (los))


 		r = - log(.3) - log(.5) - log(.3)
 		r /= 3
 		print ("r = %f" % (r))
 		self.assertEqual(int(los * 1000), int(r * 1000))

 if __name__ == "__main__":
 	unittest.main()