@@ -32,8 +32,18 @@ class DataSet(list):
        return self

    def index_field(self, field_name, vocab):
        for ins in self:
            ins.index_field(field_name, vocab)
        if isinstance(field_name, str):
            field_list = [field_name]
            vocab_list = [vocab]
        else:
            classes = (list, tuple)
            assert isinstance(field_name, classes) and isinstance(vocab, classes) and len(field_name) == len(vocab)
            field_list = field_name
            vocab_list = vocab

        for name, vocabs in zip(field_list, vocab_list):
            for ins in self:
                ins.index_field(name, vocabs)
        return self
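A short sketch of the widened signature: a single field still works as before, and several fields can now be indexed in one call, each against its own vocabulary. Here `data_set` is assumed to be an already-loaded DataSet, and `word_vocab`/`label_vocab` are assumed prebuilt Vocabulary objects; the field names are placeholders.

data_set.index_field("word_seq", word_vocab)                              # old single-field form still accepted
data_set.index_field(["word_seq", "truth"], [word_vocab, label_vocab])    # new multi-field form, one vocab per field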
    def to_tensor(self, idx: int, padding_length: dict):

@@ -57,6 +57,20 @@ class SeqLabelEvaluator(Evaluator):
        return {"accuracy": float(accuracy)}


class SNLIEvaluator(Evaluator):
    def __init__(self):
        super(SNLIEvaluator, self).__init__()

    def __call__(self, predict, truth):
        y_prob = [torch.nn.functional.softmax(y_logit, dim=-1) for y_logit in predict]
        y_prob = torch.cat(y_prob, dim=0)
        y_pred = torch.argmax(y_prob, dim=-1)
        truth = [t['truth'] for t in truth]
        y_true = torch.cat(truth, dim=0).view(-1)
        acc = float(torch.sum(y_pred == y_true)) / y_true.size(0)
        return {"accuracy": acc}
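A minimal sketch of how the new evaluator is fed: `predict` is a list of per-batch logit tensors and `truth` a list of per-batch dicts holding the gold labels under "truth". The batch shapes below are assumptions for illustration, not taken from the diff.

import torch

evaluator = SNLIEvaluator()
predict = [torch.randn(32, 3), torch.randn(32, 3)]            # two batches of logits over the 3 SNLI classes
truth = [{"truth": torch.randint(0, 3, (32, 1))},             # matching gold labels, one dict per batch
         {"truth": torch.randint(0, 3, (32, 1))}]
print(evaluator(predict, truth))                              # e.g. {"accuracy": 0.34}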
def _conver_numpy(x):
    """convert input data to numpy array

@@ -83,6 +83,7 @@ class Tester(object):
            truth_list.append(batch_y)
        eval_results = self.evaluate(output_list, truth_list)
        print("[tester] {}".format(self.print_eval_results(eval_results)))
        logger.info("[tester] {}".format(self.print_eval_results(eval_results)))

    def mode(self, model, is_test=False):
        """Train mode or Test mode. This is for PyTorch currently.

@@ -131,3 +132,10 @@ class ClassificationTester(Tester):
        print(
            "[FastNLP Warning] ClassificationTester will be deprecated. Please use Tester directly.")
        super(ClassificationTester, self).__init__(**test_args)


class SNLITester(Tester):
    def __init__(self, **test_args):
        print(
            "[FastNLP Warning] SNLITester will be deprecated. Please use Tester directly.")
        super(SNLITester, self).__init__(**test_args)
@@ -10,7 +10,7 @@ from fastNLP.core.loss import Loss
from fastNLP.core.metrics import Evaluator
from fastNLP.core.optimizer import Optimizer
from fastNLP.core.sampler import RandomSampler
from fastNLP.core.tester import SeqLabelTester, ClassificationTester
from fastNLP.core.tester import SeqLabelTester, ClassificationTester, SNLITester
from fastNLP.saver.logger import create_logger
from fastNLP.saver.model_saver import ModelSaver

@@ -162,7 +162,7 @@ class Trainer(object):
            if kwargs["n_print"] > 0 and step % kwargs["n_print"] == 0:
                end = time.time()
                diff = timedelta(seconds=round(end - kwargs["start"]))
                print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.2} time: {}".format(
                print_output = "[epoch: {:>3} step: {:>4}] train loss: {:>4.6} time: {}".format(
                    kwargs["epoch"], step, loss.data, diff)
                print(print_output)
                logger.info(print_output)

@@ -292,3 +292,15 @@ class ClassificationTrainer(Trainer):
    def _create_validator(self, valid_args):
        return ClassificationTester(**valid_args)


class SNLITrainer(Trainer):
    """Trainer for the SNLI task."""

    def __init__(self, **train_args):
        print(
            "[FastNLP Warning] SNLITrainer will be deprecated. Please use Trainer directly.")
        super(SNLITrainer, self).__init__(**train_args)

    def _create_validator(self, valid_args):
        return SNLITester(**valid_args)
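A hedged sketch of wiring the new trainer up. The keyword names below mirror the [snli_trainer] section of the config further down; they are assumptions about what the base Trainer accepts (other required kwargs such as a pickle path may be needed), not something this diff shows directly.

train_args = {"epochs": 5, "batch_size": 32, "validate": True, "save_best_dev": True,
              "use_cuda": True, "learn_rate": 1e-4, "loss": "cross_entropy",
              "print_every_step": 1000}          # assumed kwargs, taken from the [snli_trainer] section
trainer = SNLITrainer(**train_args)              # prints the deprecation warning, then behaves like Trainer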
@@ -18,6 +18,7 @@ def isiterable(p_object):
        return False
    return True


def check_build_vocab(func):
    def _wrapper(self, *args, **kwargs):
        if self.word2idx is None:

@@ -28,6 +29,7 @@ def check_build_vocab(func):
        return func(self, *args, **kwargs)
    return _wrapper


class Vocabulary(object):
    """Use for word and index one to one mapping

@@ -52,7 +54,6 @@ class Vocabulary(object):
        self.word2idx = None
        self.idx2word = None

    def update(self, word):
        """add word or list of words into Vocabulary

@@ -71,7 +72,6 @@ class Vocabulary(object):
            self.word2idx = None
        return self

    def build_vocab(self):
        """build 'word to index' dict, and filter the word using `max_size` and `min_freq`
        """

@@ -164,3 +164,11 @@ class Vocabulary(object):
        """
        self.__dict__.update(state)
        self.idx2word = None

    def __contains__(self, item):
        """Check if a word is in the vocabulary.

        :param item: the word
        :return: True or False
        """
        return self.has_word(item)
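A quick sketch of what the new `__contains__` hook enables; the words are placeholders.

vocab = Vocabulary()
vocab.update(["premise", "hypothesis", "entailment"])   # update() accepts a word or a list of words
vocab.build_vocab()
print("premise" in vocab)        # True, dispatches to vocab.has_word("premise")
print("neutral" in vocab)        # False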
@@ -5,6 +5,7 @@ from fastNLP.core.dataset import DataSet
from fastNLP.core.instance import Instance
from fastNLP.core.field import *


def convert_seq_dataset(data):
    """Create a DataSet instance that contains no labels.

@@ -23,6 +24,7 @@ def convert_seq_dataset(data):
        dataset.append(Instance(word_seq=x))
    return dataset


def convert_seq2tag_dataset(data):
    """Convert list of data into DataSet

@@ -45,6 +47,7 @@ def convert_seq2tag_dataset(data):
        dataset.append(ins)
    return dataset


def convert_seq2seq_dataset(data):
    """Convert list of data into DataSet

@@ -84,6 +87,7 @@ class DataSetLoader(BaseLoader):
        """
        raise NotImplementedError


@DataSet.set_reader('read_raw')
class RawDataSetLoader(DataSetLoader):
    def __init__(self):

@@ -99,6 +103,7 @@ class RawDataSetLoader(DataSetLoader):
    def convert(self, data):
        return convert_seq_dataset(data)


@DataSet.set_reader('read_pos')
class POSDataSetLoader(DataSetLoader):
    """Dataset Loader for POS Tag datasets.

@@ -168,6 +173,7 @@ class POSDataSetLoader(DataSetLoader):
        """
        return convert_seq2seq_dataset(data)


@DataSet.set_reader('read_tokenize')
class TokenizeDataSetLoader(DataSetLoader):
    """

@@ -227,6 +233,7 @@ class TokenizeDataSetLoader(DataSetLoader):
    def convert(self, data):
        return convert_seq2seq_dataset(data)


@DataSet.set_reader('read_class')
class ClassDataSetLoader(DataSetLoader):
    """Loader for classification data sets"""

@@ -265,6 +272,7 @@ class ClassDataSetLoader(DataSetLoader):
    def convert(self, data):
        return convert_seq2tag_dataset(data)


@DataSet.set_reader('read_conll')
class ConllLoader(DataSetLoader):
    """loader for conll format files"""

@@ -306,6 +314,7 @@ class ConllLoader(DataSetLoader):
    def convert(self, data):
        pass


@DataSet.set_reader('read_lm')
class LMDataSetLoader(DataSetLoader):
    """Language Model Dataset Loader

@@ -342,6 +351,7 @@ class LMDataSetLoader(DataSetLoader):
    def convert(self, data):
        pass


@DataSet.set_reader('read_people_daily')
class PeopleDailyCorpusLoader(DataSetLoader):
    """

@@ -394,3 +404,72 @@ class PeopleDailyCorpusLoader(DataSetLoader):
    def convert(self, data):
        pass


class SNLIDataSetLoader(DataSetLoader):
    """A data set loader for the SNLI data set."""

    def __init__(self):
        super(SNLIDataSetLoader, self).__init__()

    def load(self, path_list):
        """
        :param path_list: A list of file names, in the order of premise file, hypothesis file, and label file.
        :return: data_set: A DataSet object.
        """
        assert len(path_list) == 3
        line_set = []
        for file in path_list:
            if not os.path.exists(file):
                raise FileNotFoundError("file {} NOT found".format(file))

            with open(file, 'r', encoding='utf-8') as f:
                lines = f.readlines()
                line_set.append(lines)

        premise_lines, hypothesis_lines, label_lines = line_set
        assert len(premise_lines) == len(hypothesis_lines) and len(premise_lines) == len(label_lines)

        data_set = []
        for premise, hypothesis, label in zip(premise_lines, hypothesis_lines, label_lines):
            p = premise.strip().split()
            h = hypothesis.strip().split()
            l = label.strip()
            data_set.append([p, h, l])

        return self.convert(data_set)

    def convert(self, data):
        """Convert a nested list to a DataSet object.

        :param data: A nested list. Each example holds a premise word list, a hypothesis word list, and a label string:
            [
                [ [premise_word_11, premise_word_12, ...], [hypothesis_word_11, hypothesis_word_12, ...], label_1 ],
                [ [premise_word_21, premise_word_22, ...], [hypothesis_word_21, hypothesis_word_22, ...], label_2 ],
                ...
            ]
        :return: data_set: A DataSet object.
        """
        data_set = DataSet()

        for example in data:
            p, h, l = example
            # list, list, str
            x1 = TextField(p, is_target=False)
            x2 = TextField(h, is_target=False)
            x1_len = TextField([1] * len(p), is_target=False)
            x2_len = TextField([1] * len(h), is_target=False)
            y = LabelField(l, is_target=True)
            instance = Instance()
            instance.add_field("premise", x1)
            instance.add_field("hypothesis", x2)
            instance.add_field("premise_len", x1_len)
            instance.add_field("hypothesis_len", x2_len)
            instance.add_field("truth", y)
            data_set.append(instance)

        return data_set
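A hedged sketch of driving the new loader. The file paths are placeholders (each file is expected to hold one whitespace-tokenized line per example, aligned by line number), and `word_vocab`/`label_vocab` are assumed prebuilt Vocabulary objects.

loader = SNLIDataSetLoader()
data_set = loader.load(["snli/premise.txt", "snli/hypothesis.txt", "snli/label.txt"])

# Index the word fields with a shared word vocabulary and the labels with a label
# vocabulary, using the multi-field DataSet.index_field added earlier in this diff.
data_set.index_field(["premise", "hypothesis", "truth"], [word_vocab, word_vocab, label_vocab])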
@@ -6,11 +6,12 @@ import torch
from fastNLP.loader.base_loader import BaseLoader
from fastNLP.core.vocabulary import Vocabulary


class EmbedLoader(BaseLoader):
    """docstring for EmbedLoader"""

    def __init__(self, data_path):
        super(EmbedLoader, self).__init__(data_path)
    def __init__(self):
        super(EmbedLoader, self).__init__()

    @staticmethod
    def _load_glove(emb_file):

@@ -55,15 +56,15 @@ class EmbedLoader(BaseLoader):
        :param emb_type: str, the pre-trained embedding format, support glove now
        :param vocab: Vocabulary, a mapping from word to index, can be provided by user or built from pre-trained embedding
        :param emb_pkl: str, the embedding pickle file.
        :return embedding_np: numpy array of shape (len(word_dict), emb_dim)
        :return embedding_tensor: Tensor of shape (len(word_dict), emb_dim)
            vocab: input vocab or vocab built by pre-train
        TODO: fragile code
        """
        # If the embedding pickle exists, load it and return.
        if os.path.exists(emb_pkl):
            with open(emb_pkl, "rb") as f:
                embedding_np, vocab = _pickle.load(f)
            return embedding_np, vocab
                embedding_tensor, vocab = _pickle.load(f)
            return embedding_tensor, vocab
        # Otherwise, load the pre-trained embedding.
        pretrain = EmbedLoader._load_pretrain(emb_file, emb_type)
        if vocab is None:

@@ -71,14 +72,14 @@ class EmbedLoader(BaseLoader):
            vocab = Vocabulary()
            for w in pretrain.keys():
                vocab.update(w)
        embedding_np = torch.randn(len(vocab), emb_dim)
        embedding_tensor = torch.randn(len(vocab), emb_dim)
        for w, v in pretrain.items():
            if len(v.shape) > 1 or emb_dim != v.shape[0]:
                raise ValueError('pretrained embedding dim is {}, mismatching the required {}'.format(v.shape, (emb_dim,)))
            if vocab.has_word(w):
                embedding_np[vocab[w]] = v
                embedding_tensor[vocab[w]] = v
        # save and return the result
        with open(emb_pkl, "wb") as f:
            _pickle.dump((embedding_np, vocab), f)
        return embedding_np, vocab
            _pickle.dump((embedding_tensor, vocab), f)
        return embedding_tensor, vocab
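For context, a rough sketch of how the method above is typically driven. The public method name (`load_embedding`) and some parameter names are not visible in this hunk and should be treated as assumptions; the file paths are taken from the config section further down.

vocab = Vocabulary()
for w in ["premise", "hypothesis", "neutral"]:   # toy vocabulary for illustration
    vocab.update(w)

embedding_tensor, vocab = EmbedLoader.load_embedding(
    emb_dim=300,
    emb_file="./../data_for_tests/glove.840B.300d.txt",
    emb_type="glove",
    vocab=vocab,
    emb_pkl="./snli/embed.pkl",
)
print(embedding_tensor.size())   # torch.Size([len(vocab), 300])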
@@ -0,0 +1,161 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from fastNLP.models.base_model import BaseModel
from fastNLP.modules import decoder as Decoder, encoder as Encoder

my_inf = 10e12


class SNLI(BaseModel):
    """
    PyTorch network for SNLI.
    """

    def __init__(self, args, init_embedding=None):
        super(SNLI, self).__init__()
        self.vocab_size = args["vocab_size"]
        self.embed_dim = args["embed_dim"]
        self.hidden_size = args["hidden_size"]
        self.batch_first = args["batch_first"]
        self.dropout = args["dropout"]
        self.n_labels = args["num_classes"]
        self.gpu = args["gpu"] and torch.cuda.is_available()

        self.embedding = Encoder.embedding.Embedding(self.vocab_size, self.embed_dim, init_emb=init_embedding,
                                                     dropout=self.dropout)

        self.embedding_layer = Encoder.Linear(self.embed_dim, self.hidden_size)

        self.encoder = Encoder.LSTM(
            input_size=self.embed_dim, hidden_size=self.hidden_size, num_layers=1, bias=True,
            batch_first=self.batch_first, bidirectional=True
        )

        self.inference_layer = Encoder.Linear(self.hidden_size * 4, self.hidden_size)

        self.decoder = Encoder.LSTM(
            input_size=self.hidden_size, hidden_size=self.hidden_size, num_layers=1, bias=True,
            batch_first=self.batch_first, bidirectional=True
        )

        self.output = Decoder.MLP([4 * self.hidden_size, self.hidden_size, self.n_labels], 'tanh')

    def forward(self, premise, hypothesis, premise_len, hypothesis_len):
        """ Forward function

        :param premise: A Tensor representing the premise: [batch size(B), premise seq len(PL), hidden size(H)].
        :param hypothesis: A Tensor representing the hypothesis: [B, hypothesis seq len(HL), H].
        :param premise_len: A Tensor recording which positions are real words and which are padding in the premise: [B, PL].
        :param hypothesis_len: A Tensor recording which positions are real words and which are padding in the hypothesis: [B, HL].
        :return: prediction: A Tensor of classification results: [B, n_labels(N)].
        """

        premise0 = self.embedding_layer(self.embedding(premise))
        hypothesis0 = self.embedding_layer(self.embedding(hypothesis))

        _BP, _PSL, _HP = premise0.size()
        _BH, _HSL, _HH = hypothesis0.size()
        _BPL, _PLL = premise_len.size()
        _HPL, _HLL = hypothesis_len.size()

        assert _BP == _BH and _BPL == _HPL and _BP == _BPL
        assert _HP == _HH
        assert _PSL == _PLL and _HSL == _HLL

        B, PL, H = premise0.size()
        B, HL, H = hypothesis0.size()

        # a0, (ah0, ac0) = self.encoder(premise)  # a0: [B, PL, H * 2], ah0: [2, B, H]
        # b0, (bh0, bc0) = self.encoder(hypothesis)  # b0: [B, HL, H * 2]
        a0 = self.encoder(premise0)  # a0: [B, PL, H * 2]
        b0 = self.encoder(hypothesis0)  # b0: [B, HL, H * 2]

        a = torch.mean(a0.view(B, PL, -1, H), dim=2)  # a: [B, PL, H]
        b = torch.mean(b0.view(B, HL, -1, H), dim=2)  # b: [B, HL, H]

        ai, bi = self.calc_bi_attention(a, b, premise_len, hypothesis_len)

        ma = torch.cat((a, ai, a - ai, a * ai), dim=2)  # ma: [B, PL, 4 * H]
        mb = torch.cat((b, bi, b - bi, b * bi), dim=2)  # mb: [B, HL, 4 * H]

        f_ma = self.inference_layer(ma)
        f_mb = self.inference_layer(mb)

        vat = self.decoder(f_ma)
        vbt = self.decoder(f_mb)

        va = torch.mean(vat.view(B, PL, -1, H), dim=2)  # va: [B, PL, H]
        vb = torch.mean(vbt.view(B, HL, -1, H), dim=2)  # vb: [B, HL, H]

        # va_ave = torch.mean(va, dim=1)  # va_ave: [B, H]
        # va_max, va_arg_max = torch.max(va, dim=1)  # va_max: [B, H]
        # vb_ave = torch.mean(vb, dim=1)  # vb_ave: [B, H]
        # vb_max, vb_arg_max = torch.max(vb, dim=1)  # vb_max: [B, H]

        va_ave = self.mean_pooling(va, premise_len, dim=1)  # va_ave: [B, H]
        va_max, va_arg_max = self.max_pooling(va, premise_len, dim=1)  # va_max: [B, H]
        vb_ave = self.mean_pooling(vb, hypothesis_len, dim=1)  # vb_ave: [B, H]
        vb_max, vb_arg_max = self.max_pooling(vb, hypothesis_len, dim=1)  # vb_max: [B, H]

        v = torch.cat((va_ave, va_max, vb_ave, vb_max), dim=1)  # v: [B, 4 * H]

        # v_mlp = F.tanh(self.mlp_layer1(v))  # v_mlp: [B, H]
        # prediction = self.mlp_layer2(v_mlp)  # prediction: [B, N]

        prediction = F.tanh(self.output(v))  # prediction: [B, N]

        return prediction

    @staticmethod
    def calc_bi_attention(in_x1, in_x2, x1_len, x2_len):
        # in_x1: [batch_size, x1_seq_len, hidden_size]
        # in_x2: [batch_size, x2_seq_len, hidden_size]
        # x1_len: [batch_size, x1_seq_len]
        # x2_len: [batch_size, x2_seq_len]

        assert in_x1.size()[0] == in_x2.size()[0]
        assert in_x1.size()[2] == in_x2.size()[2]
        # The batch size and hidden size must be equal.
        assert in_x1.size()[1] == x1_len.size()[1] and in_x2.size()[1] == x2_len.size()[1]
        # The seq len in in_x and x_len must be equal.
        assert in_x1.size()[0] == x1_len.size()[0] and x1_len.size()[0] == x2_len.size()[0]

        batch_size = in_x1.size()[0]
        x1_max_len = in_x1.size()[1]
        x2_max_len = in_x2.size()[1]

        in_x2_t = torch.transpose(in_x2, 1, 2)  # [batch_size, hidden_size, x2_seq_len]

        attention_matrix = torch.bmm(in_x1, in_x2_t)  # [batch_size, x1_seq_len, x2_seq_len]

        a_mask = x1_len.le(0.5).float() * -my_inf  # [batch_size, x1_seq_len]
        a_mask = a_mask.view(batch_size, x1_max_len, -1)
        a_mask = a_mask.expand(-1, -1, x2_max_len)  # [batch_size, x1_seq_len, x2_seq_len]
        b_mask = x2_len.le(0.5).float() * -my_inf
        b_mask = b_mask.view(batch_size, -1, x2_max_len)
        b_mask = b_mask.expand(-1, x1_max_len, -1)  # [batch_size, x1_seq_len, x2_seq_len]

        attention_a = F.softmax(attention_matrix + a_mask, dim=2)  # [batch_size, x1_seq_len, x2_seq_len]
        attention_b = F.softmax(attention_matrix + b_mask, dim=1)  # [batch_size, x1_seq_len, x2_seq_len]

        out_x1 = torch.bmm(attention_a, in_x2)  # [batch_size, x1_seq_len, hidden_size]
        attention_b_t = torch.transpose(attention_b, 1, 2)
        out_x2 = torch.bmm(attention_b_t, in_x1)  # [batch_size, x2_seq_len, hidden_size]

        return out_x1, out_x2
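The masking above adds -my_inf to the dot-product scores of padded positions before the softmax, so padding receives (near) zero attention weight. A toy shape check of the static method, assuming the SNLI class above is importable; all sizes are arbitrary.

import torch

B, PL, HL, H = 2, 4, 3, 5
a = torch.randn(B, PL, H)
b = torch.randn(B, HL, H)
a_len = torch.ones(B, PL); a_len[:, -1] = 0          # last premise position marked as padding
b_len = torch.ones(B, HL)

ai, bi = SNLI.calc_bi_attention(a, b, a_len, b_len)
print(ai.size(), bi.size())                          # torch.Size([2, 4, 5]) torch.Size([2, 3, 5])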
    @staticmethod
    def mean_pooling(tensor, mask, dim=0):
        masks = mask.view(mask.size(0), mask.size(1), -1).float()
        return torch.sum(tensor * masks, dim=dim) / torch.sum(masks, dim=1)

    @staticmethod
    def max_pooling(tensor, mask, dim=0):
        masks = mask.view(mask.size(0), mask.size(1), -1)
        masks = masks.expand(-1, -1, tensor.size(2)).float()
        return torch.max(tensor + masks.le(0.5).float() * -my_inf, dim=dim)
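A minimal smoke test of the new model, assuming the fastNLP encoder/decoder modules patched elsewhere in this diff are importable. The args keys mirror what __init__ reads; all sizes are arbitrary.

import torch

args = {"vocab_size": 20000, "embed_dim": 300, "hidden_size": 300, "batch_first": True,
        "dropout": 0.5, "num_classes": 3, "gpu": False}
model = SNLI(args)                                   # init_embedding omitted, so a fresh embedding is used

B, PL, HL = 8, 12, 10
premise = torch.randint(0, args["vocab_size"], (B, PL))          # word indices
hypothesis = torch.randint(0, args["vocab_size"], (B, HL))
premise_len = torch.ones(B, PL)                                  # 1 = real word, 0 = padding
hypothesis_len = torch.ones(B, HL)

logits = model(premise, hypothesis, premise_len, hypothesis_len)
print(logits.size())                                             # torch.Size([8, 3])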
@@ -1,12 +1,15 @@
import torch
import torch.nn as nn

from fastNLP.modules.utils import initial_parameter


class MLP(nn.Module):
    def __init__(self, size_layer, activation='relu' , initial_method = None):
    def __init__(self, size_layer, activation='relu', initial_method=None):
        """Multilayer Perceptrons as a decoder

        :param size_layer: list of int, define the size of MLP layers
        :param activation: str or function, the activation function for hidden layers
        :param size_layer: list of int, define the size of MLP layers.
        :param activation: str or function, the activation function for hidden layers.
        :param initial_method: str, the name of init method.

        .. note::
            There is no activation function applied to the output layer.

@@ -23,7 +26,7 @@ class MLP(nn.Module):
        actives = {
            'relu': nn.ReLU(),
            'tanh': nn.Tanh()
            'tanh': nn.Tanh(),
        }
        if activation in actives:
            self.hidden_active = actives[activation]

@@ -31,7 +34,7 @@ class MLP(nn.Module):
            self.hidden_active = activation
        else:
            raise ValueError("should set activation correctly: {}".format(activation))
        initial_parameter(self, initial_method )
        initial_parameter(self, initial_method)

    def forward(self, x):
        for layer in self.hiddens:

@@ -40,13 +43,11 @@ class MLP(nn.Module):
        return x


if __name__ == '__main__':
    net1 = MLP([5,10,5])
    net2 = MLP([5,10,5], 'tanh')
    net1 = MLP([5, 10, 5])
    net2 = MLP([5, 10, 5], 'tanh')
    for net in [net1, net2]:
        x = torch.randn(5, 5)
        y = net(x)
        print(x)
        print(y)
@@ -1,6 +1,8 @@
import torch.nn as nn

from fastNLP.modules.utils import initial_parameter


class Linear(nn.Module):
    """
    Linear module

@@ -12,10 +14,11 @@ class Linear(nn.Module):
        bidirectional : If True, becomes a bidirectional RNN
    """

    def __init__(self, input_size, output_size, bias=True,initial_method = None ):
    def __init__(self, input_size, output_size, bias=True, initial_method=None):
        super(Linear, self).__init__()
        self.linear = nn.Linear(input_size, output_size, bias)
        initial_parameter(self, initial_method)

    def forward(self, x):
        x = self.linear(x)
        return x
@@ -14,16 +14,23 @@ class LSTM(nn.Module):
        bidirectional : If True, becomes a bidirectional RNN. Default: False.
    """

    def __init__(self, input_size, hidden_size=100, num_layers=1, dropout=0.0, bidirectional=False,
                 initial_method=None):
    def __init__(self, input_size, hidden_size=100, num_layers=1, dropout=0.0, batch_first=True,
                 bidirectional=False, bias=True, initial_method=None, get_hidden=False):
        super(LSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=True,
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bias=bias, batch_first=batch_first,
                            dropout=dropout, bidirectional=bidirectional)
        self.get_hidden = get_hidden
        initial_parameter(self, initial_method)

    def forward(self, x):
        x, _ = self.lstm(x)
        return x
    def forward(self, x, h0=None, c0=None):
        if h0 is not None and c0 is not None:
            x, (ht, ct) = self.lstm(x, (h0, c0))
        else:
            x, (ht, ct) = self.lstm(x)
        if self.get_hidden:
            return x, (ht, ct)
        else:
            return x


if __name__ == "__main__":
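A brief sketch of the two return modes the reworked forward supports; the sizes are arbitrary and the module is assumed to be imported from the patched file.

import torch

lstm = LSTM(input_size=300, hidden_size=300, bidirectional=True)        # get_hidden defaults to False
out = lstm(torch.randn(8, 12, 300))                                     # out: [8, 12, 600]

lstm_h = LSTM(input_size=300, hidden_size=300, bidirectional=True, get_hidden=True)
out, (ht, ct) = lstm_h(torch.randn(8, 12, 300))                         # ht, ct: [2, 8, 300]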
@@ -45,3 +45,28 @@ use_cuda = true
learn_rate = 1e-3
momentum = 0.9
model_name = "class_model.pkl"

[snli_trainer]
epochs = 5
batch_size = 32
validate = true
save_best_dev = true
use_cuda = true
learn_rate = 1e-4
loss = "cross_entropy"
print_every_step = 1000

[snli_tester]
batch_size = 512
use_cuda = true

[snli_model]
model_name = "snli_model.pkl"
embed_dim = 300
hidden_size = 300
batch_first = true
dropout = 0.5
gpu = true
embed_file = "./../data_for_tests/glove.840B.300d.txt"
embed_pkl = "./snli/embed.pkl"
examples = 0
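fastNLP has its own config loader for these files; purely as a library-agnostic illustration of what the new sections hold, the INI-style format can also be read with the standard library. The file name below is a placeholder.

import configparser
import json

cfg = configparser.ConfigParser()
cfg.read("config.cfg")                                         # placeholder path for this config file

batch_size = cfg.getint("snli_trainer", "batch_size")          # 32
use_cuda = cfg.getboolean("snli_trainer", "use_cuda")          # True
learn_rate = cfg.getfloat("snli_trainer", "learn_rate")        # 1e-4
embed_file = json.loads(cfg.get("snli_model", "embed_file"))   # strips the JSON-style quotes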