hummingbird
/
fastNLP

 
			
							import numpy as np


class BaseModel(object):
    """Abstract interface shared by all models.

    Every hook below raises ``NotImplementedError``; concrete models are
    expected to override each of them. (The original author describes this
    as the PyTorch base model, although nothing here depends on PyTorch.)
    """

    def __init__(self):
        """Create the model; the base class keeps no state."""

    def prepare_input(self, data):
        """Convert raw data into model inputs.

        :param data: raw input (the original note says "str, raw input
            vector(?)" -- the exact type is left to subclasses)
        :return (X, Y): tuple, input features and labels
        """
        raise NotImplementedError()

    def mode(self, test=False):
        """Switch the model between training and testing behaviour.

        :param test: bool, True to select test mode
        """
        raise NotImplementedError()

    def data_forward(self, *x):
        """Run the forward computation on the given inputs.

        The original comment notes this hook is required by PyTorch nn.
        """
        raise NotImplementedError()

    def grad_backward(self):
        """Run the backward (gradient) step."""
        raise NotImplementedError()

    def get_loss(self, pred, truth):
        """Compute the loss between predictions and ground truth.

        :param pred: model predictions
        :param truth: expected values
        """
        raise NotImplementedError()


class ToyModel(BaseModel):
    """A tiny linear model (``x @ weight + bias``) used for code testing.

    The weight is a random ``(5, 1)`` array and the bias a random scalar,
    both drawn with ``np.random.rand`` at construction time.
    """

    def __init__(self):
        super(ToyModel, self).__init__()
        self.test_mode = False
        self.weight = np.random.rand(5, 1)
        self.bias = np.random.rand()
        # Most recent value computed by get_loss().
        self._loss = 0

    def prepare_input(self, data):
        """Split a 2-D array into features and labels.

        :param data: array indexable as ``data[:, k]``
        :return (X, Y): every column but the last, and the last column
        """
        return data[:, :-1], data[:, -1]

    def mode(self, test=False):
        """Record whether the model is in test mode.

        :param test: bool, stored as ``self.test_mode``
        """
        self.test_mode = test

    def data_forward(self, x):
        """Return ``x @ weight + bias``.

        :param x: array whose last dimension matches the 5 weight rows
        :return: array with a trailing dimension of size 1
        """
        return np.matmul(x, self.weight) + self.bias

    def grad_backward(self):
        """Placeholder backward pass; only prints a message."""
        print("loss gradient backward")

    def get_loss(self, pred, truth):
        """Compute, store and return the mean squared error.

        ``data_forward`` yields predictions of shape ``(N, 1)`` while
        ``prepare_input`` yields labels of shape ``(N,)``. Subtracting those
        directly broadcasts to an ``(N, N)`` matrix and averages over every
        prediction/label pair. To avoid that, ``truth`` is reshaped to match
        ``pred`` whenever both hold the same number of elements.

        :param pred: predictions (array-like)
        :param truth: ground-truth values (array-like)
        :return: mean of the squared element-wise differences
        """
        pred = np.asarray(pred)
        truth = np.asarray(truth)
        # Only realign when it is unambiguous; otherwise keep normal
        # NumPy broadcasting so other shape combinations behave as before.
        if pred.shape != truth.shape and pred.size == truth.size:
            truth = truth.reshape(pred.shape)
        self._loss = np.mean(np.square(pred - truth))
        return self._loss


class Vocabulary(object):
    """
        A collection of lookup tables.

        All tables start as None and must be populated by the caller
        before ``lookup`` can succeed.
    """

    def __init__(self):
        # Container tested with ``in`` to decide whether a word is known.
        self.word_set = None
        # Maps a word to its index into ``emb_matrix``.
        self.word2idx = None
        # Indexable by the values stored in ``word2idx``.
        self.emb_matrix = None

    def lookup(self, word):
        """Return the ``emb_matrix`` entry for ``word``.

        :param word: key to look up
        :return: ``self.emb_matrix[self.word2idx[word]]``
        :raises LookupError: if ``word`` is not in ``word_set`` (including
            when the vocabulary has not been populated yet)
        """
        # An unpopulated vocabulary (word_set is None) contains no words.
        if self.word_set is not None and word in self.word_set:
            return self.emb_matrix[self.word2idx[word]]
        # The error must be raised, not returned: a returned exception
        # object would be mistaken by callers for a successful lookup.
        # str() keeps the message buildable for non-string keys.
        raise LookupError("The key " + str(word) + " does not exist.")


class Document(object):
    """
        A document: a sequence of tokens, where each token is a
        character carrying linguistic attributes.
    """

    def __init__(self):
        """Create an empty document with no backing data yet."""
        # Placeholder for the underlying table; per the original note this
        # is meant to wrap a pandas DataFrame (nothing here enforces it).
        self.dataframe = None