- from collections import defaultdict
-
- import numpy as np
- import torch
- import torch.nn.init as init
- import torch.nn as nn
-
-
- def mask_softmax(matrix, mask):
-     """Softmax over the last dimension of `matrix`; the masked variant is not implemented yet."""
-     if mask is None:
-         result = torch.nn.functional.softmax(matrix, dim=-1)
-     else:
-         raise NotImplementedError
-     return result
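-
-
- # Editor's note: a minimal sketch, not part of the original module, of how the unimplemented
- # masked branch of mask_softmax could look, assuming `mask` is a 0/1 tensor broadcastable to
- # `matrix` with 1 marking valid positions (rows that are fully masked would produce NaNs).
- def _masked_softmax_sketch(matrix, mask):
-     # Push masked-out scores to -inf so they receive (near-)zero probability.
-     filled = matrix.masked_fill(mask == 0, float('-inf'))
-     return torch.nn.functional.softmax(filled, dim=-1)
-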
-
- def initial_parameter(net, initial_method=None):
-     """Initialize the parameters of `net` in place with the scheme named by `initial_method`."""
-     if initial_method == 'xavier_uniform':
-         init_method = init.xavier_uniform_
-     elif initial_method == 'xavier_normal':
-         init_method = init.xavier_normal_
-     elif initial_method == 'kaiming_normal' or initial_method == 'msra':
-         init_method = init.kaiming_normal_
-     elif initial_method == 'kaiming_uniform':
-         init_method = init.kaiming_uniform_
-     elif initial_method == 'orthogonal':
-         init_method = init.orthogonal_
-     elif initial_method == 'sparse':
-         init_method = init.sparse_
-     elif initial_method == 'normal':
-         init_method = init.normal_
-     elif initial_method == 'uniform':
-         init_method = init.uniform_
-     else:
-         init_method = init.xavier_normal_
-
-     def weights_init(m):
-         if isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):  # for all the cnn layers
-             if initial_method is not None:
-                 init_method(m.weight.data)
-             else:
-                 init.xavier_normal_(m.weight.data)
-             init.normal_(m.bias.data)
-         elif isinstance(m, nn.LSTM):
-             for w in m.parameters():
-                 if len(w.data.size()) > 1:
-                     init_method(w.data)  # weight
-                 else:
-                     init.normal_(w.data)  # bias
-         elif hasattr(m, 'weight') and m.weight.requires_grad:
-             init_method(m.weight.data)
-         else:
-             for w in m.parameters():
-                 if w.requires_grad:
-                     if len(w.data.size()) > 1:
-                         init_method(w.data)  # weight
-                     else:
-                         init.normal_(w.data)  # bias
-
-     net.apply(weights_init)
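-
-
- # Editor's note: a minimal usage sketch, not part of the original module; the model below is
- # a throwaway example. initial_parameter walks every submodule via net.apply and re-initializes
- # its parameters in place.
- #     model = nn.Sequential(nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 2))
- #     initial_parameter(model, initial_method='kaiming_uniform')
-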
-
- def seq_mask(seq_len, max_len):
-     """Build a (batch, max_len) mask with 1 at the positions inside each sequence length."""
-     mask = [torch.ge(torch.LongTensor(seq_len), i + 1) for i in range(max_len)]
-     mask = torch.stack(mask, 1)
-     return mask
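-
-
- # Editor's note: a small worked example, not part of the original module.
- # seq_mask([3, 1], 4) stacks the per-position comparisons along dim 1 into a (2, 4) mask:
- #     [[1, 1, 1, 0],
- #      [1, 0, 0, 0]]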
-
-
- """
- Code below is from FudanParser. Not tested. Do not use!!!
- """
-
-
- def expand_gt(gt):
-     """expand_gt: Expand ground truth heads into a one-hot matrix
-     Arguments:
-         gt: tensor of (n, l)
-     Return:
-         f: ground truth matrix of (n, l, l), where $gt[i][j] = k$ leads to $f[i][j][k] = 1$.
-     """
-     n, l = gt.shape
-     ret = torch.zeros(n, l, l).long()
-     for i in range(n):
-         ret[i][torch.arange(l).long(), gt[i]] = 1
-     return ret
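-
-
- # Editor's note: a small worked example, not part of the original module.
- # For gt = torch.LongTensor([[2, 0, 1]]), expand_gt(gt)[0] is the 3x3 one-hot matrix
- #     [[0, 0, 1],
- #      [1, 0, 0],
- #      [0, 1, 0]]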
-
-
- def greedy_decoding(arc_f):
-     """greedy_decoding
-     Arguments:
-         arc_f: a tensor in shape of (n, l+1, l+1)
-             length of the sentence is l and index 0 is <root>
-     Output:
-         arc_pred: a tensor in shape of (n, l), indicating the head words
-     """
-     f_arc = arc_f[:, 1:, :]  # ignore the root
-     _, arc_pred = torch.max(f_arc.data, dim=-1, keepdim=False)
-     return arc_pred
-
-
- def mst_decoding(arc_f):
-     """mst_decoding
-     Arguments:
-         arc_f: a tensor in shape of (n, l+1, l+1)
-             length of the sentence is l and index 0 is <root>
-     Output:
-         arc_pred: a tensor in shape of (n, l), indicating the head words
-     """
-     batch_size = arc_f.shape[0]
-     length = arc_f.shape[1]
-     arc_score = arc_f.data.cpu()
-     pred_collection = []
-     for i in range(batch_size):
-         head = mst(arc_score[i].numpy())
-         pred_collection.append(head[1:].reshape((1, length - 1)))
-     arc_pred = torch.LongTensor(np.concatenate(pred_collection, axis=0)).type_as(arc_f).long()
-     return arc_pred
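-
-
- # Editor's note: a minimal usage sketch, not part of the original module. Both decoders expect
- # the same (batch, l + 1, l + 1) arc-score layout with index 0 reserved for <root>:
- #     scores = torch.randn(2, 6, 6)             # 2 sentences, 5 tokens plus <root>
- #     heads_greedy = greedy_decoding(scores)    # (2, 5); per-token argmax, may contain cycles
- #     heads_mst = mst_decoding(scores)          # (2, 5); cycles and extra roots are repaired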
-
-
- def outer_product(features):
-     """outer_product: Get the pairwise product of sequence features
-     Arguments:
-         features: feature vectors of sequence in the shape of (n, l, h)
-     Return:
-         f: product result in (n, l, l, h) shape
-     """
-     n, l, c = features.shape
-     features = features.contiguous()
-     x = features.view(n, l, 1, c)
-     x = x.expand(n, l, l, c)
-     y = features.view(n, 1, l, c).contiguous()
-     y = y.expand(n, l, l, c)
-     return x * y
-
-
- def outer_concat(features):
-     """outer_concat: Get the pairwise concatenation of sequence features
-     Arguments:
-         features: feature vectors of sequence in the shape of (n, l, h)
-     Return:
-         f: concatenation result in (n, l, l, 2 * h) shape
-     """
-     n, l, c = features.shape
-     x = features.contiguous().view(n, l, 1, c)
-     x = x.expand(n, l, l, c)
-     y = features.contiguous().view(n, 1, l, c)
-     y = y.expand(n, l, l, c)
-     return torch.cat((x, y), dim=3)
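-
-
- # Editor's note: a small shape check, not part of the original module, for the two pairwise
- # feature builders above:
- #     feats = torch.randn(2, 5, 8)
- #     outer_product(feats).shape   # torch.Size([2, 5, 5, 8]),  elementwise product per pair
- #     outer_concat(feats).shape    # torch.Size([2, 5, 5, 16]), concatenation per pair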
-
-
- def mst(scores):
-     """
-     https://github.com/tdozat/Parser/blob/0739216129cd39d69997d28cbc4133b360ea3934/lib/models/nn.py#L692 # NOQA
-     """
-     length = scores.shape[0]
-     min_score = scores.min() - 1
-     eye = np.eye(length)
-     scores = scores * (1 - eye) + min_score * eye
-     heads = np.argmax(scores, axis=1)
-     heads[0] = 0
-     tokens = np.arange(1, length)
-     roots = np.where(heads[tokens] == 0)[0] + 1
-     if len(roots) < 1:
-         root_scores = scores[tokens, 0]
-         head_scores = scores[tokens, heads[tokens]]
-         new_root = tokens[np.argmax(root_scores / head_scores)]
-         heads[new_root] = 0
-     elif len(roots) > 1:
-         root_scores = scores[roots, 0]
-         scores[roots, 0] = 0
-         new_heads = np.argmax(scores[roots][:, tokens], axis=1) + 1
-         new_root = roots[np.argmin(scores[roots, new_heads] / root_scores)]
-         heads[roots] = new_heads
-         heads[new_root] = 0
-
-     edges = defaultdict(set)
-     vertices = set((0,))
-     for dep, head in enumerate(heads[tokens]):
-         vertices.add(dep + 1)
-         edges[head].add(dep + 1)
-     for cycle in _find_cycle(vertices, edges):
-         dependents = set()
-         to_visit = set(cycle)
-         while len(to_visit) > 0:
-             node = to_visit.pop()
-             if node not in dependents:
-                 dependents.add(node)
-                 to_visit.update(edges[node])
-         cycle = np.array(list(cycle))
-         old_heads = heads[cycle]
-         old_scores = scores[cycle, old_heads]
-         non_heads = np.array(list(dependents))
-         scores[np.repeat(cycle, len(non_heads)),
-                np.repeat([non_heads], len(cycle), axis=0).flatten()] = min_score
-         new_heads = np.argmax(scores[cycle][:, tokens], axis=1) + 1
-         new_scores = scores[cycle, new_heads] / old_scores
-         change = np.argmax(new_scores)
-         changed_cycle = cycle[change]
-         old_head = old_heads[change]
-         new_head = new_heads[change]
-         heads[changed_cycle] = new_head
-         edges[new_head].add(changed_cycle)
-         edges[old_head].remove(changed_cycle)
-
-     return heads
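-
-
- # Editor's note: a minimal usage sketch, not part of the original module. `mst` takes an
- # (l, l) numpy score matrix in which row/column 0 stand for <root> and returns one head index
- # per position; heads[0] is a dummy entry for <root> and exactly one token is attached to it.
- #     s = np.random.rand(5, 5)
- #     heads = mst(s)    # e.g. an array of 5 head indices with heads[0] == 0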
-
-
- def _find_cycle(vertices, edges):
-     """
-     https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm # NOQA
-     https://github.com/tdozat/Parser/blob/0739216129cd39d69997d28cbc4133b360ea3934/lib/etc/tarjan.py # NOQA
-     """
-     _index = 0
-     _stack = []
-     _indices = {}
-     _lowlinks = {}
-     _onstack = defaultdict(lambda: False)
-     _SCCs = []
-
-     def _strongconnect(v):
-         nonlocal _index
-         _indices[v] = _index
-         _lowlinks[v] = _index
-         _index += 1
-         _stack.append(v)
-         _onstack[v] = True
-
-         for w in edges[v]:
-             if w not in _indices:
-                 _strongconnect(w)
-                 _lowlinks[v] = min(_lowlinks[v], _lowlinks[w])
-             elif _onstack[w]:
-                 _lowlinks[v] = min(_lowlinks[v], _indices[w])
-
-         if _lowlinks[v] == _indices[v]:
-             SCC = set()
-             while True:
-                 w = _stack.pop()
-                 _onstack[w] = False
-                 SCC.add(w)
-                 if w == v:
-                     break
-             _SCCs.append(SCC)
-
-     for v in vertices:
-         if v not in _indices:
-             _strongconnect(v)
-
-     return [SCC for SCC in _SCCs if len(SCC) > 1]
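-
-
- # Editor's note: a small worked example, not part of the original module. With edges
- # {1: {2}, 2: {3}, 3: {1}} the vertices form the single loop 1 -> 2 -> 3 -> 1, so the only
- # strongly connected component larger than one vertex is {1, 2, 3}:
- #     _find_cycle({1, 2, 3}, defaultdict(set, {1: {2}, 2: {3}, 3: {1}}))   # [{1, 2, 3}]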
-
-
- # https://github.com/alykhantejani/nninit/blob/master/nninit.py
- def orthogonal(tensor, gain=1):
-     """Fills the input Tensor or Variable with a (semi) orthogonal matrix. The input tensor must have
-     at least 2 dimensions, and for tensors with more than 2 dimensions the trailing dimensions are
-     flattened and treated as the columns of a 2D matrix whose rows equal the first dimension.
-     Reference: "Exact solutions to the nonlinear dynamics of learning in deep linear neural networks" - Saxe, A. et al.
-     Args:
-         tensor: a n-dimension torch.Tensor, where n >= 2
-         gain: optional gain to be applied
-     Examples:
-         >>> w = torch.Tensor(3, 5)
-         >>> orthogonal(w)
-     """
-     if tensor.ndimension() < 2:
-         raise ValueError("Only tensors with 2 or more dimensions are supported.")
-
-     flattened_shape = (tensor.size(0), int(np.prod(tensor.detach().numpy().shape[1:])))
-     flattened = torch.Tensor(flattened_shape[0], flattened_shape[1]).normal_(0, 1)
-
-     u, s, v = np.linalg.svd(flattened.numpy(), full_matrices=False)
-     if u.shape == flattened.detach().numpy().shape:
-         tensor.view_as(flattened).copy_(torch.from_numpy(u))
-     else:
-         tensor.view_as(flattened).copy_(torch.from_numpy(v))
-
-     tensor.mul_(gain)
-     return tensor
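-
-
- # Editor's note: a small check, not part of the original module. For a 3x5 weight the SVD
- # branch copies the right singular vectors, so the rows come out approximately orthonormal:
- #     w = torch.empty(3, 5)
- #     orthogonal(w)
- #     print(w @ w.t())   # close to the 3x3 identity (scaled by gain ** 2)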
-
-
- def generate_step_dropout(masks, hidden_dim, step_dropout, training=False):
-     # assume batch first
-     batch, length = masks.size()
-     if not training:
-         return torch.ones(batch, length, hidden_dim).fill_(1 - step_dropout).cuda(masks.device) \
-             * masks.view(batch, length, 1)
-     masked = torch.zeros(batch, 1, hidden_dim).fill_(step_dropout)
-     masked = torch.bernoulli(masked).repeat(1, length, 1)
-     masked = masked.cuda(masks.device) * masks.view(batch, length, 1)
-     return masked
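-
-
- # Editor's note: a minimal usage sketch, not part of the original module; the function calls
- # .cuda(), so it assumes a GPU is available. With masks of shape (batch, length) it returns a
- # (batch, length, hidden_dim) dropout mask that is drawn once per sequence and repeated over
- # the length dimension during training:
- #     masks = torch.ones(2, 4).cuda()
- #     drop = generate_step_dropout(masks, hidden_dim=8, step_dropout=0.5, training=True)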