import numpy as np
import tensorflow as tf


def tf_lstm(x, y_):
    '''
    LSTM model in TensorFlow, for MNIST dataset.

    Parameters:
        x: Variable(tensorflow.python.framework.ops.Tensor), shape (N, dims)
        y_: Variable(tensorflow.python.framework.ops.Tensor), shape (N, num_classes)
    Return:
        loss: Variable(tensorflow.python.framework.ops.Tensor), shape (1,)
        y: Variable(tensorflow.python.framework.ops.Tensor), shape (N, num_classes)
    '''

    print("Building LSTM model in tensorflow...")
    diminput = 28
    dimhidden = 128
    dimoutput = 10
    nsteps = 28

    forget_gate_w = tf.Variable(np.random.normal(
        scale=0.1, size=(diminput, dimhidden)).astype(np.float32))
    forget_gate_u = tf.Variable(np.random.normal(
        scale=0.1, size=(dimhidden, dimhidden)).astype(np.float32))
    forget_gate_b = tf.Variable(np.random.normal(
        scale=0.1, size=(dimhidden,)).astype(np.float32))
    input_gate_w = tf.Variable(np.random.normal(
        scale=0.1, size=(diminput, dimhidden)).astype(np.float32))
    input_gate_u = tf.Variable(np.random.normal(
        scale=0.1, size=(dimhidden, dimhidden)).astype(np.float32))
    input_gate_b = tf.Variable(np.random.normal(
        scale=0.1, size=(dimhidden,)).astype(np.float32))
    output_gate_w = tf.Variable(np.random.normal(
        scale=0.1, size=(diminput, dimhidden)).astype(np.float32))
    output_gate_u = tf.Variable(np.random.normal(
        scale=0.1, size=(dimhidden, dimhidden)).astype(np.float32))
    output_gate_b = tf.Variable(np.random.normal(
        scale=0.1, size=(dimhidden,)).astype(np.float32))
    tanh_w = tf.Variable(np.random.normal(
        scale=0.1, size=(diminput, dimhidden)).astype(np.float32))
    tanh_u = tf.Variable(np.random.normal(
        scale=0.1, size=(dimhidden, dimhidden)).astype(np.float32))
    tanh_b = tf.Variable(np.random.normal(
        scale=0.1, size=(dimhidden,)).astype(np.float32))
    out_weights = tf.Variable(np.random.normal(
        scale=0.1, size=(dimhidden, dimoutput)).astype(np.float32))
    out_bias = tf.Variable(np.random.normal(
        scale=0.1, size=(dimoutput,)).astype(np.float32))
    initial_state = tf.zeros((tf.shape(x)[0], dimhidden), dtype=tf.float32)

    last_c_state = initial_state
    last_h_state = initial_state

    for i in range(nsteps):
        cur_x = tf.slice(x, (0, i * diminput), (-1, diminput))
        # forget gate
        cur_forget = tf.matmul(last_h_state, forget_gate_u) + \
            tf.matmul(cur_x, forget_gate_w) + forget_gate_b
        cur_forget = tf.sigmoid(cur_forget)
        # input gate
        cur_input = tf.matmul(last_h_state, input_gate_u) + \
            tf.matmul(cur_x, input_gate_w) + input_gate_b
        cur_input = tf.sigmoid(cur_input)
        # output gate
        cur_output = tf.matmul(last_h_state, output_gate_u) + \
            tf.matmul(cur_x, output_gate_w) + output_gate_b
        cur_output = tf.sigmoid(cur_output)
        # tanh
        cur_tanh = tf.matmul(last_h_state, tanh_u) + \
            tf.matmul(cur_x, tanh_w) + tanh_b
        cur_tanh = tf.tanh(cur_tanh)

        last_c_state = last_c_state * cur_forget + cur_input * cur_tanh
        last_h_state = tf.tanh(last_c_state) * cur_output

    y = tf.matmul(last_h_state, out_weights) + out_bias
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)
    loss = tf.reduce_mean(loss)
    return loss, y