import numpy as np
import tensorflow as tf


def _normal_variable(*shape):
    """Return a float32 ``tf.Variable`` initialised from N(0, 0.1**2)."""
    initial = np.random.normal(scale=0.1, size=shape).astype(np.float32)
    return tf.Variable(initial)


def _gate_parameters(diminput, dimhidden):
    """Create the (input weights, recurrent weights, bias) of one gate."""
    # Creation order (w, u, b) matters: it fixes the order of the NumPy
    # random draws and therefore the values obtained under a given seed.
    w = _normal_variable(diminput, dimhidden)
    u = _normal_variable(dimhidden, dimhidden)
    b = _normal_variable(dimhidden)
    return w, u, b


def _gate_preactivation(cur_x, last_h_state, params):
    """Return ``h @ u + x @ w + b`` for one gate, before its nonlinearity."""
    w, u, b = params
    return tf.matmul(last_h_state, u) + tf.matmul(cur_x, w) + b


def tf_lstm(x, y_):
    """Build a single-layer LSTM classifier in TensorFlow, for MNIST.

    Each row of ``x`` is read as ``nsteps`` (28) consecutive chunks of
    ``diminput`` (28) values; one chunk is fed to the LSTM cell per time
    step. The final hidden state is projected to ``dimoutput`` (10) logits.

    Parameters:
        x: tensorflow Tensor of rank 2, shape (N, D). The column slices
            taken below require D >= nsteps * diminput = 784.
        y_: tensorflow Tensor, shape (N, num_classes); used as ``labels``
            of the softmax cross-entropy, with num_classes = 10.
    Return:
        loss: tensorflow Tensor, scalar; mean softmax cross-entropy over
            the batch.
        y: tensorflow Tensor, shape (N, 10); unnormalised logits.
    """
    print("Building LSTM model in tensorflow...")
    diminput = 28
    dimhidden = 128
    dimoutput = 10
    nsteps = 28

    # Parameters are created in a fixed order (forget, input, output, tanh,
    # then the output projection) so random initialisation is reproducible.
    forget_params = _gate_parameters(diminput, dimhidden)
    input_params = _gate_parameters(diminput, dimhidden)
    output_params = _gate_parameters(diminput, dimhidden)
    tanh_params = _gate_parameters(diminput, dimhidden)

    out_weights = _normal_variable(dimhidden, dimoutput)
    out_bias = _normal_variable(dimoutput)

    # Cell and hidden state both start at zero; the batch size is taken
    # dynamically from x.
    initial_state = tf.zeros((tf.shape(x)[0], dimhidden), dtype=tf.float32)
    last_c_state = initial_state
    last_h_state = initial_state

    for i in range(nsteps):
        # Columns [i * diminput, (i + 1) * diminput) of every row.
        cur_x = tf.slice(x, (0, i * diminput), (-1, diminput))

        cur_forget = tf.sigmoid(
            _gate_preactivation(cur_x, last_h_state, forget_params))
        cur_input = tf.sigmoid(
            _gate_preactivation(cur_x, last_h_state, input_params))
        cur_output = tf.sigmoid(
            _gate_preactivation(cur_x, last_h_state, output_params))
        cur_tanh = tf.tanh(
            _gate_preactivation(cur_x, last_h_state, tanh_params))

        last_c_state = last_c_state * cur_forget + cur_input * cur_tanh
        last_h_state = tf.tanh(last_c_state) * cur_output

    y = tf.matmul(last_h_state, out_weights) + out_bias
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)
    loss = tf.reduce_mean(loss)
    return loss, y