# imports
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

img_h = img_w = 28              # MNIST images are 28x28
img_size_flat = img_h * img_w   # 28x28=784, the total number of pixels
n_classes = 10                  # Number of classes, one class per digit


def load_data(mode='train'):
    """
    Function to (download and) load the MNIST data
    :param mode: 'train' or 'test'
    :return: for 'train': (x_train, y_train, x_valid, y_valid);
             for 'test': (x_test, y_test)
    :raises ValueError: if mode is neither 'train' nor 'test'
    """
    # Reject an unknown mode before touching the disk/network; previously the
    # function fell through and returned None, which only failed later when
    # the caller tried to unpack the result.
    if mode not in ('train', 'test'):
        raise ValueError(
            "mode must be 'train' or 'test', got {!r}".format(mode))

    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    if mode == 'train':
        return (mnist.train.images, mnist.train.labels,
                mnist.validation.images, mnist.validation.labels)
    return mnist.test.images, mnist.test.labels


def randomize(x, y):
    """
    Randomizes the order of data samples and their corresponding labels
    :param x: 2-D array of samples, indexed by sample along the first axis
    :param y: array of labels with the same first dimension as x
    :return: (shuffled_x, shuffled_y), both reordered by the same permutation
    """
    permutation = np.random.permutation(y.shape[0])
    shuffled_x = x[permutation, :]
    shuffled_y = y[permutation]
    return shuffled_x, shuffled_y


def get_next_batch(x, y, start, end):
    """
    Slice one mini-batch out of the data
    :param x: samples
    :param y: labels
    :param start: index of the first sample in the batch (inclusive)
    :param end: index one past the last sample in the batch (exclusive)
    :return: (x_batch, y_batch)
    """
    x_batch = x[start:end]
    y_batch = y[start:end]
    return x_batch, y_batch


# Load MNIST data
x_train, y_train, x_valid, y_valid = load_data(mode='train')
print("Size of:")
print("- Training-set:\t\t{}".format(len(y_train)))
print("- Validation-set:\t{}".format(len(y_valid)))

print('x_train:\t{}'.format(x_train.shape))
print('y_train:\t{}'.format(y_train.shape))
# This line reports the validation images, so it is labelled x_valid
# (it was previously mislabelled as x_train).
print('x_valid:\t{}'.format(x_valid.shape))
print('y_valid:\t{}'.format(y_valid.shape))

print(y_valid[:5, :])

# Hyper-parameters
epochs = 10             # Total number of training epochs
batch_size = 100        # Training batch size
display_freq = 100      # Frequency of displaying the training results
learning_rate = 0.001   # The optimization initial learning rate

h1 = 200                # number of nodes in the 1st hidden layer


# weight and bias wrappers
def weight_variable(name, shape):
    """
    Create a weight variable with appropriate initialization
    :param name: weight name (the variable is created as 'W_' + name)
    :param shape: weight shape
    :return: initialized weight variable
    """
    initer = tf.truncated_normal_initializer(stddev=0.01)
    return tf.get_variable('W_' + name,
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)


def bias_variable(name, shape):
    """
    Create a bias variable with appropriate initialization
    :param name: bias variable name (the variable is created as 'b_' + name)
    :param shape: bias variable shape
    :return: initialized bias variable
    """
    # The shape is carried by the constant initializer, so it is not passed
    # to tf.get_variable again.
    initial = tf.constant(0., shape=shape, dtype=tf.float32)
    return tf.get_variable('b_' + name,
                           dtype=tf.float32,
                           initializer=initial)


def fc_layer(x, num_units, name, use_relu=True):
    """
    Create a fully-connected layer
    :param x: input from previous layer (2-D: the second dimension is used
              as the layer's input width)
    :param num_units: number of hidden units in the fully-connected layer
    :param name: layer name
    :param use_relu: boolean to add ReLU non-linearity (or not)
    :return: The output array
    """
    in_dim = x.get_shape()[1]
    W = weight_variable(name, shape=[in_dim, num_units])
    b = bias_variable(name, [num_units])
    layer = tf.matmul(x, W)
    layer += b
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer


# Create the graph for the neural network (one ReLU hidden layer + linear output)
# Placeholders for inputs (x) and outputs(y)
x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='X')
y = tf.placeholder(tf.float32, shape=[None, n_classes], name='Y')

# Create a fully-connected layer with h1 nodes as hidden layer
fc1 = fc_layer(x, h1, 'FC1', use_relu=True)
# Create a fully-connected layer with n_classes nodes as output layer
output_logits = fc_layer(fc1, n_classes, 'OUT', use_relu=False)

# Define the loss function, optimizer, and accuracy
# Despite its name, `logits` holds the cross-entropy computed from
# output_logits and the labels, not raw logits. The name is kept unchanged
# so that any code referring to it keeps working.
# NOTE(review): later TF 1.x releases deprecate this op in favour of
# softmax_cross_entropy_with_logits_v2 -- confirm the target TF version
# before switching.
logits = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=output_logits)
loss = tf.reduce_mean(logits, name='loss')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name='Adam-op').minimize(loss)
correct_prediction = tf.equal(tf.argmax(output_logits, 1), tf.argmax(y, 1), name='correct_pred')
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

# Network predictions
cls_prediction = tf.argmax(output_logits, axis=1, name='predictions')

# export graph
#tf.train.export_meta_graph(filename='neural_network.meta', graph=tf.get_default_graph(), clear_extraneous_savers= True, as_text = True)

# Create the op for initializing all variables
init = tf.global_variables_initializer()

# Create an interactive session (to keep the session in the other cells)
sess = tf.InteractiveSession()
# Initialize all variables
sess.run(init)
# Number of training iterations in each epoch; floor division drops the
# final partial batch when the training-set size is not a multiple of
# batch_size.
num_tr_iter = len(y_train) // batch_size
for epoch in range(epochs):
    print('Training epoch: {}'.format(epoch + 1))
    # Randomly shuffle the training data at the beginning of each epoch
    x_train, y_train = randomize(x_train, y_train)
    for iteration in range(num_tr_iter):
        start = iteration * batch_size
        end = (iteration + 1) * batch_size
        x_batch, y_batch = get_next_batch(x_train, y_train, start, end)

        # Run optimization op (backprop)
        feed_dict_batch = {x: x_batch, y: y_batch}
        sess.run(optimizer, feed_dict=feed_dict_batch)

        if iteration % display_freq == 0:
            # Calculate and display the batch loss and accuracy
            # (evaluated after the update step, on the same batch)
            loss_batch, acc_batch = sess.run([loss, accuracy],
                                             feed_dict=feed_dict_batch)

            print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
                  format(iteration, loss_batch, acc_batch))

    # Run validation after every epoch
    # Only the first 1000 validation samples are evaluated.
    feed_dict_valid = {x: x_valid[:1000], y: y_valid[:1000]}
    loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
    print('---------------------------------------------------------')
    print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
          format(epoch + 1, loss_valid, acc_valid))
    print('---------------------------------------------------------')