# imports
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

img_h = img_w = 28             # MNIST images are 28x28
img_size_flat = img_h * img_w  # 28x28=784, the total number of pixels
n_classes = 10                 # Number of classes, one class per digit


def load_data(mode='train'):
    """
    Function to (download and) load the MNIST data
    :param mode: 'train' or 'test'
    :return: images and the corresponding labels
    """
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    if mode == 'train':
        x_train, y_train, x_valid, y_valid = mnist.train.images, mnist.train.labels, \
                                             mnist.validation.images, mnist.validation.labels
        return x_train, y_train, x_valid, y_valid
    elif mode == 'test':
        x_test, y_test = mnist.test.images, mnist.test.labels
        return x_test, y_test


def randomize(x, y):
    """Randomizes the order of data samples and their corresponding labels"""
    permutation = np.random.permutation(y.shape[0])
    shuffled_x = x[permutation, :]
    shuffled_y = y[permutation]
    return shuffled_x, shuffled_y


def get_next_batch(x, y, start, end):
    """Returns the samples and labels from index start (inclusive) to end (exclusive)"""
    x_batch = x[start:end]
    y_batch = y[start:end]
    return x_batch, y_batch
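# Quick illustration of the two helpers above. This is only a sketch with toy
# arrays (not part of the MNIST pipeline): it shows that shuffling keeps samples
# and labels aligned and that a mini-batch is a plain slice.
_x_demo = np.arange(10).reshape(5, 2)                 # 5 samples, 2 features each
_y_demo = np.arange(5)                                # 5 matching labels
_x_demo, _y_demo = randomize(_x_demo, _y_demo)        # same permutation for both
_x_b, _y_b = get_next_batch(_x_demo, _y_demo, 0, 2)   # first mini-batch of size 2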
# Load MNIST data
x_train, y_train, x_valid, y_valid = load_data(mode='train')
print("Size of:")
print("- Training-set:\t\t{}".format(len(y_train)))
print("- Validation-set:\t{}".format(len(y_valid)))

print('x_train:\t{}'.format(x_train.shape))
print('y_train:\t{}'.format(y_train.shape))
print('x_valid:\t{}'.format(x_valid.shape))
print('y_valid:\t{}'.format(y_valid.shape))

print(y_valid[:5, :])
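# The labels are one-hot encoded (ten columns with a single 1 per row), which
# is why the print above shows rows of 0s and 1s. The integer class ids can be
# recovered with an argmax over the class axis (illustrative check only):
print(np.argmax(y_valid[:5], axis=1))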
# Hyper-parameters
epochs = 10             # Total number of training epochs
batch_size = 100        # Training batch size
display_freq = 100      # Frequency of displaying the training results
learning_rate = 0.001   # The optimization initial learning rate

h1 = 200                # Number of nodes in the 1st hidden layer


# weight and bias wrappers
def weight_variable(name, shape):
    """
    Create a weight variable with appropriate initialization
    :param name: weight name
    :param shape: weight shape
    :return: initialized weight variable
    """
    initer = tf.truncated_normal_initializer(stddev=0.01)
    return tf.get_variable('W_' + name,
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)


def bias_variable(name, shape):
    """
    Create a bias variable with appropriate initialization
    :param name: bias variable name
    :param shape: bias variable shape
    :return: initialized bias variable
    """
    initial = tf.constant(0., shape=shape, dtype=tf.float32)
    return tf.get_variable('b_' + name,
                           dtype=tf.float32,
                           initializer=initial)


def fc_layer(x, num_units, name, use_relu=True):
    """
    Create a fully-connected layer
    :param x: input from previous layer
    :param num_units: number of hidden units in the fully-connected layer
    :param name: layer name
    :param use_relu: boolean to add ReLU non-linearity (or not)
    :return: The output array
    """
    in_dim = x.get_shape()[1]
    W = weight_variable(name, shape=[in_dim, num_units])
    b = bias_variable(name, [num_units])
    layer = tf.matmul(x, W)
    layer += b
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer
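# Not part of the original listing: if this script is run cell-by-cell in a
# notebook, rebuilding the graph makes tf.get_variable complain that 'W_FC1'
# etc. already exist. Resetting the default graph first avoids that, and it is
# safe here because no ops have been added to the graph yet.
tf.reset_default_graph()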
# Create the graph for the network
# Placeholders for inputs (x) and outputs (y)
x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='X')
y = tf.placeholder(tf.float32, shape=[None, n_classes], name='Y')

# Create a fully-connected layer with h1 nodes as hidden layer
fc1 = fc_layer(x, h1, 'FC1', use_relu=True)
# Create a fully-connected layer with n_classes nodes as output layer
output_logits = fc_layer(fc1, n_classes, 'OUT', use_relu=False)
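# Optional shape check (not in the original code): the hidden layer has static
# shape [None, 200] and the logits [None, 10]; both can be inspected without
# running a session.
print('fc1:\t\t{}'.format(fc1.get_shape()))
print('output_logits:\t{}'.format(output_logits.get_shape()))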
# Define the loss function, optimizer, and accuracy
# softmax_cross_entropy_with_logits returns the per-example cross-entropy
# between the softmax of the logits and the one-hot labels
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=output_logits)
loss = tf.reduce_mean(cross_entropy, name='loss')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name='Adam-op').minimize(loss)
correct_prediction = tf.equal(tf.argmax(output_logits, 1), tf.argmax(y, 1), name='correct_pred')
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

# Network predictions
cls_prediction = tf.argmax(output_logits, axis=1, name='predictions')

# export graph
# tf.train.export_meta_graph(filename='neural_network.meta', graph=tf.get_default_graph(), clear_extraneous_savers=True, as_text=True)

# Create the op for initializing all variables
init = tf.global_variables_initializer()

# Create an interactive session (so the same session can be used in later cells)
sess = tf.InteractiveSession()
# Initialize all variables
sess.run(init)
# Number of training iterations in each epoch
num_tr_iter = int(len(y_train) / batch_size)
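# With the standard MNIST split returned by input_data (55,000 training images)
# and batch_size = 100, this works out to 55,000 / 100 = 550 iterations per epoch.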
for epoch in range(epochs):
    print('Training epoch: {}'.format(epoch + 1))
    # Randomly shuffle the training data at the beginning of each epoch
    x_train, y_train = randomize(x_train, y_train)
    for iteration in range(num_tr_iter):
        start = iteration * batch_size
        end = (iteration + 1) * batch_size
        x_batch, y_batch = get_next_batch(x_train, y_train, start, end)

        # Run optimization op (backprop)
        feed_dict_batch = {x: x_batch, y: y_batch}
        sess.run(optimizer, feed_dict=feed_dict_batch)

        if iteration % display_freq == 0:
            # Calculate and display the batch loss and accuracy
            loss_batch, acc_batch = sess.run([loss, accuracy],
                                             feed_dict=feed_dict_batch)
            print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
                  format(iteration, loss_batch, acc_batch))

    # Run validation after every epoch
    feed_dict_valid = {x: x_valid[:1000], y: y_valid[:1000]}
    loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
    print('---------------------------------------------------------')
    print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
          format(epoch + 1, loss_valid, acc_valid))
    print('---------------------------------------------------------')
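# ---------------------------------------------------------------------------
# The block below is a sketch, not part of the listing above: it shows one way
# to evaluate the trained network on the MNIST test set and to visualise a few
# predictions with matplotlib (imported at the top but otherwise unused).
x_test, y_test = load_data(mode='test')
feed_dict_test = {x: x_test, y: y_test}
loss_test, acc_test = sess.run([loss, accuracy], feed_dict=feed_dict_test)
print('---------------------------------------------------------')
print("Test loss: {0:.2f}, test accuracy: {1:.01%}".format(loss_test, acc_test))
print('---------------------------------------------------------')

# Plot the first 9 test images together with their true and predicted classes
cls_pred = sess.run(cls_prediction, feed_dict={x: x_test[:9]})
cls_true = np.argmax(y_test[:9], axis=1)
fig, axes = plt.subplots(3, 3)
for i, ax in enumerate(axes.flat):
    ax.imshow(x_test[i].reshape(img_h, img_w), cmap='binary')
    ax.set_xlabel('True: {0}, Pred: {1}'.format(cls_true[i], cls_pred[i]))
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

sess.close()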