Commit 383d491 (1 parent: 0616c4a). Showing 12 changed files with 543 additions and 50 deletions.
@@ -0,0 +1,52 @@
function checkNNGradients(lambda)
%CHECKNNGRADIENTS Creates a small neural network to check the
%backpropagation gradients
%   CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
%   backpropagation gradients; it will output the analytical gradients
%   produced by your backprop code and the numerical gradients (computed
%   using computeNumericalGradient). These two gradient computations should
%   result in very similar values.
%

if ~exist('lambda', 'var') || isempty(lambda)
    lambda = 0;
end

input_layer_size = 3;
hidden_layer_size = 5;
num_labels = 3;
m = 5;

% We generate some 'random' test data
Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size);
Theta2 = debugInitializeWeights(num_labels, hidden_layer_size);
% Reusing debugInitializeWeights to generate X
X = debugInitializeWeights(m, input_layer_size - 1);
y = 1 + mod(1:m, num_labels)';

% Unroll parameters
nn_params = [Theta1(:) ; Theta2(:)];

% Short hand for cost function
costFunc = @(p) nnCostFunction(p, input_layer_size, hidden_layer_size, ...
                               num_labels, X, y, lambda);

[cost, grad] = costFunc(nn_params);
numgrad = computeNumericalGradient(costFunc, nn_params);

% Visually examine the two gradient computations. The two columns
% you get should be very similar.
disp([numgrad grad]);
fprintf(['The above two columns you get should be very similar.\n' ...
         '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']);

% Evaluate the norm of the difference between the two solutions.
% If you have a correct implementation, and assuming you used EPSILON = 0.0001
% in computeNumericalGradient.m, then diff below should be less than 1e-9
diff = norm(numgrad-grad)/norm(numgrad+grad);

fprintf(['If your backpropagation implementation is correct, then \n' ...
         'the relative difference will be small (less than 1e-9). \n' ...
         '\nRelative Difference: %g\n'], diff);

end
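Once nnCostFunction returns gradients, the check can be run directly from the Octave/MATLAB prompt. A minimal usage sketch (not part of the committed file; the lambda value is only illustrative):

checkNNGradients;      % lambda defaults to 0 (no regularization)
checkNNGradients(3);   % repeat the check with regularization, lambda = 3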
@@ -0,0 +1,29 @@
function numgrad = computeNumericalGradient(J, theta)
%COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
%and gives us a numerical estimate of the gradient.
%   numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
%   gradient of the function J around theta. Calling y = J(theta) should
%   return the function value at theta.

% Notes: The following code implements numerical gradient checking and
%        returns the numerical gradient. It sets numgrad(i) to (a numerical
%        approximation of) the partial derivative of J with respect to the
%        i-th input argument, evaluated at theta. (i.e., numgrad(i) should
%        be (approximately) the partial derivative of J with respect
%        to theta(i).)
%

numgrad = zeros(size(theta));
perturb = zeros(size(theta));
e = 1e-4;
for p = 1:numel(theta)
    % Set perturbation vector
    perturb(p) = e;
    loss1 = J(theta - perturb);
    loss2 = J(theta + perturb);
    % Compute Numerical Gradient (central difference)
    numgrad(p) = (loss2 - loss1) / (2*e);
    perturb(p) = 0;
end

end
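As a sanity check of the central-difference estimate itself, it can be compared against a function whose gradient is known in closed form. A minimal sketch, assuming only the function above (the quadratic J and the test vector are illustrative, not part of the commit):

J = @(theta) 0.5 * sum(theta .^ 2);           % J(theta) = 0.5 * ||theta||^2
theta = [1; -2; 3];
numgrad  = computeNumericalGradient(J, theta);
analytic = theta;                             % exact gradient of this J is theta itself
disp([numgrad analytic]);                     % the two columns should agree very closely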
@@ -0,0 +1,22 @@
function W = debugInitializeWeights(fan_out, fan_in)
%DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in
%incoming connections and fan_out outgoing connections using a fixed
%strategy; this will help you later in debugging
%   W = DEBUGINITIALIZEWEIGHTS(fan_out, fan_in) initializes the weights
%   of a layer with fan_in incoming connections and fan_out outgoing
%   connections using a fixed set of values
%
%   Note that W is a matrix of size (fan_out, 1 + fan_in), where the
%   first column of W corresponds to the "bias" terms
%

% Set W to zeros
W = zeros(fan_out, 1 + fan_in);

% Initialize W using "sin"; this ensures that W always contains the same
% values, which is useful for debugging
W = reshape(sin(1:numel(W)), size(W)) / 10;

% =========================================================================

end
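Because the initialization is deterministic, repeated calls return identical matrices, which keeps the gradient checks reproducible. A minimal sketch (the sizes are illustrative, not part of the commit):

W1 = debugInitializeWeights(5, 3);
W2 = debugInitializeWeights(5, 3);
disp(size(W1));          % 5 x 4, i.e. fan_out by (1 + fan_in)
disp(isequal(W1, W2));   % prints 1: identical values on every call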
@@ -0,0 +1,234 @@
%% Machine Learning Online Class - Exercise 4 Neural Network Learning

%  Instructions
%  ------------
%
%  This file contains code that helps you get started on the
%  neural network exercise. You will need to complete the following
%  functions in this exercise:
%
%     sigmoidGradient.m
%     randInitializeWeights.m
%     nnCostFunction.m
%
%  For this exercise, you will not need to change any code in this file,
%  or any other files other than those mentioned above.
%

%% Initialization
clear ; close all; clc

%% Setup the parameters you will use for this exercise
input_layer_size  = 400;  % 20x20 Input Images of Digits
hidden_layer_size = 25;   % 25 hidden units
num_labels = 10;          % 10 labels, from 1 to 10
                          % (note that we have mapped "0" to label 10)

%% =========== Part 1: Loading and Visualizing Data =============
%  We start the exercise by first loading and visualizing the dataset.
%  You will be working with a dataset that contains handwritten digits.
%

% Load Training Data
fprintf('Loading and Visualizing Data ...\n')

load('ex4data1.mat');
m = size(X, 1);

% Randomly select 100 data points to display
sel = randperm(size(X, 1));
sel = sel(1:100);

displayData(X(sel, :));

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ================ Part 2: Loading Parameters ================
% In this part of the exercise, we load some pre-initialized
% neural network parameters.

fprintf('\nLoading Saved Neural Network Parameters ...\n')

% Load the weights into variables Theta1 and Theta2
load('ex4weights.mat');

% Unroll parameters
nn_params = [Theta1(:) ; Theta2(:)];

%% ================ Part 3: Compute Cost (Feedforward) ================
%  For the neural network, you should first implement the feedforward
%  part of the network, which returns the cost only. You should complete
%  the code in nnCostFunction.m to return the cost. After implementing the
%  feedforward pass to compute the cost, you can verify that your
%  implementation is correct by checking that you get the same cost as us
%  for the fixed debugging parameters.
%
%  We suggest implementing the feedforward cost *without* regularization
%  first so that it will be easier for you to debug. Later, in Part 4, you
%  will get to implement the regularized cost.
%
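%  (As a hint rather than the graded code, the unregularized cost has the
%   usual one-vs-all logistic-regression form, summed over all m examples
%   and all K output units:
%     J = (1/m) * sum_i sum_k [ -y_k(i)*log(h(x(i))_k)
%                               - (1 - y_k(i))*log(1 - h(x(i))_k) ]
%   where y_k(i) is the one-hot encoding of label y(i).)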
fprintf('\nFeedforward Using Neural Network ...\n')

% Weight regularization parameter (we set this to 0 here).
lambda = 0;

J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
                   num_labels, X, y, lambda);

fprintf(['Cost at parameters (loaded from ex4weights): %f '...
         '\n(this value should be about 0.287629)\n'], J);

fprintf('\nProgram paused. Press enter to continue.\n');
pause;

%% =============== Part 4: Implement Regularization ===============
%  Once your cost function implementation is correct, you should now
%  continue to implement the regularization with the cost.
%
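%  (Again only as a hint: the regularization term adds
%     (lambda/(2*m)) * (sum of the squared entries of Theta1 and Theta2,
%                       excluding the bias columns Theta1(:,1) and Theta2(:,1))
%   to the unregularized cost above.)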
fprintf('\nChecking Cost Function (w/ Regularization) ... \n')

% Weight regularization parameter (we set this to 1 here).
lambda = 1;

J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
                   num_labels, X, y, lambda);

fprintf(['Cost at parameters (loaded from ex4weights): %f '...
         '\n(this value should be about 0.383770)\n'], J);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ================ Part 5: Sigmoid Gradient ================
%  Before you start implementing the neural network, you will first
%  implement the gradient for the sigmoid function. You should complete the
%  code in the sigmoidGradient.m file.
%
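%  (A common closed form, noted here as a hint rather than the graded code:
%     sigmoidGradient(z) = sigmoid(z) .* (1 - sigmoid(z))
%   applied element-wise, so it works for scalars, vectors, and matrices.)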
fprintf('\nEvaluating sigmoid gradient...\n')

g = sigmoidGradient([1 -0.5 0 0.5 1]);
fprintf('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n ');
fprintf('%f ', g);
fprintf('\n\n');

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ================ Part 6: Initializing Parameters ================
%  In this part of the exercise, you will be starting to implement a two
%  layer neural network that classifies digits. You will start by
%  implementing a function to initialize the weights of the neural network
%  (randInitializeWeights.m)
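%  (One common strategy for randInitializeWeights, shown only as a sketch:
%     epsilon_init = 0.12;
%     W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
%   which draws each weight uniformly from [-epsilon_init, epsilon_init].)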
fprintf('\nInitializing Neural Network Parameters ...\n')

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size);
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels);

% Unroll parameters
initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)];

%% =============== Part 7: Implement Backpropagation ===============
%  Once your cost matches up with ours, you should proceed to implement the
%  backpropagation algorithm for the neural network. You should add to the
%  code you've written in nnCostFunction.m to return the partial
%  derivatives of the parameters.
%
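%  (One vectorization of the standard per-example update, noted purely as a
%   hint; variable names here are illustrative and assume column-vector
%   activations with y_onehot the one-hot label:
%     delta3 = a3 - y_onehot;                                  % output-layer error
%     delta2 = (Theta2(:, 2:end)' * delta3) .* sigmoidGradient(z2);
%   accumulate delta3*a2' and delta2*a1' over the m examples and divide by m
%   to obtain Theta2_grad and Theta1_grad.)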
fprintf('\nChecking Backpropagation... \n');

% Check gradients by running checkNNGradients
checkNNGradients;

fprintf('\nProgram paused. Press enter to continue.\n');
pause;

%% =============== Part 8: Implement Regularization ===============
%  Once your backpropagation implementation is correct, you should now
%  continue to implement the regularization with the cost and gradient.
%

fprintf('\nChecking Backpropagation (w/ Regularization) ... \n')

% Check gradients by running checkNNGradients
lambda = 3;
checkNNGradients(lambda);

% Also output the costFunction debugging values
debug_J = nnCostFunction(nn_params, input_layer_size, ...
                         hidden_layer_size, num_labels, X, y, lambda);

fprintf(['\n\nCost at (fixed) debugging parameters (w/ lambda = 3): %f ' ...
         '\n(this value should be about 0.576051)\n\n'], debug_J);

fprintf('Program paused. Press enter to continue.\n');
pause;
%% =================== Part 9: Training NN ===================
%  You have now implemented all the code necessary to train a neural
%  network. To train your neural network, we will now use "fmincg", which
%  is a function that works similarly to "fminunc". Recall that these
%  advanced optimizers are able to train our cost functions efficiently as
%  long as we provide them with the gradient computations.
%
fprintf('\nTraining Neural Network... \n')

%  After you have completed the assignment, change MaxIter to a larger
%  value to see how more training helps.
options = optimset('MaxIter', 50);

%  You should also try different values of lambda
lambda = 1;

% Create "short hand" for the cost function to be minimized
costFunction = @(p) nnCostFunction(p, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, X, y, lambda);

% Now, costFunction is a function that takes in only one argument (the
% neural network parameters)
[nn_params, cost] = fmincg(costFunction, initial_nn_params, options);

% Obtain Theta1 and Theta2 back from nn_params
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));

Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1));

fprintf('Program paused. Press enter to continue.\n');
pause;
%% ================= Part 10: Visualize Weights =================
%  You can now "visualize" what the neural network is learning by
%  displaying the hidden units to see what features they are capturing in
%  the data.

fprintf('\nVisualizing Neural Network... \n')

displayData(Theta1(:, 2:end));

fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ================= Part 11: Implement Predict =================
%  After training the neural network, we would like to use it to predict
%  the labels. You will now implement the "predict" function to use the
%  neural network to predict the labels of the training set. This lets
%  you compute the training set accuracy.
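%  (For reference, predict typically forward-propagates X through both
%   layers and takes, for each example, the index of the largest output
%   unit as the predicted label. A sketch of the idea, not necessarily the
%   exact contents of predict.m:
%     h1 = sigmoid([ones(m, 1) X] * Theta1');
%     h2 = sigmoid([ones(m, 1) h1] * Theta2');
%     [~, p] = max(h2, [], 2);)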
pred = predict(Theta1, Theta2, X);

fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);
Binary file not shown.
Binary file not shown.