Commit 383d491 (1 parent: 0616c4a). Showing 12 changed files with 543 additions and 50 deletions.
@@ -0,0 +1,52 @@
function checkNNGradients(lambda)
%CHECKNNGRADIENTS Creates a small neural network to check the
%backpropagation gradients
%   CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
%   backpropagation gradients; it will output the analytical gradients
%   produced by your backprop code and the numerical gradients (computed
%   using computeNumericalGradient). These two gradient computations should
%   result in very similar values.
%

if ~exist('lambda', 'var') || isempty(lambda)
    lambda = 0;
end

input_layer_size = 3;
hidden_layer_size = 5;
num_labels = 3;
m = 5;

% We generate some 'random' test data
Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size);
Theta2 = debugInitializeWeights(num_labels, hidden_layer_size);
% Reusing debugInitializeWeights to generate X
X = debugInitializeWeights(m, input_layer_size - 1);
y = 1 + mod(1:m, num_labels)';

% Unroll parameters
nn_params = [Theta1(:) ; Theta2(:)];

% Short hand for cost function
costFunc = @(p) nnCostFunction(p, input_layer_size, hidden_layer_size, ...
                               num_labels, X, y, lambda);

[cost, grad] = costFunc(nn_params);
numgrad = computeNumericalGradient(costFunc, nn_params);

% Visually examine the two gradient computations. The two columns
% you get should be very similar.
disp([numgrad grad]);
fprintf(['The above two columns you get should be very similar.\n' ...
         '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']);

% Evaluate the norm of the difference between the two solutions.
% If you have a correct implementation, and assuming you used EPSILON = 0.0001
% in computeNumericalGradient.m, then diff below should be less than 1e-9
diff = norm(numgrad-grad)/norm(numgrad+grad);

fprintf(['If your backpropagation implementation is correct, then \n' ...
         'the relative difference will be small (less than 1e-9). \n' ...
         '\nRelative Difference: %g\n'], diff);

end
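Once nnCostFunction returns gradients, the check can be run directly from the Octave/MATLAB prompt. A minimal usage sketch (not part of the committed file; the lambda value is only illustrative):

checkNNGradients;      % lambda defaults to 0 (no regularization)
checkNNGradients(3);   % repeat the check with regularization, lambda = 3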
@@ -0,0 +1,29 @@
function numgrad = computeNumericalGradient(J, theta)
%COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
%and gives us a numerical estimate of the gradient.
%   numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
%   gradient of the function J around theta. Calling y = J(theta) should
%   return the function value at theta.

% Notes: The following code implements numerical gradient checking and
%        returns the numerical gradient. It sets numgrad(i) to (a numerical
%        approximation of) the partial derivative of J with respect to the
%        i-th input argument, evaluated at theta. (i.e., numgrad(i) should
%        be (approximately) the partial derivative of J with respect
%        to theta(i).)
%

numgrad = zeros(size(theta));
perturb = zeros(size(theta));
e = 1e-4;
for p = 1:numel(theta)
    % Set perturbation vector
    perturb(p) = e;
    loss1 = J(theta - perturb);
    loss2 = J(theta + perturb);
    % Compute Numerical Gradient (central difference)
    numgrad(p) = (loss2 - loss1) / (2*e);
    perturb(p) = 0;
end

end
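As a sanity check of the central-difference estimate itself, it can be compared against a function whose gradient is known in closed form. A minimal sketch, assuming only the function above (the quadratic J and the test vector are illustrative, not part of the commit):

J = @(theta) 0.5 * sum(theta .^ 2);           % J(theta) = 0.5 * ||theta||^2
theta = [1; -2; 3];
numgrad  = computeNumericalGradient(J, theta);
analytic = theta;                             % exact gradient of this J is theta itself
disp([numgrad analytic]);                     % the two columns should agree very closely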
@@ -0,0 +1,22 @@
function W = debugInitializeWeights(fan_out, fan_in)
%DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in
%incoming connections and fan_out outgoing connections using a fixed
%strategy; this will help you later in debugging
%   W = DEBUGINITIALIZEWEIGHTS(fan_out, fan_in) initializes the weights
%   of a layer with fan_in incoming connections and fan_out outgoing
%   connections using a fixed set of values
%
%   Note that W is a matrix of size (fan_out, 1 + fan_in), where the
%   first column of W corresponds to the "bias" terms
%

% Set W to zeros
W = zeros(fan_out, 1 + fan_in);

% Initialize W using "sin"; this ensures that W always contains the same
% values, which is useful for debugging
W = reshape(sin(1:numel(W)), size(W)) / 10;

% =========================================================================

end
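Because the initialization is deterministic, repeated calls return identical matrices, which keeps the gradient checks reproducible. A minimal sketch (the sizes are illustrative, not part of the commit):

W1 = debugInitializeWeights(5, 3);
W2 = debugInitializeWeights(5, 3);
disp(size(W1));          % 5 x 4, i.e. fan_out by (1 + fan_in)
disp(isequal(W1, W2));   % prints 1: identical values on every call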
@@ -0,0 +1,234 @@
%% Machine Learning Online Class - Exercise 4 Neural Network Learning

%  Instructions
%  ------------
%
%  This file contains code that helps you get started on the
%  neural network exercise. You will need to complete the following
%  functions in this exercise:
%
%     sigmoidGradient.m
%     randInitializeWeights.m
%     nnCostFunction.m
%
%  For this exercise, you will not need to change any code in this file,
%  or any other files other than those mentioned above.
%

%% Initialization
clear ; close all; clc

%% Setup the parameters you will use for this exercise
input_layer_size  = 400;  % 20x20 Input Images of Digits
hidden_layer_size = 25;   % 25 hidden units
num_labels = 10;          % 10 labels, from 1 to 10
                          % (note that we have mapped "0" to label 10)

%% =========== Part 1: Loading and Visualizing Data =============
%  We start the exercise by first loading and visualizing the dataset.
%  You will be working with a dataset that contains handwritten digits.
%

% Load Training Data
fprintf('Loading and Visualizing Data ...\n')

load('ex4data1.mat');
m = size(X, 1);

% Randomly select 100 data points to display
sel = randperm(size(X, 1));
sel = sel(1:100);

displayData(X(sel, :));

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ================ Part 2: Loading Parameters ================
% In this part of the exercise, we load some pre-initialized
% neural network parameters.

fprintf('\nLoading Saved Neural Network Parameters ...\n')

% Load the weights into variables Theta1 and Theta2
load('ex4weights.mat');

% Unroll parameters
nn_params = [Theta1(:) ; Theta2(:)];

%% ================ Part 3: Compute Cost (Feedforward) ================
%  For the neural network, you should first implement the feedforward
%  part of the network, which returns the cost only. You should complete
%  the code in nnCostFunction.m to return the cost. After implementing the
%  feedforward pass to compute the cost, you can verify that your
%  implementation is correct by checking that you get the same cost as us
%  for the fixed debugging parameters.
%
%  We suggest implementing the feedforward cost *without* regularization
%  first so that it will be easier for you to debug. Later, in Part 4, you
%  will get to implement the regularized cost.
%
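%  (As a hint rather than the graded code, the unregularized cost has the
%   usual one-vs-all logistic-regression form, summed over all m examples
%   and all K output units:
%     J = (1/m) * sum_i sum_k [ -y_k(i)*log(h(x(i))_k)
%                               - (1 - y_k(i))*log(1 - h(x(i))_k) ]
%   where y_k(i) is the one-hot encoding of label y(i).)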
fprintf('\nFeedforward Using Neural Network ...\n')

% Weight regularization parameter (we set this to 0 here).
lambda = 0;

J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
                   num_labels, X, y, lambda);

fprintf(['Cost at parameters (loaded from ex4weights): %f '...
         '\n(this value should be about 0.287629)\n'], J);

fprintf('\nProgram paused. Press enter to continue.\n');
pause;

%% =============== Part 4: Implement Regularization ===============
%  Once your cost function implementation is correct, you should now
%  continue to implement the regularization with the cost.
%
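%  (Again only as a hint: the regularization term adds
%     (lambda/(2*m)) * (sum of the squared entries of Theta1 and Theta2,
%                       excluding the bias columns Theta1(:,1) and Theta2(:,1))
%   to the unregularized cost above.)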
fprintf('\nChecking Cost Function (w/ Regularization) ... \n')

% Weight regularization parameter (we set this to 1 here).
lambda = 1;

J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
                   num_labels, X, y, lambda);

fprintf(['Cost at parameters (loaded from ex4weights): %f '...
         '\n(this value should be about 0.383770)\n'], J);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ================ Part 5: Sigmoid Gradient ================
%  Before you start implementing the neural network, you will first
%  implement the gradient for the sigmoid function. You should complete the
%  code in the sigmoidGradient.m file.
%
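%  (A common closed form, noted here as a hint rather than the graded code:
%     sigmoidGradient(z) = sigmoid(z) .* (1 - sigmoid(z))
%   applied element-wise, so it works for scalars, vectors, and matrices.)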
fprintf('\nEvaluating sigmoid gradient...\n')

g = sigmoidGradient([1 -0.5 0 0.5 1]);
fprintf('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n ');
fprintf('%f ', g);
fprintf('\n\n');

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ================ Part 6: Initializing Parameters ================
%  In this part of the exercise, you will be starting to implement a two
%  layer neural network that classifies digits. You will start by
%  implementing a function to initialize the weights of the neural network
%  (randInitializeWeights.m)
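%  (One common strategy for randInitializeWeights, shown only as a sketch:
%     epsilon_init = 0.12;
%     W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
%   which draws each weight uniformly from [-epsilon_init, epsilon_init].)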
fprintf('\nInitializing Neural Network Parameters ...\n')

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size);
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels);

% Unroll parameters
initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)];

%% =============== Part 7: Implement Backpropagation ===============
%  Once your cost matches up with ours, you should proceed to implement the
%  backpropagation algorithm for the neural network. You should add to the
%  code you've written in nnCostFunction.m to return the partial
%  derivatives of the parameters.
%
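%  (One vectorization of the standard per-example update, noted purely as a
%   hint; variable names here are illustrative and assume column-vector
%   activations with y_onehot the one-hot label:
%     delta3 = a3 - y_onehot;                                  % output-layer error
%     delta2 = (Theta2(:, 2:end)' * delta3) .* sigmoidGradient(z2);
%   accumulate delta3*a2' and delta2*a1' over the m examples and divide by m
%   to obtain Theta2_grad and Theta1_grad.)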
fprintf('\nChecking Backpropagation... \n');

% Check gradients by running checkNNGradients
checkNNGradients;

fprintf('\nProgram paused. Press enter to continue.\n');
pause;

%% =============== Part 8: Implement Regularization ===============
%  Once your backpropagation implementation is correct, you should now
%  continue to implement the regularization with the cost and gradient.
%

fprintf('\nChecking Backpropagation (w/ Regularization) ... \n')

% Check gradients by running checkNNGradients
lambda = 3;
checkNNGradients(lambda);

% Also output the costFunction debugging values
debug_J = nnCostFunction(nn_params, input_layer_size, ...
                         hidden_layer_size, num_labels, X, y, lambda);

fprintf(['\n\nCost at (fixed) debugging parameters (w/ lambda = 3): %f ' ...
         '\n(this value should be about 0.576051)\n\n'], debug_J);

fprintf('Program paused. Press enter to continue.\n');
pause;
%% =================== Part 9: Training NN ===================
%  You have now implemented all the code necessary to train a neural
%  network. To train your neural network, we will now use "fmincg", which
%  is a function that works similarly to "fminunc". Recall that these
%  advanced optimizers are able to train our cost functions efficiently as
%  long as we provide them with the gradient computations.
%
fprintf('\nTraining Neural Network... \n')

%  After you have completed the assignment, change MaxIter to a larger
%  value to see how more training helps.
options = optimset('MaxIter', 50);

%  You should also try different values of lambda
lambda = 1;

% Create "short hand" for the cost function to be minimized
costFunction = @(p) nnCostFunction(p, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, X, y, lambda);

% Now, costFunction is a function that takes in only one argument (the
% neural network parameters)
[nn_params, cost] = fmincg(costFunction, initial_nn_params, options);

% Obtain Theta1 and Theta2 back from nn_params
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));

Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1));

fprintf('Program paused. Press enter to continue.\n');
pause;
%% ================= Part 10: Visualize Weights =================
%  You can now "visualize" what the neural network is learning by
%  displaying the hidden units to see what features they are capturing in
%  the data.

fprintf('\nVisualizing Neural Network... \n')

displayData(Theta1(:, 2:end));

fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ================= Part 11: Implement Predict =================
%  After training the neural network, we would like to use it to predict
%  the labels. You will now implement the "predict" function to use the
%  neural network to predict the labels of the training set. This lets
%  you compute the training set accuracy.
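%  (For reference, predict typically forward-propagates X through both
%   layers and takes, for each example, the index of the largest output
%   unit as the predicted label. A sketch of the idea, not necessarily the
%   exact contents of predict.m:
%     h1 = sigmoid([ones(m, 1) X] * Theta1');
%     h2 = sigmoid([ones(m, 1) h1] * Theta2');
%     [~, p] = max(h2, [], 2);)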
pred = predict(Theta1, Theta2, X);

fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);
Binary file not shown.
Binary file not shown.