forked from FreshRicardo/MLP-implementation-with-numpy
Commit 67f13d3 (1 parent: 5202518)
Showing 6 changed files with 377 additions and 0 deletions.
NN/__init__.py (new file)
@@ -0,0 +1,5 @@
from NN import dense
from NN import model

Dense = dense.Dense
Model = model.Model
NN/dense.py (new file)
@@ -0,0 +1,105 @@
import numpy as np


class Dense:
    def __init__(self, input_size, output_size, activation='sigmoid', kernel_initializer='MSRA',
                 bias_initializer='zeros'):
        """
        Initialize a dense layer.

        Parameters
        ----------
        input_size : int
            The input size of the layer.
        output_size : int
            The output size of the layer.
        activation : string
            The activation function for the neurons ('sigmoid', 'relu' or 'None').
        kernel_initializer : string
            Initializer for the kernel weights matrix.
        bias_initializer : string
            Initializer for the bias vector.
        """
        self.activation = activation
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.initialize(input_size, output_size)

    def initialize(self, input_size, output_size):
        """
        Initialize the kernel weights matrix and the bias vector.
        Xavier and MSRA initializations are provided.
        """
        if self.kernel_initializer == 'Xavier':
            var = 2. / (input_size + output_size)
            bound = np.sqrt(3.0 * var)
            self.w = np.random.uniform(-bound, bound, size=(input_size, output_size))

        if self.kernel_initializer == 'MSRA':
            # Note: this scale uses fan_in + fan_out; the standard He/MSRA scale is sqrt(2 / fan_in).
            s = np.sqrt(2 / (input_size + output_size))
            self.w = np.random.normal(loc=0.0, scale=s, size=(input_size, output_size))

        if self.bias_initializer == 'zeros':
            self.b = np.zeros(output_size)

    def forward(self, x):
        """
        Return the forward result of the layer.
        """
        return self.activation_func(np.dot(x, self.w) + self.b)

    def activation_func(self, x):
        """
        Return the activation result.
        Sigmoid, ReLU and identity ('None') are provided.
        """
        if self.activation == 'sigmoid':
            return 1 / (1 + np.exp(-x))
        if self.activation == 'relu':
            return np.maximum(x, 0)
        if self.activation == 'None':
            return x

    def grad_activation(self, x):
        """
        Return the derivative of the activation function.
        """
        if self.activation == 'sigmoid':
            a = self.activation_func(x)
            return a * (1 - a)
        if self.activation == 'relu':
            x_ = x.copy()
            x_[x_ <= 0] = 0
            x_[x_ > 0] = 1
            return x_

    def backprop(self, x, sigma, w_next, learning_rate):
        """
        Back-propagation step of the layer.

        Parameters
        ----------
        x : np.array
            2d array, the input of the layer.
        sigma : np.array
            2d array, the iteration term in the back-propagation process.
        w_next : np.array
            2d array, the weights matrix of the next layer (None for the output layer).
        learning_rate : float
            The step size of the gradient-descent update.

        Returns
        -------
        np.array
            The iteration term for the next backprop calculation.
        """
        wx = np.dot(x, self.w) + self.b
        if w_next is not None:
            sigma = np.multiply(np.dot(sigma, w_next.T), self.grad_activation(wx))

        gradient_w = np.dot(x.T, sigma)
        gradient_b = sigma.sum(axis=0)

        self.w -= learning_rate * gradient_w / x.shape[0]
        self.b -= learning_rate * gradient_b / x.shape[0]

        return sigma
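For reference, here is a minimal sketch of driving a single `Dense` layer by hand; the shapes and values below are illustrative assumptions, not part of the repository:

```python
import numpy as np
from NN import Dense

np.random.seed(0)
x = np.random.randn(4, 8)                # a batch of 4 samples with 8 features (arbitrary shapes)
layer = Dense(8, 3, activation='relu')   # MSRA kernel init and zero bias by default

out = layer.forward(x)                   # shape (4, 3)

# For an output layer, backprop receives the loss gradient directly and w_next=None,
# so this layer's activation derivative is not applied to sigma.
sigma = np.random.randn(4, 3)            # stand-in for dL/dz at the layer's output
sigma_out = layer.backprop(x, sigma, w_next=None, learning_rate=0.1)
```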
NN/model.py (new file)
@@ -0,0 +1,190 @@
import numpy as np


class Model:
    def __init__(self, learning_rate):
        """
        Initialize a deep model.

        Parameters
        ----------
        learning_rate : float
            The step size used by the gradient-descent updates.
        """
        self.learning_rate = learning_rate
        self.layers = []

    def add(self, layer):
        """
        Add a single layer to the network.
        """
        self.layers.append(layer)

    def forward(self, input):
        """
        A forward pass of the model.

        Parameters
        ----------
        input : np.array
            2d array, the input data fed to the network.

        Returns
        -------
        list
            The input followed by the output of each layer.
        """
        xs = [input]
        x = input
        for l in self.layers:
            xs.append(l.forward(x))
            x = xs[-1]
        return xs

    def softmax(self, x):
        """
        Softmax function.

        Parameters
        ----------
        x : np.array
            2d array, the input of softmax.

        Returns
        -------
        np.array
            The result of the softmax calculation.
        """
        # Subtract the row-wise maximum for numerical stability.
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def softmax_cross_entropy_with_logits(self, labels, logits):
        """
        Cross-entropy loss with softmax.

        Parameters
        ----------
        labels : np.array
            2d array, one-hot labels of the input data.
        logits : np.array
            2d array, logits predicted by the network.

        Returns
        -------
        float
            The cross-entropy loss, averaged over samples and classes.
        """
        p = self.softmax(logits)
        loss = -np.mean(labels * np.log(p + 1e-15))
        return loss

    def gradient_loss(self, labels, logits):
        """
        Derivative of the cross-entropy loss with softmax.

        Parameters
        ----------
        labels : np.array
            2d array, one-hot labels of the input data.
        logits : np.array
            2d array, logits predicted by the network.

        Returns
        -------
        np.array
            Derivative of the cross-entropy loss with softmax, i.e. softmax(logits) - labels.
        """
        p = self.softmax(logits)
        return p - labels

    def train(self, x, y):
        """
        One training step of the network on a batch.

        Parameters
        ----------
        x : np.array
            2d array, input features.
        y : np.array
            2d array, one-hot input labels.

        Returns
        -------
        float
            The batch loss, averaged over samples and classes.
        """
        xs = self.forward(x)
        y_pred = xs[-1]
        loss = self.softmax_cross_entropy_with_logits(y, y_pred)
        sigma = self.gradient_loss(y, y_pred)

        # Walk the layers backwards; each layer updates its own weights and
        # returns the iteration term for the layer before it.
        w_next = None
        for l_idx in range(len(self.layers))[::-1]:
            layer = self.layers[l_idx]
            x_layer = xs[l_idx]

            w_save = layer.w.copy()
            sigma = layer.backprop(x_layer, sigma, w_next, self.learning_rate)

            w_next = w_save

        return loss

    def batch_generator(self, x, y, batch_size):
        """
        Split the data into batches for mini-batch gradient descent.

        Parameters
        ----------
        x : np.array
            2d array, input features.
        y : np.array
            2d array, one-hot input labels.
        batch_size : int
            The number of samples per batch.

        Returns
        -------
        generator
            A generator of (x, y) batches.
        """
        for i in range(0, len(x), batch_size):
            yield x[i:i + batch_size], y[i:i + batch_size]

    def fit(self, x, y, batch_size):
        """
        Fit the network for one pass over the data.

        Parameters
        ----------
        x : np.array
            2d array, input features.
        y : np.array
            2d array, one-hot input labels.
        batch_size : int
            The number of samples per batch.

        Returns
        -------
        float
            The mean loss over the batches.
        """
        losses = []
        for train_x, train_y in self.batch_generator(x, y, batch_size):
            batch_loss = self.train(train_x, train_y)
            losses.append(batch_loss)
        return np.mean(losses)

    def predict(self, input):
        """
        Compute predictions.

        Parameters
        ----------
        input : np.array
            2d array, input features.

        Returns
        -------
        np.array
            The index of the largest logit for each sample.
        """
        y_pred = self.forward(input)[-1]
        return y_pred.argmax(axis=-1)
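As a quick smoke test of `Dense` and `Model` together, the following sketch trains on random data; the shapes, class count, and hyperparameters here are made-up assumptions, not taken from the repository:

```python
import numpy as np
from NN import Dense, Model

np.random.seed(0)
x = np.random.randn(32, 20)             # 32 samples, 20 features (arbitrary)
labels = np.random.randint(0, 3, 32)    # 3 classes
y = np.eye(3)[labels]                   # one-hot targets, shape (32, 3)

mlp = Model(0.1)
mlp.add(Dense(20, 16, activation='relu'))
mlp.add(Dense(16, 3, activation='None'))   # logits layer: no activation before the softmax loss

loss = mlp.fit(x, y, batch_size=8)      # mean loss over the 4 batches of one pass
preds = mlp.predict(x)                  # class indices, shape (32,)
print(loss, (preds == labels).mean())
```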
README.md
@@ -1,2 +1,33 @@
# MLP-implementation-with-numpy

An implementation of a fully connected neural network in pure NumPy.

## Example
We start with a multi-class classification problem, using the MNIST dataset (60,000 training samples and 10,000 test samples) as an example.

We construct an MLP with two hidden layers of 256 and 64 neurons. The accuracy on the test set reaches 0.9819 after 50 epochs. For details, please refer to example.py.

![accuracy](pics/accuracy.png)

## Environment

* NumPy
* python=3.6.12

## Usage

    from NN import Dense, Model

    MLP = Model(0.1)
    MLP.add(Dense(100, 64, activation='relu'))
    MLP.add(Dense(64, 10, activation='None'))
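Training and prediction then follow the same pattern (a sketch; `x` and the one-hot `y` stand for NumPy arrays you supply):

    loss = MLP.fit(x, y, batch_size=64)    # one pass of mini-batch gradient descent, returns the mean batch loss
    predictions = MLP.predict(x)           # index of the largest logit for each sample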

## Todo
* Add loss functions.
* Add tanh and other activations.
* Add optimizers.
* Add learning rate decay.

## Reference

* [Machine Learning for Beginners: An Introduction to Neural Networks](https://victorzhou.com/blog/intro-to-neural-networks/)
* [A Derivation of Backpropagation in Matrix Form](https://sudeepraja.github.io/Neural/)
example.py (new file)
@@ -0,0 +1,46 @@
import tensorflow as tf
import numpy as np
from NN import Dense, Model
import matplotlib.pyplot as plt


##########################################
# get data
def onehot_encoding(x):
    encoded = np.zeros((x.size, x.max() + 1))
    encoded[np.arange(x.size), x] = 1
    return encoded


(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data(path='mnist.npz')
num_pixels = train_x.shape[1] * train_x.shape[2]
train_x = train_x.reshape(train_x.shape[0], num_pixels).astype('float32') / 255.
test_x = test_x.reshape(test_x.shape[0], num_pixels).astype('float32') / 255.
train_y = onehot_encoding(train_y)
test_y = onehot_encoding(test_y)

##########################################
# construct the model
MLP = Model(0.1)
MLP.add(Dense(784, 256, activation='relu'))
MLP.add(Dense(256, 64, activation='relu'))
MLP.add(Dense(64, 10, activation='None'))

train_label = train_y.argmax(axis=1)
test_label = test_y.argmax(axis=1)
train_acc = []
val_acc = []

##########################################
# fit the model
for epoch in range(50):
    print(epoch, MLP.fit(train_x, train_y, 64))
    train_acc.append((MLP.predict(train_x) == train_label).mean())
    val_acc.append((MLP.predict(test_x) == test_label).mean())

##########################################
# visualize
plt.plot(train_acc, label='train accuracy')
plt.plot(val_acc, label='val accuracy')
plt.legend()
plt.grid()
plt.xlabel('epoch')
plt.ylabel('accuracy')
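The script builds the accuracy curves but never displays or saves the figure; the `pics/accuracy.png` shown in the README was presumably produced from this plot. A hedged finishing touch, where the output path is an assumption taken from the README image link:

```python
import os

os.makedirs('pics', exist_ok=True)   # make sure the output directory exists
plt.savefig('pics/accuracy.png')     # assumed path, matching the README's image reference
plt.show()
```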
[The sixth changed file is a binary image that cannot be rendered in the diff — presumably pics/accuracy.png, referenced by the README.]