Commit: first commit

FreshRicardo committed Nov 16, 2020
1 parent 5202518 commit 67f13d3
Showing 6 changed files with 377 additions and 0 deletions.
5 changes: 5 additions & 0 deletions NN/__init__.py
@@ -0,0 +1,5 @@
from NN import dense
from NN import model

Dense = dense.Dense
Model = model.Model
105 changes: 105 additions & 0 deletions NN/dense.py
@@ -0,0 +1,105 @@
import numpy as np

class Dense:
def __init__(self, input_size, output_size, activation='sigmoid', kernel_initializer='MSRA',
bias_initializer='zeros'):
"""
Initialize a dense layer.
Parameters
----------
input_size : int
The input size of the layer
output_size : int
The output size of the layer
        activation : string
            The activation function for the layer: 'sigmoid', 'relu', or 'None'.
        kernel_initializer : string
            Initializer for the kernel weights matrix: 'Xavier' or 'MSRA'.
        bias_initializer : string
            Initializer for the bias vector; only 'zeros' is provided.
"""
self.activation = activation
self.kernel_initializer = kernel_initializer
self.bias_initializer = bias_initializer
self.initialize(input_size, output_size)

def initialize(self, input_size, output_size):
"""
Initialize the kernel weights matrix and the bias vector.
        Xavier and MSRA initializations are provided.
"""
if self.kernel_initializer == 'Xavier':
var = 2. / (input_size + output_size)
bound = np.sqrt(3.0 * var)
self.w = np.random.uniform(-bound, bound, size=(input_size, output_size))

if self.kernel_initializer == 'MSRA':
            # MSRA (He) initialization: zero-mean Gaussian with std = sqrt(2 / fan_in)
            s = np.sqrt(2. / input_size)
self.w = np.random.normal(loc=0.0, scale=s, size=(input_size, output_size))

if self.bias_initializer == 'zeros':
self.b = np.zeros(output_size)

def forward(self, x):
"""
Returns a forward result of a layer.
"""
return self.activation_func(np.dot(x, self.w) + self.b)

def activation_func(self, x):
"""
Returns the activation result.
Sigmoid and Relu are provided.
"""
if self.activation == 'sigmoid':
return 1 / (1 + np.exp(-x))
if self.activation == 'relu':
return np.maximum(x, 0)
if self.activation == 'None':
return x

def grad_activation(self, x):
"""
Returns the derivative of the activation function.
"""
if self.activation == 'sigmoid':
a = self.activation_func(x)
return a * (1 - a)
        if self.activation == 'relu':
            x_ = x.copy()
            x_[x_ <= 0] = 0
            x_[x_ > 0] = 1
            return x_
        if self.activation == 'None':
            return np.ones_like(x)

def backprop(self, x, sigma, w_next, learning_rate):
"""
Back-propagation process of a layer.
Parameters
----------
x : np.array
2d array, the input of the layer
sigma : np.array
2d array, the iteration term in the Back-propagation process.
w_next : np.array
2d array, the weights matrix of the next layer.
        learning_rate: float
            The step size used in the gradient descent weight update.
Returns
-------
np.array
the iteration term for next backprop calculation.
"""
wx = np.dot(x, self.w) + self.b
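        # Delta recursion: for hidden layers, sigma = (sigma_next @ w_next.T) * f'(wx);
        # for the output layer, the incoming sigma is already dLoss/dlogits and is used directly.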
if w_next is not None:
sigma = np.multiply(np.dot(sigma, w_next.T), self.grad_activation(wx))

gradient_w = np.dot(x.T, sigma)
gradient_b = sigma.sum(axis=0)

self.w -= learning_rate * gradient_w / x.shape[0]
self.b -= learning_rate * gradient_b / x.shape[0]

return sigma
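
A quick wiring check for the layer above (a minimal sketch, not part of the commit; it assumes the package is importable as `NN`):

```python
import numpy as np
from NN import Dense

layer = Dense(4, 3, activation='relu', kernel_initializer='Xavier')
x = np.random.randn(2, 4)   # batch of 2 samples, 4 features each
out = layer.forward(x)      # (2, 4) @ (4, 3) -> shape (2, 3)

assert out.shape == (2, 3)
assert np.all(out >= 0)     # relu output is non-negative
```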
190 changes: 190 additions & 0 deletions NN/model.py
@@ -0,0 +1,190 @@
import numpy as np

class Model:
def __init__(self, learning_rate):
"""
Initialize a deep model.
Parameters
----------
        learning_rate: float
            The step size used in the gradient descent weight update.
"""
self.learning_rate = learning_rate
self.layers = []

def add(self, layer):
"""
Add a single layer to the network.
"""
self.layers.append(layer)

def forward(self, input):
"""
A forward pass of a deep model.
Parameters
----------
input: np.array
            2d array, the input data fed to the network.
Returns
-------
        list
            the input followed by the output of each layer.
"""
xs = [input]
x = input
for l in self.layers:
xs.append(l.forward(x))
x = xs[-1]
return xs

def softmax(self, x):
"""
Softmax function
Parameters
----------
x: np.array
2d array, the input of softmax.
Returns
-------
np.array
the result of softmax calculation.
"""
exps = np.exp(x - np.max(x, axis=1, keepdims=True))
return exps / np.sum(exps, axis=1, keepdims=True)

def softmax_cross_entropy_with_logits(self, labels, logits):
"""
Cross entropy loss with softmax.
Parameters
----------
labels: np.array
2d array, labels of input data.
logits: np.array
            2d array, logits predicted by the network.
Returns
-------
float
the cross entropy loss.
"""
p = self.softmax(logits)
        # average over samples, consistent with the per-sample gradient (p - labels) used in backprop
        loss = -np.sum(labels * np.log(p + 1e-15)) / labels.shape[0]
return loss

def gradient_loss(self, labels, logits):
"""
Derivative of cross entropy loss with softmax.
Parameters
----------
labels: np.array
2d array, labels of input data.
logits: np.array
            2d array, logits predicted by the network.
Returns
-------
np.array
derivative of cross entropy loss with Softmax.
"""
p = self.softmax(logits)
return p - labels

def train(self, x, y):
"""
Train process of the network.
Parameters
----------
x: np.array
2d array, input features.
y: np.array
2d array, input labels.
Returns
-------
        float
            mean cross-entropy loss over the batch.
"""
xs = self.forward(x)
y_pred = xs[-1]
loss = self.softmax_cross_entropy_with_logits(y, y_pred)
sigma = self.gradient_loss(y, y_pred)

w_next = None
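        # Walk the layers in reverse; keep a copy of each layer's pre-update weights so the
        # earlier layer's delta is computed with the same weights used in the forward pass.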
for l_idx in range(len(self.layers))[::-1]:
layer = self.layers[l_idx]
x_layer = xs[l_idx]

w_save = layer.w.copy()
sigma = layer.backprop(x_layer, sigma, w_next, self.learning_rate)

w_next = w_save

return loss

def batch_generator(self, x, y, batch_size):
"""
Split data into batches for mini-batch gradient descent.
Parameters
----------
        x: np.array
            2d array, input features.
        y: np.array
            2d array, input labels.
        batch_size: int
            number of samples per batch.
Returns
-------
generator
a generator of batches.
"""
for i in range(0, len(x), batch_size):
yield x[i:i + batch_size], y[i:i + batch_size]

def fit(self, x, y, batch_size):
"""
Fit the network.
Parameters
----------
        x: np.array
            2d array, input features.
        y: np.array
            2d array, input labels.
        batch_size: int
            number of samples per mini-batch.
Returns
-------
float
mean loss of batches.
"""
losses = []
for train_x, train_y in self.batch_generator(x, y, batch_size):
batch_loss = self.train(train_x, train_y)
losses.append(batch_loss)
return np.mean(losses)

def predict(self, input):
"""
Compute predictions.
Parameters
----------
        input: np.array
            2d array, input features.
Returns
-------
np.array
indices of the largest logit for each sample.
"""
y_pred = self.forward(input)[-1]
return y_pred.argmax(axis=-1)
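
To see the whole pipeline in one place, here is a minimal sketch on synthetic data (not part of the commit; names, shapes, and hyperparameters are illustrative only):

```python
import numpy as np
from NN import Dense, Model

rng = np.random.RandomState(0)
x = rng.randn(200, 20).astype('float32')
labels = (x[:, 0] > 0).astype(int)   # class depends only on the first feature
y = np.eye(2)[labels]                # one-hot targets, shape (200, 2)

model = Model(learning_rate=0.1)
model.add(Dense(20, 16, activation='relu'))
model.add(Dense(16, 2, activation='None'))   # linear output layer; softmax is applied in the loss

for epoch in range(20):
    loss = model.fit(x, y, 32)       # mean batch loss for one pass over the data

print('final loss:', loss)
print('train accuracy:', (model.predict(x) == labels).mean())
```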
31 changes: 31 additions & 0 deletions README.md
@@ -1,2 +1,33 @@
# MLP-implementation-with-numpy

An implementation of a fully connected neural network (MLP) in pure NumPy.

## Example
Start with a multi-class classification problem, using the MNIST dataset (60,000 training samples and 10,000 test samples) as an example.

Construct an MLP with two hidden layers, one with 256 neurons and one with 64. Accuracy on the test set reaches 0.9819 after 50 epochs. For details, please refer to example.py.

![accuracy](pics/accuracy.png)

## Environment

* Python 3.6.12
* NumPy
* TensorFlow and Matplotlib (only needed to run example.py, for dataset loading and plotting)

## Usage

```python
from NN import Dense, Model

MLP = Model(0.1)
MLP.add(Dense(100, 64, activation='relu'))
MLP.add(Dense(64, 10, activation='None'))
```
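
Training and prediction follow the same pattern (a sketch; `train_x`, `train_y`, and `test_x` stand in for your own arrays, here with 100 features and 10 one-hot classes):

```python
loss = MLP.fit(train_x, train_y, 64)   # one pass of mini-batch gradient descent, returns mean batch loss
pred = MLP.predict(test_x)             # index of the largest logit for each sample
```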

## Todo
* Add loss functions.
* Add tanh and other activations.
* Add optimizers.
* Add learning rate decay.

## Reference

* [Machine Learning for Beginners: An Introduction to Neural Networks](https://victorzhou.com/blog/intro-to-neural-networks/)
* [A Derivation of Backpropagation in Matrix Form](https://sudeepraja.github.io/Neural/)
46 changes: 46 additions & 0 deletions example.py
@@ -0,0 +1,46 @@
import tensorflow as tf
import numpy as np
from NN import Dense, Model
import matplotlib.pyplot as plt

##########################################
#get data
def onehot_encoding(x):
encoded = np.zeros((x.size, x.max()+1))
encoded[np.arange(x.size),x] = 1
return encoded

(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data(path='mnist.npz')
num_pixels = train_x.shape[1] * train_x.shape[2]
train_x = train_x.reshape(train_x.shape[0],num_pixels).astype('float32')/255.
test_x = test_x.reshape(test_x.shape[0],num_pixels).astype('float32')/255.
train_y = onehot_encoding(train_y)
test_y = onehot_encoding(test_y)

##########################################
#construct the model
MLP = Model(0.1)
MLP.add(Dense(784,256,activation='relu'))
MLP.add(Dense(256,64,activation='relu'))
MLP.add(Dense(64,10,activation='None'))

train_label = train_y.argmax(axis=1)
test_label = test_y.argmax(axis=1)
train_acc = []
val_acc = []

##########################################
#fit the model
for epoch in range(50):
print(epoch, MLP.fit(train_x,train_y, 64))
train_acc.append((MLP.predict(train_x) == train_label).mean())
val_acc.append((MLP.predict(test_x) == test_label).mean())

##########################################
#visualize
plt.plot(train_acc, label='train accuracy')
plt.plot(val_acc, label='val accuracy')
plt.legend()
plt.grid()
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()
Binary file added pics/accuracy.png