forked from FreshRicardo/MLP-implementation-with-numpy
Commit 67f13d3 (1 parent: 5202518)
Showing 6 changed files with 377 additions and 0 deletions.
NN/__init__.py (new file)
@@ -0,0 +1,5 @@
from NN import dense
from NN import model

Dense = dense.Dense
Model = model.Model
NN/dense.py (new file)
@@ -0,0 +1,105 @@
import numpy as np


class Dense:
    def __init__(self, input_size, output_size, activation='sigmoid', kernel_initializer='MSRA',
                 bias_initializer='zeros'):
        """
        Initialize a dense layer.

        Parameters
        ----------
        input_size : int
            The input size of the layer.
        output_size : int
            The output size of the layer.
        activation : string
            The activation function for the neurons ('sigmoid', 'relu' or 'None').
        kernel_initializer : string
            Initializer for the kernel weights matrix.
        bias_initializer : string
            Initializer for the bias vector.
        """
        self.activation = activation
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.initialize(input_size, output_size)

    def initialize(self, input_size, output_size):
        """
        Initialize the kernel weights matrix and the bias vector.
        Xavier and MSRA initializations are provided.
        """
        if self.kernel_initializer == 'Xavier':
            var = 2. / (input_size + output_size)
            bound = np.sqrt(3.0 * var)
            self.w = np.random.uniform(-bound, bound, size=(input_size, output_size))

        if self.kernel_initializer == 'MSRA':
            # Note: this scale uses fan_in + fan_out; the standard He/MSRA scale is sqrt(2 / fan_in).
            s = np.sqrt(2 / (input_size + output_size))
            self.w = np.random.normal(loc=0.0, scale=s, size=(input_size, output_size))

        if self.bias_initializer == 'zeros':
            self.b = np.zeros(output_size)

    def forward(self, x):
        """
        Return the forward result of the layer.
        """
        return self.activation_func(np.dot(x, self.w) + self.b)

    def activation_func(self, x):
        """
        Return the activation result.
        Sigmoid, ReLU and identity ('None') are provided.
        """
        if self.activation == 'sigmoid':
            return 1 / (1 + np.exp(-x))
        if self.activation == 'relu':
            return np.maximum(x, 0)
        if self.activation == 'None':
            return x

    def grad_activation(self, x):
        """
        Return the derivative of the activation function.
        """
        if self.activation == 'sigmoid':
            a = self.activation_func(x)
            return a * (1 - a)
        if self.activation == 'relu':
            x_ = x.copy()
            x_[x_ <= 0] = 0
            x_[x_ > 0] = 1
            return x_

    def backprop(self, x, sigma, w_next, learning_rate):
        """
        Back-propagation step of the layer.

        Parameters
        ----------
        x : np.array
            2d array, the input of the layer.
        sigma : np.array
            2d array, the iteration term in the back-propagation process.
        w_next : np.array
            2d array, the weights matrix of the next layer (None for the output layer).
        learning_rate : float
            The step size of the gradient-descent update.

        Returns
        -------
        np.array
            The iteration term for the next backprop calculation.
        """
        wx = np.dot(x, self.w) + self.b
        if w_next is not None:
            sigma = np.multiply(np.dot(sigma, w_next.T), self.grad_activation(wx))

        gradient_w = np.dot(x.T, sigma)
        gradient_b = sigma.sum(axis=0)

        self.w -= learning_rate * gradient_w / x.shape[0]
        self.b -= learning_rate * gradient_b / x.shape[0]

        return sigma
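For reference, here is a minimal sketch of driving a single `Dense` layer by hand; the shapes and values below are illustrative assumptions, not part of the repository:

```python
import numpy as np
from NN import Dense

np.random.seed(0)
x = np.random.randn(4, 8)                # a batch of 4 samples with 8 features (arbitrary shapes)
layer = Dense(8, 3, activation='relu')   # MSRA kernel init and zero bias by default

out = layer.forward(x)                   # shape (4, 3)

# For an output layer, backprop receives the loss gradient directly and w_next=None,
# so this layer's activation derivative is not applied to sigma.
sigma = np.random.randn(4, 3)            # stand-in for dL/dz at the layer's output
sigma_out = layer.backprop(x, sigma, w_next=None, learning_rate=0.1)
```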
NN/model.py (new file)
@@ -0,0 +1,190 @@
import numpy as np


class Model:
    def __init__(self, learning_rate):
        """
        Initialize a deep model.

        Parameters
        ----------
        learning_rate : float
            The step size used by the gradient-descent updates.
        """
        self.learning_rate = learning_rate
        self.layers = []

    def add(self, layer):
        """
        Add a single layer to the network.
        """
        self.layers.append(layer)

    def forward(self, input):
        """
        A forward pass of the model.

        Parameters
        ----------
        input : np.array
            2d array, the input data fed to the network.

        Returns
        -------
        list
            The input followed by the output of each layer.
        """
        xs = [input]
        x = input
        for l in self.layers:
            xs.append(l.forward(x))
            x = xs[-1]
        return xs

    def softmax(self, x):
        """
        Softmax function.

        Parameters
        ----------
        x : np.array
            2d array, the input of softmax.

        Returns
        -------
        np.array
            The result of the softmax calculation.
        """
        # Subtract the row-wise maximum for numerical stability.
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def softmax_cross_entropy_with_logits(self, labels, logits):
        """
        Cross-entropy loss with softmax.

        Parameters
        ----------
        labels : np.array
            2d array, one-hot labels of the input data.
        logits : np.array
            2d array, logits predicted by the network.

        Returns
        -------
        float
            The cross-entropy loss, averaged over samples and classes.
        """
        p = self.softmax(logits)
        loss = -np.mean(labels * np.log(p + 1e-15))
        return loss

    def gradient_loss(self, labels, logits):
        """
        Derivative of the cross-entropy loss with softmax.

        Parameters
        ----------
        labels : np.array
            2d array, one-hot labels of the input data.
        logits : np.array
            2d array, logits predicted by the network.

        Returns
        -------
        np.array
            Derivative of the cross-entropy loss with softmax, i.e. softmax(logits) - labels.
        """
        p = self.softmax(logits)
        return p - labels

    def train(self, x, y):
        """
        One training step of the network on a batch.

        Parameters
        ----------
        x : np.array
            2d array, input features.
        y : np.array
            2d array, one-hot input labels.

        Returns
        -------
        float
            The batch loss, averaged over samples and classes.
        """
        xs = self.forward(x)
        y_pred = xs[-1]
        loss = self.softmax_cross_entropy_with_logits(y, y_pred)
        sigma = self.gradient_loss(y, y_pred)

        # Walk the layers backwards; each layer updates its own weights and
        # returns the iteration term for the layer before it.
        w_next = None
        for l_idx in range(len(self.layers))[::-1]:
            layer = self.layers[l_idx]
            x_layer = xs[l_idx]

            w_save = layer.w.copy()
            sigma = layer.backprop(x_layer, sigma, w_next, self.learning_rate)

            w_next = w_save

        return loss

    def batch_generator(self, x, y, batch_size):
        """
        Split the data into batches for mini-batch gradient descent.

        Parameters
        ----------
        x : np.array
            2d array, input features.
        y : np.array
            2d array, one-hot input labels.
        batch_size : int
            The number of samples per batch.

        Returns
        -------
        generator
            A generator of (x, y) batches.
        """
        for i in range(0, len(x), batch_size):
            yield x[i:i + batch_size], y[i:i + batch_size]

    def fit(self, x, y, batch_size):
        """
        Fit the network for one pass over the data.

        Parameters
        ----------
        x : np.array
            2d array, input features.
        y : np.array
            2d array, one-hot input labels.
        batch_size : int
            The number of samples per batch.

        Returns
        -------
        float
            The mean loss over the batches.
        """
        losses = []
        for train_x, train_y in self.batch_generator(x, y, batch_size):
            batch_loss = self.train(train_x, train_y)
            losses.append(batch_loss)
        return np.mean(losses)

    def predict(self, input):
        """
        Compute predictions.

        Parameters
        ----------
        input : np.array
            2d array, input features.

        Returns
        -------
        np.array
            The index of the largest logit for each sample.
        """
        y_pred = self.forward(input)[-1]
        return y_pred.argmax(axis=-1)
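As a quick smoke test of `Dense` and `Model` together, the following sketch trains on random data; the shapes, class count, and hyperparameters here are made-up assumptions, not taken from the repository:

```python
import numpy as np
from NN import Dense, Model

np.random.seed(0)
x = np.random.randn(32, 20)             # 32 samples, 20 features (arbitrary)
labels = np.random.randint(0, 3, 32)    # 3 classes
y = np.eye(3)[labels]                   # one-hot targets, shape (32, 3)

mlp = Model(0.1)
mlp.add(Dense(20, 16, activation='relu'))
mlp.add(Dense(16, 3, activation='None'))   # logits layer: no activation before the softmax loss

loss = mlp.fit(x, y, batch_size=8)      # mean loss over the 4 batches of one pass
preds = mlp.predict(x)                  # class indices, shape (32,)
print(loss, (preds == labels).mean())
```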
README.md
@@ -1,2 +1,33 @@
# MLP-implementation-with-numpy

An implementation of a fully connected neural network in pure NumPy.

## Example
We start with a multi-class classification problem, using the MNIST dataset (60,000 training samples and 10,000 test samples) as an example.

We construct an MLP with two hidden layers of 256 and 64 neurons. The accuracy on the test set reaches 0.9819 after 50 epochs. For details, please refer to example.py.

![accuracy](pics/accuracy.png)

## Environment

* NumPy
* python=3.6.12

## Usage

    from NN import Dense, Model

    MLP = Model(0.1)
    MLP.add(Dense(100, 64, activation='relu'))
    MLP.add(Dense(64, 10, activation='None'))
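Training and prediction then follow the same pattern (a sketch; `x` and the one-hot `y` stand for NumPy arrays you supply):

    loss = MLP.fit(x, y, batch_size=64)    # one pass of mini-batch gradient descent, returns the mean batch loss
    predictions = MLP.predict(x)           # index of the largest logit for each sample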

## Todo
* Add loss functions.
* Add tanh and other activations.
* Add optimizers.
* Add learning rate decay.

## Reference

* [Machine Learning for Beginners: An Introduction to Neural Networks](https://victorzhou.com/blog/intro-to-neural-networks/)
* [A Derivation of Backpropagation in Matrix Form](https://sudeepraja.github.io/Neural/)
example.py (new file)
@@ -0,0 +1,46 @@
import tensorflow as tf
import numpy as np
from NN import Dense, Model
import matplotlib.pyplot as plt


##########################################
# get data
def onehot_encoding(x):
    encoded = np.zeros((x.size, x.max() + 1))
    encoded[np.arange(x.size), x] = 1
    return encoded


(train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data(path='mnist.npz')
num_pixels = train_x.shape[1] * train_x.shape[2]
train_x = train_x.reshape(train_x.shape[0], num_pixels).astype('float32') / 255.
test_x = test_x.reshape(test_x.shape[0], num_pixels).astype('float32') / 255.
train_y = onehot_encoding(train_y)
test_y = onehot_encoding(test_y)

##########################################
# construct the model
MLP = Model(0.1)
MLP.add(Dense(784, 256, activation='relu'))
MLP.add(Dense(256, 64, activation='relu'))
MLP.add(Dense(64, 10, activation='None'))

train_label = train_y.argmax(axis=1)
test_label = test_y.argmax(axis=1)
train_acc = []
val_acc = []

##########################################
# fit the model
for epoch in range(50):
    print(epoch, MLP.fit(train_x, train_y, 64))
    train_acc.append((MLP.predict(train_x) == train_label).mean())
    val_acc.append((MLP.predict(test_x) == test_label).mean())

##########################################
# visualize
plt.plot(train_acc, label='train accuracy')
plt.plot(val_acc, label='val accuracy')
plt.legend()
plt.grid()
plt.xlabel('epoch')
plt.ylabel('accuracy')
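The script builds the accuracy curves but never displays or saves the figure; the `pics/accuracy.png` shown in the README was presumably produced from this plot. A hedged finishing touch, where the output path is an assumption taken from the README image link:

```python
import os

os.makedirs('pics', exist_ok=True)   # make sure the output directory exists
plt.savefig('pics/accuracy.png')     # assumed path, matching the README's image reference
plt.show()
```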
[The sixth changed file is a binary image that cannot be rendered in the diff — presumably pics/accuracy.png, referenced by the README.]