import numpy as np


class Dense:
    def __init__(self, input_size, output_size, activation='sigmoid',
                 kernel_initializer='MSRA', bias_initializer='zeros'):
        """
        Initialize a dense (fully connected) layer.

        Parameters
        ----------
        input_size : int
            The input size of the layer.
        output_size : int
            The output size of the layer.
        activation : string
            The activation function for the neurons; one of 'sigmoid',
            'relu', or 'None'.
        kernel_initializer : string
            Initializer for the kernel weights matrix; 'Xavier' or 'MSRA'.
        bias_initializer : string
            Initializer for the bias vector; only 'zeros' is provided.
        """
        self.activation = activation
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.initialize(input_size, output_size)
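
    # Example construction (illustrative, not part of the class):
    # Dense(784, 128, activation='relu', kernel_initializer='MSRA') builds a
    # 784 -> 128 layer with He-initialized weights and zero biases.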

    def initialize(self, input_size, output_size):
        """
        Initialize the kernel weights matrix and the bias vector.
        Xavier and MSRA (He) initializations are provided.
        """
        if self.kernel_initializer == 'Xavier':
            # Glorot uniform: variance 2 / (fan_in + fan_out), drawn from a
            # uniform distribution with bound sqrt(3 * var).
            var = 2. / (input_size + output_size)
            bound = np.sqrt(3. * var)
            self.w = np.random.uniform(-bound, bound, size=(input_size, output_size))
        elif self.kernel_initializer == 'MSRA':
            # He normal: zero-mean Gaussian with std sqrt(2 / fan_in).
            s = np.sqrt(2. / input_size)
            self.w = np.random.normal(loc=0.0, scale=s, size=(input_size, output_size))
        else:
            raise ValueError('unknown kernel_initializer: %s' % self.kernel_initializer)
        if self.bias_initializer == 'zeros':
            self.b = np.zeros(output_size)
        else:
            raise ValueError('unknown bias_initializer: %s' % self.bias_initializer)
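
    # Worked example of the initializer math (illustrative numbers, not part
    # of the class): with input_size=256 and output_size=128, Xavier draws
    # weights uniformly from [-b, b] with b = sqrt(6 / (256 + 128)) = 0.125,
    # while MSRA draws from a Gaussian with std = sqrt(2 / 256) ~= 0.0884.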

    def forward(self, x):
        """
        Return the forward result of the layer: activation(x @ w + b).
        """
        return self.activation_func(np.dot(x, self.w) + self.b)

    def activation_func(self, x):
        """
        Return the activation result.
        Sigmoid, ReLU, and the identity ('None') are provided.
        """
        if self.activation == 'sigmoid':
            return 1. / (1. + np.exp(-x))
        elif self.activation == 'relu':
            return np.maximum(x, 0)
        elif self.activation == 'None':
            return x
        raise ValueError('unknown activation: %s' % self.activation)

    def grad_activation(self, x):
        """
        Return the derivative of the activation function evaluated at x.
        """
        if self.activation == 'sigmoid':
            # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
            a = self.activation_func(x)
            return a * (1 - a)
        elif self.activation == 'relu':
            # relu'(x) = 1 for x > 0, else 0
            return (x > 0).astype(x.dtype)
        elif self.activation == 'None':
            # Identity activation: the derivative is 1 everywhere.
            return np.ones_like(x)
        raise ValueError('unknown activation: %s' % self.activation)

    def backprop(self, x, sigma, w_next, learning_rate):
        """
        Back-propagation step for the layer.

        Parameters
        ----------
        x : np.array
            2d array, the input of the layer.
        sigma : np.array
            2d array, the error term (delta) propagated back from the next
            layer; for the output layer, pass the gradient of the loss with
            respect to this layer's pre-activation.
        w_next : np.array or None
            2d array, the weights matrix of the next layer, or None if this
            is the output layer.
        learning_rate : float
            The step size of the gradient-descent update.

        Returns
        -------
        np.array
            The error term to feed into the previous layer's backprop.
        """
        # Pre-activation of this layer.
        wx = np.dot(x, self.w) + self.b
        if w_next is not None:
            # Chain rule: delta = (delta_next @ w_next.T) * f'(wx).
            sigma = np.multiply(np.dot(sigma, w_next.T), self.grad_activation(wx))
        # Gradients of the loss w.r.t. the weights and bias, averaged over
        # the batch, followed by a plain gradient-descent update.
        gradient_w = np.dot(x.T, sigma)
        gradient_b = sigma.sum(axis=0)
        self.w -= learning_rate * gradient_w / x.shape[0]
        self.b -= learning_rate * gradient_b / x.shape[0]
        return sigma
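

# ---------------------------------------------------------------------------
# Minimal usage sketch (a demo, not part of the original layer API): wire two
# Dense layers into a tiny network and run full-batch gradient descent on toy
# XOR data. The output-layer sigma below assumes a mean-squared-error loss;
# the names `hidden`, `out`, `X`, and `y` are illustrative.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    np.random.seed(0)

    # Toy XOR dataset: 4 samples, 2 features, 1 target.
    X = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
    y = np.array([[0.], [1.], [1.], [0.]])

    hidden = Dense(2, 8, activation='sigmoid', kernel_initializer='Xavier')
    out = Dense(8, 1, activation='sigmoid', kernel_initializer='Xavier')

    for step in range(5000):
        h = hidden.forward(X)
        pred = out.forward(h)

        # For MSE loss, the output layer's sigma is the loss gradient with
        # respect to its pre-activation: (pred - y) * sigmoid'(z_out).
        # backprop() applies grad_activation itself only when w_next is
        # given, so the output layer's sigma is computed here.
        z_out = np.dot(h, out.w) + out.b
        sigma = (pred - y) * out.grad_activation(z_out)

        # Snapshot the output weights before they are updated, since the
        # hidden layer's chain rule needs the pre-update values.
        w_out = out.w.copy()
        sigma = out.backprop(h, sigma, None, learning_rate=1.0)
        hidden.backprop(X, sigma, w_out, learning_rate=1.0)

    print('XOR predictions:', out.forward(hidden.forward(X)).ravel())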