commented load_architecture.py
ilennaj committed Sep 1, 2020
1 parent 24d1d33 commit aee8b56
Showing 2 changed files with 96 additions and 30 deletions.
63 changes: 48 additions & 15 deletions custompackage/.ipynb_checkpoints/load_architecture-checkpoint.py
63 changes: 48 additions & 15 deletions custompackage/load_architecture.py
@@ -12,14 +12,14 @@ def kronecker(matrix1, matrix2):
class simple_fcnn(nn.Module):
'''
2-layer feed-forward neural network.
Will code in Linear, Sigmoid, or ReLU activation functions.
Activation = {'relu', 'sigmoid', 'linear'}
Will use leaky ReLU activation functions.
Activation = {'relu', 'linear'}
'''

def __init__(self, Input_size=3072, Hidden_size=3072, Output_size=1, Activation="relu"):
super(simple_fcnn, self).__init__()
'''
Inputs: Input_size, Hidden_size, Output_size
Inputs: Input_size, Hidden_size, Output_size, Activation
'''
# Initialize architecture parameters
self.Input_size = Input_size
@@ -48,8 +48,8 @@ def __init__(self, Input_size=3072, Hidden_size=3072, Output_size=1, Activation=
def forward(self, x):
'''
Forward step for network. Establishes Architecture.
Inputs: Input, Hidden
Outputs: Output, Hidden
Inputs: Input
Outputs: Output
'''
# Prepare input for appropriate architecture

@@ -67,14 +67,14 @@ def forward(self, x):

class ktree_gen(nn.Module):
'''
Tree NN
k-Tree neural network
'''

def __init__(self, ds='mnist', Activation="relu", Sparse=True,
Input_order=None, Repeats=1, Padded=False):
super(ktree_gen, self).__init__()
'''
Inputs: Input_size, Hidden_size, Output_size
Inputs: ds (dataset), activation, sparse, input_order, repeats, padded
'''
# Initialize architecture parameters
self.ds = ds
@@ -89,32 +89,53 @@ def __init__(self, ds='mnist', Activation="relu", Sparse=True,
# Set freeze masks

# Specify tree dimensions
# If using 28x28 datasets...
if (ds == 'mnist') or (ds == 'fmnist') or (ds == 'kmnist') or (ds == 'emnist'):
# If padded, use a 1024-sized tree (a complete binary tree)
if Padded:
self.k = [1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1]
# If not padded, use 784 sized tree,
# 7:1 between layers 1 and 2, and layers 2 and 3
else:
self.k = [784, 112, 16, 8, 4, 2, 1]
# If using 3x32x32 datasets...
elif (ds == 'svhn') or (ds == 'cifar10'):
# Use 3072 sized tree
# 3:1 between layers 1 and 2, otherwise binary
self.k = [3072, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1]
# If using 16x16 datasets...
elif ds == 'usps':
# Use 256 sized tree
self.k = [256, 128, 64, 32, 16, 8, 4, 2, 1]
else:
print('Select a dataset')
return(None)

# Make layers of tree architecture

# Name each layer in each subtree for reference later
self.names = np.empty((self.Repeats, len(self.k)-1),dtype=object)
# Initialize freeze mask for use in training loop
self.freeze_mask_set = []
# For each repeat (subtree), build its layers with the correct sparse initialization
for j in range(self.Repeats):
# For each layer within each subtree
for i in range(len(self.k)-1):
# Assign name of the layer, indexed by layer (i) and subtree (j)
name = ''.join(['w',str(j),'_',str(i)])
# Initialize the layer with the appropriate name
self.add_module(name, nn.Linear(self.k[i],self.k[i+1]))
# Set bias of layer to zeros
self._modules[name].bias = nn.Parameter(torch.zeros_like(self._modules[name].bias))
# Use custom method to re-initialize the layer weights and create freeze mask for that layer
self._modules[name].weight.data, freeze_mask = self.initialize(self._modules[name])
# Add the layer name to the list of names
self.names[j,i] = name
# Store the freeze masks only for the first subtree; they are the same for every subtree
if j < 1:
self.freeze_mask_set.append(freeze_mask)

# Initialize the root node (a.k.a. soma node or output node)
self.root = nn.Linear(Repeats, 1)

# Initialize nonlinearities
@@ -124,46 +145,58 @@ def __init__(self, ds='mnist', Activation="relu", Sparse=True,
def forward(self, x):
'''
Forward step for network. Establishes Architecture.
Inputs: Input, Hidden
Outputs: Output, Hidden
Inputs: Input
Outputs: Output
'''

y_out = []
# Step through every layer in each subtree of model, applying nonlinearities
for j in range(self.Repeats):
y = x
for i in range(len(self.k)-1):
if self.Activation == 'relu':
y = self.relu(self._modules[self.names[j,i]](y))
else:
y = self._modules[self.names[j,i]](y)
# Keep track of penultimate-layer outputs
y_out.append(y)


# Calculate final output, joining the outputs of each subtree together
output = self.sigmoid(self.root(torch.cat((y_out), dim=1)))

return(output)

def initialize(self, layer):
# Kaiming initialize weights accounting for sparsity

# Extract weights from the layer we are reinitializing
weights = layer.weight.data
# If sparse, change the initializations based on density (sparsity)
if self.Sparse:
if weights.shape[1] == 3072:
if weights.shape[1] == 3072: # first layer of 3x32x32 image datasets
inp_block = torch.ones((1,3))
elif (weights.shape[1] == 784) or (weights.shape[1] == 112):
elif (weights.shape[1] == 784) or (weights.shape[1] == 112): # first or second layer of 28x28 datasets
inp_block = torch.ones((1,7))
else:
inp_block = torch.ones((1,2))
inp_block = torch.ones((1,2)) # all other (2:1) layers, including the first layer of the padded 1024 tree

# Set up mask for where each node receives a set of inputs of equal size to the input block
inp_mask = kronecker(torch.eye(weights.shape[0]), inp_block)

# Calculate density
density = len(np.where(inp_mask)[0])/len(inp_mask.reshape(-1))

# Generate Kaiming-style normal initialization with variance scaled by 1/density
weights = torch.nn.init.normal_(weights, mean=0.0, std=math.sqrt(2/(weights.shape[1]*density)))


# Where no inputs will be received, set weights to zero
weights[inp_mask == 0] = 0
else:
else: # If not sparse, use standard Kaiming initialization
weights = torch.nn.init.normal_(weights, mean=0.0, std=math.sqrt(2/(weights.shape[1])))

# Generate freeze mask used during training to keep the weights initialized to zero at zero
mask_gen = torch.zeros_like(weights)
# Indicate where weights are equal to zero
freeze_mask = mask_gen == weights

return(weights, freeze_mask)
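
As a reading aid for the initialize method above, here is a minimal standalone sketch of the sparse re-initialization, assuming the repo's kronecker helper (defined outside the hunks shown) computes an ordinary Kronecker product. It uses the first layer of the unpadded 784-input tree, where each of the 112 nodes reads a block of 7 inputs, so the density is 1/112 and the std works out to sqrt(2/7).

    import math
    import torch

    def kronecker(a, b):
        # Ordinary Kronecker product; stands in for the repo's helper of the same name
        return torch.einsum('ij,kl->ikjl', a, b).reshape(a.shape[0] * b.shape[0],
                                                         a.shape[1] * b.shape[1])

    # First layer of the unpadded 28x28 tree: 112 nodes, 784 inputs, 7 inputs per node
    out_features, in_features = 112, 784
    inp_block = torch.ones((1, 7))
    inp_mask = kronecker(torch.eye(out_features), inp_block)   # shape (112, 784)

    # Fraction of nonzero connections: 784 / (112 * 784) = 1/112
    density = (inp_mask != 0).sum().item() / inp_mask.numel()

    # Kaiming-style normal init with variance scaled by 1/density, then prune
    std = math.sqrt(2 / (in_features * density))               # sqrt(2 / 7)
    weights = torch.normal(0.0, std, size=(out_features, in_features))
    weights[inp_mask == 0] = 0                                 # unconnected weights stay zero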
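The training loop that consumes freeze_mask_set is not part of this commit, so the helper below (apply_freeze_masks) is purely hypothetical: one plausible way to use the masks is to zero the gradients of pruned weights after each backward pass, so the weights initialized to zero stay at zero.

    def apply_freeze_masks(model):
        # Hypothetical helper (not in the repo): call after loss.backward()
        # to zero gradients of pruned weights in a ktree_gen instance
        for j in range(model.Repeats):
            for i, freeze_mask in enumerate(model.freeze_mask_set):
                w = model._modules[model.names[j, i]].weight
                if w.grad is not None:
                    w.grad[freeze_mask] = 0.0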

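Finally, a short usage sketch of the module as committed, assuming custompackage is importable from the repository root; the forward pass expects flattened image vectors and returns one sigmoid output per example.

    import torch
    from custompackage.load_architecture import ktree_gen

    # Unpadded MNIST-style tree with two subtrees reading the same flattened input
    model = ktree_gen(ds='mnist', Activation='relu', Sparse=True, Repeats=2)

    x = torch.randn(8, 784)   # batch of 8 flattened 28x28 images
    out = model(x)            # shape (8, 1), sigmoid values in (0, 1)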