commented load_architecture.py
ilennaj committed Sep 1, 2020
1 parent 24d1d33 commit aee8b56
Showing 2 changed files with 96 additions and 30 deletions.
63 changes: 48 additions & 15 deletions custompackage/.ipynb_checkpoints/load_architecture-checkpoint.py
63 changes: 48 additions & 15 deletions custompackage/load_architecture.py
@@ -12,14 +12,14 @@ def kronecker(matrix1, matrix2):
class simple_fcnn(nn.Module):
'''
2-layer feed-forward neural network.
Will code in Linear, Sigmoid, or ReLU activation functions.
Activation = {'relu', 'sigmoid', 'linear'}
Will use leaky ReLU activation functions.
Activation = {'relu', 'linear'}
'''

def __init__(self, Input_size=3072, Hidden_size=3072, Output_size=1, Activation="relu"):
super(simple_fcnn, self).__init__()
'''
Inputs: Input_size, Hidden_size, Output_size
Inputs: Input_size, Hidden_size, Output_size, Activation
'''
# Initialize architecture parameters
self.Input_size = Input_size
@@ -48,8 +48,8 @@ def __init__(self, Input_size=3072, Hidden_size=3072, Output_size=1, Activation=
def forward(self, x):
'''
Forward step for network. Establishes Architecture.
Inputs: Input, Hidden
Outputs: Output, Hidden
Inputs: Input
Outputs: Output
'''
# Prepare input for appropriate architecture

@@ -67,14 +67,14 @@ def forward(self, x):

class ktree_gen(nn.Module):
'''
Tree NN
k-Tree neural network
'''

def __init__(self, ds='mnist', Activation="relu", Sparse=True,
Input_order=None, Repeats=1, Padded=False):
super(ktree_gen, self).__init__()
'''
Inputs: Input_size, Hidden_size, Output_size
Inputs: ds (dataset), activation, sparse, input_order, repeats, padded
'''
# Initialize architecture parameters
self.ds = ds
@@ -89,32 +89,53 @@ def __init__(self, ds='mnist', Activation="relu", Sparse=True,
# Set freeze masks

# Specify tree dimensions
# If using 28x28 datasets...
if (ds == 'mnist') or (ds == 'fmnist') or (ds == 'kmnist') or (ds == 'emnist'):
# If padded, use a 1024-sized tree (a complete binary tree)
if Padded:
self.k = [1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1]
# If not padded, use 784 sized tree,
# 7:1 between layers 1 and 2, and layers 2 and 3
else:
self.k = [784, 112, 16, 8, 4, 2, 1]
# If using 3x32x32 datasets...
elif (ds == 'svhn') or (ds == 'cifar10'):
# Use 3072 sized tree
# 3:1 between layers 1 and 2, otherwise binary
self.k = [3072, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1]
# If using 16x16 datasets...
elif ds == 'usps':
# Use 256 sized tree
self.k = [256, 128, 64, 32, 16, 8, 4, 2, 1]
else:
print('Select a dataset')
return(None)

# Make layers of tree architecture

# Name each layer in each subtree for reference later
self.names = np.empty((self.Repeats, len(self.k)-1),dtype=object)
# Initialize freeze mask for use in training loop
self.freeze_mask_set = []
# For each repeat (subtree), build its layers with the correct sparse initialization
for j in range(self.Repeats):
# For each layer within each subtree
for i in range(len(self.k)-1):
# Assign name of the layer, indexed by layer (i) and subtree (j)
name = ''.join(['w',str(j),'_',str(i)])
# Initialize the layer with the appropriate name
self.add_module(name, nn.Linear(self.k[i],self.k[i+1]))
# Set bias of layer to zeros
self._modules[name].bias = nn.Parameter(torch.zeros_like(self._modules[name].bias))
# Use custom method to re-initialize the layer weights and create freeze mask for that layer
self._modules[name].weight.data, freeze_mask = self.initialize(self._modules[name])
# Add the layer name to the list of names
self.names[j,i] = name
# Store the freeze masks only for the first subtree; they are the same for every subtree
if j < 1:
self.freeze_mask_set.append(freeze_mask)

# Initialize the root node (a.k.a. soma node or output node)
self.root = nn.Linear(Repeats, 1)

# Initialize nonlinearities
@@ -124,46 +145,58 @@ def __init__(self, ds='mnist', Activation="relu", Sparse=True,
def forward(self, x):
'''
Forward step for network. Establishes Architecture.
Inputs: Input, Hidden
Outputs: Output, Hidden
Inputs: Input
Outputs: Output
'''

y_out = []
# Step through every layer in each subtree of model, applying nonlinearities
for j in range(self.Repeats):
y = x
for i in range(len(self.k)-1):
if self.Activation == 'relu':
y = self.relu(self._modules[self.names[j,i]](y))
else:
y = self._modules[self.names[j,i]](y)
# Keep track of penultimate-layer outputs
y_out.append(y)


# Calculate final output, joining the outputs of each subtree together
output = self.sigmoid(self.root(torch.cat((y_out), dim=1)))

return(output)

def initialize(self, layer):
# Kaiming initialize weights accounting for sparsity

# Extract weights from the layer we are reinitializing
weights = layer.weight.data
# If sparse, change the initializations based on density (sparsity)
if self.Sparse:
if weights.shape[1] == 3072:
if weights.shape[1] == 3072: # first layer of 3x32x32 image datasets
inp_block = torch.ones((1,3))
elif (weights.shape[1] == 784) or (weights.shape[1] == 112):
elif (weights.shape[1] == 784) or (weights.shape[1] == 112): # first or second layer of 28x28 datasets
inp_block = torch.ones((1,7))
else:
inp_block = torch.ones((1,2))
inp_block = torch.ones((1,2)) # all other (2:1) layers, including the first layer of the padded 1024 tree

# Set up mask for where each node receives a set of inputs of equal size to the input block
inp_mask = kronecker(torch.eye(weights.shape[0]), inp_block)

# Calculate density
density = len(np.where(inp_mask)[0])/len(inp_mask.reshape(-1))

# Generate Kaiming-style normal initialization with variance scaled by 1/density
weights = torch.nn.init.normal_(weights, mean=0.0, std=math.sqrt(2/(weights.shape[1]*density)))


# Where no inputs will be received, set weights to zero
weights[inp_mask == 0] = 0
else:
else: # If not sparse, use standard Kaiming initialization
weights = torch.nn.init.normal_(weights, mean=0.0, std=math.sqrt(2/(weights.shape[1])))

# Generate freeze mask used during training to keep the weights initialized to zero at zero
mask_gen = torch.zeros_like(weights)
# Indicate where weights are equal to zero
freeze_mask = mask_gen == weights

return(weights, freeze_mask)
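
As a reading aid for the initialize method above, here is a minimal standalone sketch of the sparse re-initialization, assuming the repo's kronecker helper (defined outside the hunks shown) computes an ordinary Kronecker product. It uses the first layer of the unpadded 784-input tree, where each of the 112 nodes reads a block of 7 inputs, so the density is 1/112 and the std works out to sqrt(2/7).

    import math
    import torch

    def kronecker(a, b):
        # Ordinary Kronecker product; stands in for the repo's helper of the same name
        return torch.einsum('ij,kl->ikjl', a, b).reshape(a.shape[0] * b.shape[0],
                                                         a.shape[1] * b.shape[1])

    # First layer of the unpadded 28x28 tree: 112 nodes, 784 inputs, 7 inputs per node
    out_features, in_features = 112, 784
    inp_block = torch.ones((1, 7))
    inp_mask = kronecker(torch.eye(out_features), inp_block)   # shape (112, 784)

    # Fraction of nonzero connections: 784 / (112 * 784) = 1/112
    density = (inp_mask != 0).sum().item() / inp_mask.numel()

    # Kaiming-style normal init with variance scaled by 1/density, then prune
    std = math.sqrt(2 / (in_features * density))               # sqrt(2 / 7)
    weights = torch.normal(0.0, std, size=(out_features, in_features))
    weights[inp_mask == 0] = 0                                 # unconnected weights stay zero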
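The training loop that consumes freeze_mask_set is not part of this commit, so the helper below (apply_freeze_masks) is purely hypothetical: one plausible way to use the masks is to zero the gradients of pruned weights after each backward pass, so the weights initialized to zero stay at zero.

    def apply_freeze_masks(model):
        # Hypothetical helper (not in the repo): call after loss.backward()
        # to zero gradients of pruned weights in a ktree_gen instance
        for j in range(model.Repeats):
            for i, freeze_mask in enumerate(model.freeze_mask_set):
                w = model._modules[model.names[j, i]].weight
                if w.grad is not None:
                    w.grad[freeze_mask] = 0.0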

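Finally, a short usage sketch of the module as committed, assuming custompackage is importable from the repository root; the forward pass expects flattened image vectors and returns one sigmoid output per example.

    import torch
    from custompackage.load_architecture import ktree_gen

    # Unpadded MNIST-style tree with two subtrees reading the same flattened input
    model = ktree_gen(ds='mnist', Activation='relu', Sparse=True, Repeats=2)

    x = torch.randn(8, 784)   # batch of 8 flattened 28x28 images
    out = model(x)            # shape (8, 1), sigmoid values in (0, 1)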