Rearranged and polished all scripts and necessary notebooks, moved data to results folder level
personx000 · Sep 2, 2020 · 860ce18 · 860ce18
1 parent aee8b56
commit 860ce18
Show file tree

Hide file tree

Showing 27 changed files with 589 additions and 46 deletions.
diff --git a/combine_classes.py b/combine_classes.py
@@ -1,71 +1,76 @@
-# Make 'classes' file
-# TODO: Reassign file save destinations, comment, and remove extra lines of code
+# Run Order: 2nd, 1 out of 1
+# Make 'classes' file in results folder
 
 import numpy as np
 
-paired_test = np.load('./results/20200509/confused_pairs_all.npy')
 
-print(paired_test.shape)
+########
+#### Look at confused pairs for all datasets except EMNIST
+########
+paired_test = np.load('./results/confused_pairs_all.npy')
 
-ds = 0
+# Use the mean of each paired class set over all trials
 pt_mean = np.mean(paired_test, axis=1)
 
+# Initialize dataset set
 ds_set = ['mnist', 'fmnist', 'kmnist','svhn','usps', 'cifar10']
 
+# Initialize classes variable for record keeping
 classes = []
 
-paired_test
+# For each dataset
 for i, ds in enumerate(ds_set):
+    # Select the paired class means for the selected dataset
     focus = pt_mean[i]
-
+
+    # Select the pair of classes with the lowest nonzero mean score
     a = np.min(focus[np.nonzero(focus)])
-    b = np.sort(focus[np.nonzero(focus)])
-    e = b[len(b)-2]
     c = np.where(focus == a)
-    d = np.where(focus == e)
 
-    classes.append([c[0][0], c[1][0], ds, a, d[0][0], d[1][0], e])
+    # Record keeping
+    classes.append([c[0][0], c[1][0], ds])
 
 classes = np.array(classes, dtype=object)
 classes_orig = classes
-print(classes)
-# np.save('./results/20200511/classes.npy', classes[:,:3], allow_pickle=True)
 
-# np.load('./results/20200511/classes.npy', allow_pickle=True)
 
-paired_test = np.load('./results/20200509/confused_pairs_emnist_upper.npy')
+########
+#### Look at confused pairs for only EMNIST
+########
+
+paired_test = np.load('./results/confused_pairs_emnist_upper.npy')
 paired_test.shape
 
 pt_mean = np.mean(paired_test, axis=1)
 
+# Initialize dataset set
 ds_set = ['emnist']
+
+# Initialize classes variable for record keeping
 classes = []
+# For each dataset (only EMNIST)
 for i, ds in enumerate(ds_set):
+    # Select the paired class means for the selected dataset
     focus = pt_mean[i]
-
+
+    # Select the pair of classes with the lowest nonzero mean score
     a = np.min(focus[np.nonzero(focus)])
-    print(a)
-    b = np.sort(focus[np.nonzero(focus)])
-    e = b[len(b)-2]
     c = np.where(focus == a)
-    d = np.where(focus == e)
-
-    classes.append([c[0][0]+10, c[1][0]+10, ds, a, d[0][0]+10, d[1][0]+10, e ])
+
+    # Record keeping
+    classes.append([c[0][0]+10, c[1][0]+10, ds])
 
 classes = np.array(classes, dtype=object)
 
-print(classes)
-# intermed = np.concatenate((class_orig, classes), 0)
-# np.save('./results/20200511/classes_emnist_perf.npy', intermed, allow_pickle=True)
-# np.save('./results/20200511/classes.npy', classes, allow_pickle=True)
+########
+#### Organize final class pairs into an array for further use
+########
 
-# classes_orig = np.load('./results/20200511/classes.npy', allow_pickle=True)
-print(classes_orig.shape, classes[:,:].shape)
-classes_final = np.concatenate((classes_orig[:,:3],classes[:,:3]),0)
+classes_final = np.concatenate((classes_orig,classes),0)
 a = classes_final[3:6].copy()
 b = classes_final[6].copy()
 classes_final[3] = b
 classes_final[4:] = a
 
 print(classes_final)
-# # np.save('./results/20200511/classes.npy', classes_final, allow_pickle=True)
+np.save('./results/classes.npy', classes_final, allow_pickle=True)
diff --git a/confused_pairs.py b/confused_pairs.py
@@ -0,0 +1,65 @@
+# Run Order: 1st, 1 out of 2
+# Determine most confused pairs of classes in all datasets except for EMNIST
+
+from custompackage.load_data import *
+from custompackage.load_architecture import *
+from custompackage.traintestloop import *
+
+import torch
+from torch.utils.data import DataLoader
+import numpy as np
+import math
+import torchvision
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+import scipy
+import os
+import glob
+import pandas as pd
+import pickle
+
+if not os.path.exists('results'):
+    os.makedirs('results')
+
+# Initialize parameters for dataset loading
+bs = 256
+weighting = 'paired'
+trials = 10
+ds_set = ['mnist', 'fmnist', 'kmnist','svhn','usps','cifar10']
+
+# Initialize for record keeping
+paired_test = np.zeros((len(ds_set),trials,10,10))
+for m in range(trials):
+    # For each 10-class dataset
+    for k, ds in enumerate(ds_set):
+        # Go through each class
+        for i in range(10):
+            t1 = i
+            # and pair it with every other class
+            for j in range(i+1,10):
+                t2 = j
+
+                # Load the binary classification dataloaders
+                trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds)
+
+                # Assign entirety of the datasets within each dataloader to a variable
+                X_train = trainloaders[0].dataset.tensors[0]
+                y_train = trainloaders[0].dataset.tensors[1]
+                X_test = testloader.dataset.tensors[0]
+                y_test = testloader.dataset.tensors[1]
+
+
+                # initialize lda
+                lda = LinearDiscriminantAnalysis()
+
+                # fit to images, labels
+                lda.fit(X_train, y_train)
+
+                # see accuracy for test set
+                score_test = lda.score(X_test, y_test)
+
+#                 print(ds, m, i, j, score_test)
+
+                #Record keeping
+                paired_test[k, m, i, j] = score_test
+
+                np.save('./results/confused_pairs_all.npy', paired_test)
diff --git a/confused_pairs_emnist.py b/confused_pairs_emnist.py
@@ -0,0 +1,68 @@
+# Run Order: 1st, 2 out of 2
+# Determine most confused pairs of classes in only EMNIST dataset, specifically the uppercase letters
+
+
+from custompackage.load_data import *
+from custompackage.load_architecture import *
+from custompackage.traintestloop import *
+
+import torch
+from torch.utils.data import DataLoader
+import numpy as np
+import math
+import torchvision
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+import scipy
+import os
+import glob
+import pandas as pd
+import pickle
+
+if not os.path.exists('results'):
+    os.makedirs('results')
+
+# Testing uppercase Letters from EMNIST only
+
+# Initialize parameters for dataset loading
+bs = 256
+weighting = 'paired'
+trials = 10
+ds_set = ['emnist']
+
+# Initialize for record keeping
+paired_test = np.zeros((len(ds_set),trials,26,26))
+for m in range(trials):
+    # For each dataset (only EMNIST; 26 uppercase-letter classes, labels 10-35)
+    for k, ds in enumerate(ds_set):
+        # Go through each class
+        for i in range(10, 36):
+            t1 = i
+            # and pair it with every other class
+            for j in range(i+1,36):
+                t2 = j
+
+                # Load the binary classification dataloaders
+                trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds)
+
+                # Assign entirety of the datasets within each dataloader to a variable
+                X_train = trainloaders[0].dataset.tensors[0]
+                y_train = trainloaders[0].dataset.tensors[1]
+                X_test = testloader.dataset.tensors[0]
+                y_test = testloader.dataset.tensors[1]
+
+
+                # initialize lda
+                lda = LinearDiscriminantAnalysis()
+
+                # fit to images, labels
+                lda.fit(X_train, y_train)
+
+                # see accuracy for test set
+                score_test = lda.score(X_test, y_test)
+
+#                 print(ds, m, i, j, score_test)
+
+                #Record keeping
+                paired_test[k,m, i-10,j-10] = score_test
+
+                np.save('./results/confused_pairs_emnist_upper.npy', paired_test)
diff --git a/fcnn.py b/fcnn.py
@@ -0,0 +1,77 @@
+# Run Order: 3rd, 1 out of 2
+### Train and test fcnn model
+### Saves test loss and test accuracy
+### all classes script, early stopping implemented
+
+
+from custompackage.load_data import *
+from custompackage.load_architecture import *
+from custompackage.traintestloop import *
+
+
+import torch
+from torch.utils.data import DataLoader
+import torch.optim as optim
+import torch.nn as nn
+import numpy as np
+import math
+import torchvision
+from torchvision import transforms
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.metrics import classification_report, confusion_matrix
+from sklearn.svm import SVC
+from sklearn.linear_model import LogisticRegression
+from sklearn.naive_bayes import GaussianNB
+import matplotlib
+import matplotlib.pyplot as plt
+from torch.optim.optimizer import required
+from torch.utils.data.dataset import random_split
+import scipy
+import os
+import glob
+import pandas as pd
+import pickle
+from pytorchtools import EarlyStopping
+
+
+# Test space for networks
+# Select Class Set
+class_set = 0
+
+
+# Initialize settings
+bs = 256
+weighting = 'paired'
+trials = 10
+epochs = 2000
+trees_set = [1,2,4,8,16,32]
+
+# Load class-dataset list
+classes = np.load('./results/classes.npy', allow_pickle=True)
+
+# Initialize final test loss and accuracy variables
+loss = np.zeros((len(classes), trials, len(trees_set)))
+acc = np.zeros((len(classes), trials, len(trees_set)))
+
+
+# For each dataset enumerated from classes list
+for j, (t1, t2, ds) in enumerate(classes):
+    print(t1, t2, ds)
+    # Load data loaders
+    trainloaders, validloaders, testloader = dataset_weighted_split_all(bs, t1, t2, weighting, trials, ds, permute=False)
+    # Initialize input size for model initialization purposes
+    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
+    # For each trial
+    for i in range(trials):
+        # For every k-tree defined by trees_set
+        for k, trees in enumerate(trees_set):
+            print(j, i, k)
+            # Initialize the fcnn model, such that hidden layer is twice the number of trees
+            model = simple_fcnn(input_size, 2*trees, 1).cuda()
+            #Train and test fcnn, assigning loss and acc values
+            loss_curve, acc_curve, loss[j,i,k], acc[j,i,k], model_t = train_test_fc(model, trainloaders[i],
+                                              validloaders[i], testloader, epochs=epochs)
+
+            # Save accuracy and loss arrays
+            np.save('./results/fcnn_acc_'+str(class_set)+'.npy', acc)
+            np.save('./results/fcnn_loss_'+str(class_set)+'.npy', loss)
diff --git a/ktree_benchmarking.ipynb b/ktree_benchmarking.ipynb
@@ -11,29 +11,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "ModuleNotFoundError",
-     "evalue": "No module named 'custompackage.neuron_capacity'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
-      "\u001b[1;32m<ipython-input-7-2c96415184fb>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mcustompackage\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload_architecture\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[1;33m*\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mcustompackage\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtraintestloop\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[1;33m*\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mcustompackage\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mneuron_capacity\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[1;33m*\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mcustompackage\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msin_ineq\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[1;33m*\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'custompackage.neuron_capacity'"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "\n",
     "\n",
     "from custompackage.load_data import *\n",
     "from custompackage.load_architecture import *\n",
     "from custompackage.traintestloop import *\n",
-    "from custompackage.neuron_capacity import *\n",
-    "from custompackage.sin_ineq import *\n",
     "\n",
     "import torch\n",
     "from torch.utils.data import DataLoader\n",
@@ -1956,6 +1942,18 @@
     "plt.imshow(model.w0_3.weight.data.cpu()[:16,:16])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": null,