forked from ilennaj/ktree
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rearranged and polished all scripts and necessary notebooks, moved data to results folder level
- Loading branch information
Showing
27 changed files
with
589 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,71 +1,76 @@ | ||
# Run Order: 2nd, 1 out of 1
# Make 'classes' file in results folder
#
# Reads the pairwise test scores saved in ./results/confused_pairs_all.npy and
# ./results/confused_pairs_emnist_upper.npy, picks for every dataset the pair
# of classes with the lowest mean score (the most confused pair), and saves
# the resulting [class, class, dataset] rows to ./results/classes.npy.

import numpy as np

# Dataset order along axis 0 of confused_pairs_all.npy
DS_SET_ALL = ['mnist', 'fmnist', 'kmnist', 'svhn', 'usps', 'cifar10']

# Dataset order along axis 0 of confused_pairs_emnist_upper.npy
DS_SET_EMNIST = ['emnist']

# The EMNIST scores are stored with both class indices shifted down by 10,
# so 10 is added back to recover the class labels.
EMNIST_LABEL_OFFSET = 10


def most_confused_pair(focus):
    """Return the (row, col) index of the smallest non-zero entry of *focus*.

    Zero entries are skipped because unfilled cells of the score matrix are
    left at their initial value of zero. On ties the first entry in row-major
    order wins.

    Raises:
        ValueError: if *focus* contains no non-zero entry.
    """
    focus = np.asarray(focus, dtype=float)
    if not np.any(focus):
        raise ValueError('score matrix has no non-zero entries')

    # Hide the zero cells behind +inf so argmin can never select them
    masked = np.where(focus != 0, focus, np.inf)
    row, col = np.unravel_index(np.argmin(masked), masked.shape)
    return row, col


def select_class_pairs(paired_scores, ds_names, label_offset=0):
    """Pick the lowest-scoring class pair for every dataset.

    Args:
        paired_scores: array indexed as [dataset, trial, class, class].
        ds_names: dataset names, in the order used along axis 0.
        label_offset: value added to both matrix indices to obtain the
            class labels.

    Returns:
        Object array with one [class_1, class_2, dataset_name] row per
        dataset.
    """
    # Use the mean of each paired class set over all trials
    pt_mean = np.mean(paired_scores, axis=1)

    rows = []
    for i, ds in enumerate(ds_names):
        t1, t2 = most_confused_pair(pt_mean[i])
        rows.append([t1 + label_offset, t2 + label_offset, ds])
    return np.array(rows, dtype=object)


def main():
    """Build ./results/classes.npy from the two confused-pairs files."""
    ########
    #### Look at confused pairs for all datasets except EMNIST
    ########
    classes_orig = select_class_pairs(
        np.load('./results/confused_pairs_all.npy'), DS_SET_ALL)
    print(classes_orig)

    ########
    #### Look at confused pairs for only EMNIST
    ########
    classes_emnist = select_class_pairs(
        np.load('./results/confused_pairs_emnist_upper.npy'),
        DS_SET_EMNIST,
        label_offset=EMNIST_LABEL_OFFSET)
    print(classes_emnist)

    ########
    #### Organize final class pairs into an array for further use
    ########
    print(classes_orig.shape, classes_emnist.shape)

    # Final row order: mnist, fmnist, kmnist, emnist, svhn, usps, cifar10
    # (the EMNIST row is inserted after the first three datasets).
    classes_final = np.concatenate(
        (classes_orig[:3], classes_emnist, classes_orig[3:]), 0)

    print(classes_final)
    np.save('./results/classes.npy', classes_final, allow_pickle=True)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Run Order: 1st, 1 out of 2 | ||
# Determine most confused pairs of classes in all datasets except for EMNIST | ||
|
||
from custompackage.load_data import * | ||
from custompackage.load_architecture import * | ||
from custompackage.traintestloop import * | ||
|
||
import torch | ||
from torch.utils.data import DataLoader | ||
import numpy as np | ||
import math | ||
import torchvision | ||
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis | ||
import scipy | ||
import os | ||
import glob | ||
import pandas as pd | ||
import pickle | ||
|
||
# Make sure the output folder exists (no error if it is already there)
os.makedirs('results', exist_ok=True)

# Initialize parameters for dataset loading
bs = 256
weighting = 'paired'
trials = 10
ds_set = ['mnist', 'fmnist', 'kmnist', 'svhn', 'usps', 'cifar10']
num_classes = 10

# Initialize for record keeping, indexed as [dataset, trial, class i, class j].
# Only the cells with i < j are ever written; all others stay at zero.
paired_test = np.zeros((len(ds_set), trials, num_classes, num_classes))

for m in range(trials):
    # For each 10-class dataset
    for k, ds in enumerate(ds_set):
        # Go through each class
        for i in range(num_classes):
            # and pair it with every higher-numbered class
            for j in range(i + 1, num_classes):

                # Load the binary classification dataloaders
                # NOTE(review): the loaders are rebuilt on every trial and only
                # trainloaders[0] is read - presumably each call produces a
                # fresh split; confirm against dataset_weighted_split_all.
                trainloaders, validloaders, testloader = dataset_weighted_split_all(
                    bs, i, j, weighting, trials, ds)

                # Assign entirety of the datasets within each dataloader to a variable
                X_train = trainloaders[0].dataset.tensors[0]
                y_train = trainloaders[0].dataset.tensors[1]
                X_test = testloader.dataset.tensors[0]
                y_test = testloader.dataset.tensors[1]

                # initialize lda
                lda = LinearDiscriminantAnalysis()

                # fit to images, labels
                lda.fit(X_train, y_train)

                # see accuracy for test set
                score_test = lda.score(X_test, y_test)

                # print(ds, m, i, j, score_test)

                # Record keeping
                paired_test[k, m, i, j] = score_test

np.save('./results/confused_pairs_all.npy', paired_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# Run Order: 1st, 2 out of 2 | ||
# Determine most confused pairs of classes in only EMNIST dataset, specifically the uppercase letters | ||
|
||
|
||
from custompackage.load_data import * | ||
from custompackage.load_architecture import * | ||
from custompackage.traintestloop import * | ||
|
||
import torch | ||
from torch.utils.data import DataLoader | ||
import numpy as np | ||
import math | ||
import torchvision | ||
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis | ||
import scipy | ||
import os | ||
import glob | ||
import pandas as pd | ||
import pickle | ||
|
||
# Make sure the output folder exists (no error if it is already there)
os.makedirs('results', exist_ok=True)

# Testing uppercase Letters from EMNIST only

# Initialize parameters for dataset loading
bs = 256
weighting = 'paired'
trials = 10
ds_set = ['emnist']

# Class labels iterated over: 10 up to (not including) 36, i.e. 26 classes
first_label = 10
end_label = 36
num_classes = end_label - first_label

# Initialize for record keeping, indexed as
# [dataset, trial, label i - first_label, label j - first_label].
# Only the cells with i < j are ever written; all others stay at zero.
paired_test = np.zeros((len(ds_set), trials, num_classes, num_classes))

for m in range(trials):
    # For each dataset (only EMNIST)
    for k, ds in enumerate(ds_set):
        # Go through each class
        for i in range(first_label, end_label):
            # and pair it with every higher-numbered class
            for j in range(i + 1, end_label):

                # Load the binary classification dataloaders
                # NOTE(review): the loaders are rebuilt on every trial and only
                # trainloaders[0] is read - presumably each call produces a
                # fresh split; confirm against dataset_weighted_split_all.
                trainloaders, validloaders, testloader = dataset_weighted_split_all(
                    bs, i, j, weighting, trials, ds)

                # Assign entirety of the datasets within each dataloader to a variable
                X_train = trainloaders[0].dataset.tensors[0]
                y_train = trainloaders[0].dataset.tensors[1]
                X_test = testloader.dataset.tensors[0]
                y_test = testloader.dataset.tensors[1]

                # initialize lda
                lda = LinearDiscriminantAnalysis()

                # fit to images, labels
                lda.fit(X_train, y_train)

                # see accuracy for test set
                score_test = lda.score(X_test, y_test)

                # print(ds, m, i, j, score_test)

                # Record keeping: shift the labels so the matrix starts at index 0
                paired_test[k, m, i - first_label, j - first_label] = score_test

np.save('./results/confused_pairs_emnist_upper.npy', paired_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# Run Order: 3rd, 1 out of 2 | ||
### Train and test fcnn model | ||
### Saves test loss and test accuracy | ||
### all classes script, early stopping implemented | ||
|
||
|
||
from custompackage.load_data import * | ||
from custompackage.load_architecture import * | ||
from custompackage.traintestloop import * | ||
|
||
|
||
import torch | ||
from torch.utils.data import DataLoader | ||
import torch.optim as optim | ||
import torch.nn as nn | ||
import numpy as np | ||
import math | ||
import torchvision | ||
from torchvision import transforms | ||
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis | ||
from sklearn.metrics import classification_report, confusion_matrix | ||
from sklearn.svm import SVC | ||
from sklearn.linear_model import LogisticRegression | ||
from sklearn.naive_bayes import GaussianNB | ||
import matplotlib | ||
import matplotlib.pyplot as plt | ||
from torch.optim.optimizer import required | ||
from torch.utils.data.dataset import random_split | ||
import scipy | ||
import os | ||
import glob | ||
import pandas as pd | ||
import pickle | ||
from pytorchtools import EarlyStopping | ||
|
||
|
||
# Test space for networks
# Select Class Set (only used to tag the output file names)
class_set = 0

# Initialize settings
bs = 256
weighting = 'paired'
trials = 10
epochs = 2000
trees_set = [1, 2, 4, 8, 16, 32]

# Load class-dataset list; each row unpacks to (class 1, class 2, dataset name).
# allow_pickle is required because the array has dtype=object; only load a
# classes.npy that was produced by this pipeline.
classes = np.load('./results/classes.npy', allow_pickle=True)

# Initialize final test loss and accuracy variables,
# indexed as [class pair, trial, entry of trees_set]
loss = np.zeros((len(classes), trials, len(trees_set)))
acc = np.zeros((len(classes), trials, len(trees_set)))

# For each dataset enumerated from classes list
for j, (t1, t2, ds) in enumerate(classes):
    print(t1, t2, ds)
    # Load data loaders
    trainloaders, validloaders, testloader = dataset_weighted_split_all(
        bs, t1, t2, weighting, trials, ds, permute=False)
    # Initialize input size for model initialization purposes
    input_size = trainloaders[0].dataset.tensors[0][0].shape[0]
    # For each trial
    for i in range(trials):
        # For every k-tree defined by trees_set
        for k, trees in enumerate(trees_set):
            print(j, i, k)
            # Initialize the fcnn model, such that hidden layer is twice the number of trees
            model = simple_fcnn(input_size, 2 * trees, 1).cuda()
            # Train and test fcnn, assigning loss and acc values
            loss_curve, acc_curve, loss[j, i, k], acc[j, i, k], model_t = train_test_fc(
                model, trainloaders[i], validloaders[i], testloader, epochs=epochs)

    # Save accuracy and loss arrays after every dataset, so that an
    # interruption of this long run keeps the datasets already finished.
    # The files written after the last dataset hold the complete results.
    np.save(f'./results/fcnn_acc_{class_set}.npy', acc)
    np.save(f'./results/fcnn_loss_{class_set}.npy', loss)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.