diff --git a/.gitignore b/.gitignore index 28a14d1..762fbf5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.jpg *.tar.xz *.pt +*.pkl +*.csv diff --git a/428_525613_cf_Codes.zip b/428_525613_cf_Codes.zip new file mode 100644 index 0000000..509ba51 Binary files /dev/null and b/428_525613_cf_Codes.zip differ diff --git a/428_663089_cf_Scene_Challenge.zip b/428_663089_cf_Scene_Challenge.zip new file mode 100644 index 0000000..6031d6c Binary files /dev/null and b/428_663089_cf_Scene_Challenge.zip differ diff --git a/best_submissions/.~lock.all_submissions_ranked.csv# b/best_submissions/.~lock.all_submissions_ranked.csv# new file mode 100644 index 0000000..5f0dc54 --- /dev/null +++ b/best_submissions/.~lock.all_submissions_ranked.csv# @@ -0,0 +1 @@ +,yash,yash-XPS-15-9560,31.03.2019 14:07,file:///home/yash/.config/libreoffice/4; \ No newline at end of file diff --git a/code/MobileNetV2.py b/code/MobileNetV2.py new file mode 100644 index 0000000..a9b3900 --- /dev/null +++ b/code/MobileNetV2.py @@ -0,0 +1,125 @@ +import torch.nn as nn +import math + + +def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + + +def conv_1x1_bn(inp, oup): + return nn.Sequential( + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = round(inp * expand_ratio) + self.use_res_connect = self.stride == 1 and inp == oup + + if expand_ratio == 1: + self.conv = nn.Sequential( + # dw + nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + else: + self.conv = nn.Sequential( + # pw + nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # dw + nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + nn.BatchNorm2d(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Module): + def __init__(self, n_class=1000, input_size=224, width_mult=1.): + super(MobileNetV2, self).__init__() + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + interverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # building first layer + assert input_size % 32 == 0 + input_channel = int(input_channel * width_mult) + self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel + self.features = [conv_bn(3, input_channel, 2)] + # building inverted residual blocks + for t, c, n, s in interverted_residual_setting: + output_channel = int(c * width_mult) + for i in range(n): + if i == 0: + self.features.append(block(input_channel, output_channel, s, expand_ratio=t)) + else: + self.features.append(block(input_channel, output_channel, 1, expand_ratio=t)) + input_channel = output_channel + # building last several layers + self.features.append(conv_1x1_bn(input_channel, self.last_channel)) + # make it nn.Sequential + self.features = nn.Sequential(*self.features) + + # building classifier + self.classifier = nn.Sequential( + nn.Dropout(0.2), + nn.Linear(self.last_channel, n_class), + ) + + self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = x.mean(3).mean(2) + x = self.classifier(x) + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + if m.bias is not None: + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.Linear): + n = m.weight.size(1) + m.weight.data.normal_(0, 0.01) + m.bias.data.zero_() diff --git a/code/__pycache__/fine_tuning_config_file.cpython-36.pyc b/code/__pycache__/fine_tuning_config_file.cpython-36.pyc index 2695430..02de8c8 100644 Binary files a/code/__pycache__/fine_tuning_config_file.cpython-36.pyc and b/code/__pycache__/fine_tuning_config_file.cpython-36.pyc differ diff --git a/code/check_validation.py b/code/check_validation.py new file mode 100644 index 0000000..edf4533 --- /dev/null +++ b/code/check_validation.py @@ -0,0 +1,71 @@ +import torch +import torch.hub +import pretrainedmodels +import torch.nn as nn +import torch.utils.data as data +import torchvision.datasets as datasets +import torchvision.models as models +import torchvision.transforms as transforms +import numpy as np +from PIL import Image +import os +import pdb +import pickle + +class ImageFolderWithPaths(datasets.ImageFolder): + """Custom dataset that includes image file paths. Extends + torchvision.datasets.ImageFolder + """ + + # override the __getitem__ method. this is the method dataloader calls + def __getitem__(self, index): + # this is what ImageFolder normally returns + original_tuple = super(ImageFolderWithPaths, self).__getitem__(index) + # the image file path + path = self.imgs[index][0] + # make a new tuple that includes original and the path + tuple_with_path = (original_tuple + (path,)) + return tuple_with_path + + +inp_size = 331 +data_transforms = transforms.Compose([ + transforms.Resize(inp_size), + transforms.CenterCrop(inp_size), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + +data_dir = "../imgs_resized/" + +dsets = {x: ImageFolderWithPaths(os.path.join(data_dir, x), data_transforms) for x in ['val']} + +dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=1, shuffle=True, num_workers=25) for x in ['val']} + +dset_sizes = {x: len(dsets[x]) for x in ['val']} + +#model = models.resnet18(num_classes=6) +#model.load_state_dict(torch.load('fine_tuned_best_model.pt')) + +model = pretrainedmodels.xception(num_classes=1000) +num_ftrs = model.last_linear.in_features +model.last_linear = nn.Linear(num_ftrs, 6) +model.load_state_dict(torch.load('best_model_xception_cutout_aug.pt')) + +#model = torch.hub.load('moskomule/senet.pytorch', 'se_resnet20', num_classes=6) +#model.load_state_dict(torch.load('best_model_senet20_aug_nofreeze.pt')) + +model.cuda() +model.eval() + +for data in dset_loaders['val']: + image, label, path = data + image_var = torch.autograd.Variable(image.cuda(), volatile=True) + y_pred = model(image_var) + smax = nn.Softmax() + smax_out = smax(y_pred)[0] + pred = np.argmax(smax_out.cpu().data).item() + label = label.cpu().data.item() + + if pred!=label: + print("path:", path, "pred:", pred, "label:", label) diff --git a/code/create_features.py b/code/create_features.py new file mode 100644 index 0000000..fc382c5 --- /dev/null +++ b/code/create_features.py @@ -0,0 +1,98 @@ +### Section 1 - First, let's import everything we will be needing. + +from __future__ import print_function, division +import torch +import torch.nn as nn +from torch.autograd import Variable +import numpy as np +import torchvision +from torchvision import datasets, models, transforms +import copy +import os +from PIL import ImageFile +ImageFile.LOAD_TRUNCATED_IMAGES = True +from fine_tuning_config_file import * +import pdb +from tqdm import tqdm + +### Non-deeplearning +from sklearn.svm import NuSVC +from sklearn.metrics import accuracy_score + +use_gpu = GPU_MODE +if use_gpu: + torch.cuda.set_device(CUDA_DEVICE) + +count=0 + +data_transforms = { + 'train': transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]), + 'val': transforms.Compose([ + transforms.Resize(224), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]), +} + + + +data_dir = DATA_DIR +dsets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) + for x in ['train', 'val']} +dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=BATCH_SIZE, + shuffle=True, num_workers=25) + for x in ['train', 'val']} +dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']} +dset_classes = dsets['train'].classes + + +### SECTION 3 : Writing the functions that do training and validation phase. +def create_features(model, phase="train"): + model.eval() + + for i, data in tqdm(enumerate(dset_loaders[phase])): + inputs, labels = data + if use_gpu: + inputs = Variable(inputs.float().cuda()) + else: + print("Use a GPU!") + + features_var = model(inputs) + features = features_var.squeeze().cpu().data.numpy() + + if i==0: + X = features + Y = labels.numpy() + else: + X = np.concatenate((X, features), axis=0) + Y = np.concatenate((Y, labels), axis=0) + + return X, Y + + + +model_ft = models.resnet18(pretrained=False, num_classes=6) +model_ft.load_state_dict(torch.load('best_model_resnet18_aug.pt')) + +my_model = nn.Sequential(*list(model_ft.children())[:-1]) +for param in my_model.parameters(): + param.requires_grad = False + +if use_gpu: + model_ft.cuda() + +train_x, train_y = create_features(my_model, phase="train") +val_x, val_y = create_features(my_model, phase="val") + +pdb.set_trace() + +clf = NuSVC(gamma='scale', verbose=True) +clf.fit(train_x, train_y) +predictions = clf.predict(val_x) +print("Val accuracy:", accuracy_score(val_y, predictions)) diff --git a/code/create_submission.py b/code/create_submission.py index 0f6a564..f680132 100644 --- a/code/create_submission.py +++ b/code/create_submission.py @@ -9,32 +9,54 @@ csv_map = {} -pkl_file = open('../dumps/dump_xception_cutout_aug.pkl', 'rb') +pkl_file = open('../dumps/dump_fastai_incepres2_full.pkl', 'rb') dump0 = pickle.load(pkl_file) pkl_file.close() -pkl_file = open('../dumps/dump_resnet50_cutout_aug.pkl', 'rb') +pkl_file = open('../dumps/dump_fastai_wrn_full.pkl', 'rb') dump1 = pickle.load(pkl_file) pkl_file.close() -pkl_file = open('../dumps/dump_alexnet_cutout_aug.pkl', 'rb') +pkl_file = open('../dumps/dump_nasnet_cutout_aug.pkl', 'rb') dump2 = pickle.load(pkl_file) pkl_file.close() -pkl_file = open('../dumps/dump_squeeze1_1_cutout_aug.pkl', 'rb') +pkl_file = open('../dumps/dump_resnext101_32_cutout_aug.pkl', 'rb') dump3 = pickle.load(pkl_file) pkl_file.close() -pkl_file = open('../dumps/dump_nasnet_cutout_aug.pkl', 'rb') +pkl_file = open('../dumps/dump_dense161_cutout_aug.pkl', 'rb') dump4 = pickle.load(pkl_file) pkl_file.close() -#pkl_file = open('../dumps/dump_resnet34_aug_nofreeze.pkl', 'rb') -#dump5 = pickle.load(pkl_file) -#pkl_file.close() +pkl_file = open('../dumps/dump_fastai_res152_v2_full.pkl', 'rb') +dump5 = pickle.load(pkl_file) +pkl_file.close() + +pkl_file = open('../dumps/dump_fastai_resnext10164_full.pkl', 'rb') +dump6 = pickle.load(pkl_file) +pkl_file.close() + +pkl_file = open('../dumps/dump_fastai_res152.pkl', 'rb') +dump7 = pickle.load(pkl_file) +pkl_file.close() + +pkl_file = open('../dumps/dump_fastai_incep4_full.pkl', 'rb') +dump8 = pickle.load(pkl_file) +pkl_file.close() + +pkl_file = open('../dumps/dump_fastai_dn161.pkl', 'rb') +dump9 = pickle.load(pkl_file) +pkl_file.close() + +pkl_file = open('../dumps/dump_fastai_res152_full.pkl', 'rb') +dump10 = pickle.load(pkl_file) +pkl_file.close() -for fnum in dump1: - avg_arr = (dump0[fnum]+dump1[fnum]+dump2[fnum]+dump3[fnum]+dump4[fnum])/5.0 +for fnum in dump0: + #avg_arr = (dump0[fnum]+dump1[fnum]+2*dump2[fnum]+dump3[fnum]+6*dump4[fnum]+dump5[fnum]+dump6[fnum]+dump7[fnum]+dump8[fnum]+6*dump9[fnum])/13.0 + avg_arr = (dump4[fnum]+dump5[fnum]+dump6[fnum]+dump7[fnum]+dump8[fnum]+dump9[fnum]+dump10[fnum])/6.0 + #avg_arr = dump0[fnum] y_pred = torch.from_numpy(avg_arr) smax = nn.Softmax() smax_out = smax(y_pred) @@ -42,7 +64,9 @@ csv_map[fnum] = c #print(fnum, ": ", c) -with open("../submissions/submission_xcep_res50_alex_squ_nas_allcut.csv", 'w') as csvfile: +#with open("../submissions/submission_xcep_wrn_full_2_nas_resnext32_6_dense161_res152_v2_full_resnext64_full_res152_incep4_v2_full_fast_3_dn161.csv", 'w') as csvfile: +with open("../submissions/submission_dense161_cut_res152_v2_full_resnext64_full_res152_incep4_full_fast_dn161_res152_full.csv", 'w') as csvfile: +#with open("../submissions/submission_xcep_cutout_full.csv", 'w') as csvfile: fieldnames = ['image_name', 'label'] csvfile.write('image_name,label') csvfile.write('\n') diff --git a/code/dump_output.py b/code/dump_output.py index 7c40874..c7c604c 100644 --- a/code/dump_output.py +++ b/code/dump_output.py @@ -50,20 +50,20 @@ def __len__(self): num_workers=1, pin_memory=False) -# model = pretrainedmodels.nasnetamobile(num_classes=1000) -model = models.squeezenet1_1() +model = pretrainedmodels.xception(num_classes=1000) +#model = models.squeezenet1_1() -#num_ftrs = model.last_linear.in_features -#model.last_linear = nn.Linear(num_ftrs, 6) -model.classifier = nn.Sequential( - nn.Dropout(p=0.5), - nn.Conv2d(512, 6, kernel_size=1), - nn.ReLU(inplace=True), - nn.AdaptiveAvgPool2d((1, 1)) - ) -model.num_classes = 6 +num_ftrs = model.last_linear.in_features +model.last_linear = nn.Linear(num_ftrs, 6) +#model.classifier = nn.Sequential( +# nn.Dropout(p=0.5), +# nn.Conv2d(512, 6, kernel_size=1), +# nn.ReLU(inplace=True), +# nn.AdaptiveAvgPool2d((1, 1)) +# ) +#model.num_classes = 6 -model.load_state_dict(torch.load('best_model_squeeze1_1_cutout_aug.pt')) +model.load_state_dict(torch.load('best_model_xcep_cutout_full.pt')) model.cuda() model.eval() @@ -80,6 +80,6 @@ def __len__(self): pred = y_pred[0].cpu().data.numpy() csv_map[filepath] = pred -output = open('../dumps/dump_squeeze1_1_cutout_aug.pkl', 'wb') +output = open('../dumps/dump_xcep_cutout_full.pkl', 'wb') pickle.dump(csv_map, output) output.close() diff --git a/code/dump_output_patchwise.py b/code/dump_output_patchwise.py new file mode 100644 index 0000000..10a0fc3 --- /dev/null +++ b/code/dump_output_patchwise.py @@ -0,0 +1,82 @@ +import torch +import torch.hub +import pretrainedmodels +import torch.nn as nn +import torch.utils.data as data +import torchvision.datasets as datasets +import torchvision.models as models +import torchvision.transforms as transforms +import numpy as np +from PIL import Image +import os +import pdb +import pickle +from patchwise import Quadrant + +class TestImageFolder(data.Dataset): + def __init__(self, root, transform=None): + images = [] + for filename in os.listdir(root): + if filename.endswith('jpg'): + images.append('{}'.format(filename)) + + self.root = root + self.imgs = images + self.transform = transform + + def __getitem__(self, index): + filename = self.imgs[index] + img = Image.open(os.path.join(self.root, filename)) + if img.layers==1: + print(filename) + img = img.convert('RGB') + if self.transform is not None: + img = self.transform(img) + return img, filename + + def __len__(self): + return len(self.imgs) + +normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + +inp_size = 224 +testdir = "../imgs_resized/test/" +test_loader = data.DataLoader( + TestImageFolder(testdir, + transforms.Compose([ + transforms.Scale(inp_size), + transforms.CenterCrop(inp_size), + transforms.ToTensor(), + normalize, + ])), + batch_size=1, + shuffle=False, + num_workers=1, + pin_memory=False) + +model = Quadrant() +model.load_state_dict(torch.load('best_model_dense161_cutout_patchwise.pt')) + +model.cuda() +model.eval() + +csv_map = {} + +for i, (images, filepath) in enumerate(test_loader): + filepath = os.path.splitext(os.path.basename(filepath[0]))[0] + filepath = int(filepath) + image_var = torch.autograd.Variable(images.cuda(), volatile=True) + patch1 = image_var[:,:,0:112, 0:112] + patch2 = image_var[:,:,0:112, 113:] + patch3 = image_var[:,:,113:, 0:112] + patch4 = image_var[:,:,113:, 113:] + try: + y_pred = model(patch1, patch2, patch3, patch4) + except: + pdb.set_trace() + pred = y_pred[0].cpu().data.numpy() + csv_map[filepath] = pred + +output = open('../dumps/dump_dense161_cutout_patchwise.pkl', 'wb') +pickle.dump(csv_map, output) +output.close() diff --git a/code/fastai_full.py b/code/fastai_full.py new file mode 100644 index 0000000..0887a49 --- /dev/null +++ b/code/fastai_full.py @@ -0,0 +1,79 @@ +from fastai.imports import * +from fastai.transforms import * +from fastai.conv_learner import * +from fastai.model import * +from fastai.dataset import * +from fastai.sgdr import * +from fastai.plots import * + +os.environ["CUDA_VISIBLE_DEVICES"] = '0' + +PATH = "../imgs_resized/" +sz = 224 + +arch = resnet18 +bs = 32 + +data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms_from_model(arch, sz), test_name="test") +learn = ConvLearner.pretrained(arch, data, precompute=True) + +def accuracy_np(preds, targs): + preds, targs = preds.cpu(), targs.cpu() + preds = np.argmax(preds, 1) + return (preds.numpy()==targs.numpy()).mean() + +# aug_tfms = [RandomLighting(b=0.5, c=0, tfm_y=TfmType.NO), +# RandomZoom(zoom_max=1), +# RandomStretch(max_stretch=2), +# RandomFlip(), +# GoogleNetResize(targ_sz=224), +# Cutout(n_holes=8, length=12, tfm_y=TfmType.NO)] + +aug_tfms = transforms_side_on+[Cutout(n_holes=8, length=12, tfm_y=TfmType.NO)] + +tfms = tfms_from_model(arch, sz, aug_tfms=aug_tfms, max_zoom=1.1) + +data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms, test_name="test") +learn = ConvLearner.pretrained(arch, data, precompute=True) + +lrf = learn.lr_find() +ind = np.argmin(learn.sched.losses) +lr = learn.sched.lrs[ind-100] + +learn.fit(lr, 2, metrics=[accuracy_np]) + +learn.precompute=False +learn.fit(0.002, 3, cycle_len=1, metrics=[accuracy_np]) + +learn.unfreeze() +lrs = np.array([lr*0.01, lr*0.1, lr]) +learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=[accuracy_np]) +learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=[accuracy_np]) +lrs = lrs*0.5 +learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=[accuracy_np]) +learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=[accuracy_np]) +lrs = lrs*0.7 +learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=[accuracy_np]) +learn.fit(lrs, 3, cycle_len=1, cycle_mult=2, metrics=[accuracy_np]) + +#### TESTING +log_preds,y = learn.TTA(is_test=True) +probs = np.mean(np.exp(log_preds),0) +csv_map = {} + +import os +filenames = os.listdir('../imgs_resized/test/') + +for i,fname in enumerate(filenames): + csv_map[int(fname[:-4])] = probs[i] + +output = open("../dumps/dump_fastai_res18_tanvi.pkl", 'wb') +pickle.dump(csv_map, output) +output.close() + +preds = np.argmax(probs, axis=1) +submission = pd.DataFrame( + {'image_name': filenames, + 'label': preds, + }) +submission.to_csv('../submissions/submission_fastai_res18_tanvi.csv') diff --git a/code/fastai_init.py b/code/fastai_init.py new file mode 100644 index 0000000..e5d29b8 --- /dev/null +++ b/code/fastai_init.py @@ -0,0 +1,27 @@ +from fastai.imports import * +from fastai.transforms import * +from fastai.conv_learner import * +from fastai.model import * +from fastai.dataset import * +from fastai.sgdr import * +from fastai.plots import * + +PATH = "../imgs_resized/" +sz = 224 + +arch = wrn + +data = ImageClassifierData.from_paths(PATH, bs=64, tfms=tfms_from_model(arch, sz), test_name="test") +learn = ConvLearner.pretrained(arch, data, precompute=True) + +def accuracy_np(preds, targs): + preds, targs = preds.cpu(), targs.cpu() + preds = np.argmax(preds, 1) + return (preds.numpy()==targs.numpy()).mean() + +aug_tfms = [RandomLighting(b=0.5, c=0, tfm_y=TfmType.NO), + RandomRotateZoom(deg=25, zoom=2, stretch=1), + RandomStretch(max_stretch=2), + Cutout(n_holes=5, length=16, tfm_y=TfmType.NO)] + +tfms = tfms_from_model(arch, sz, aug_tfms=aug_tfms, max_zoom=1.1) diff --git a/code/fine_tuning_config_file.py b/code/fine_tuning_config_file.py index a55eebf..549a154 100644 --- a/code/fine_tuning_config_file.py +++ b/code/fine_tuning_config_file.py @@ -9,7 +9,7 @@ DATA_DIR = '../imgs_resized/' # to run with the sample dataset, just set to 'hymenoptera_data' # DATALOADER PROPERTIES -BATCH_SIZE = 16 # Set as high as possible. If you keep it too high, you'll get an out of memory error. +BATCH_SIZE = 4 # Set as high as possible. If you keep it too high, you'll get an out of memory error. ### GPU SETTINGS diff --git a/code/main_fine_tuning_patch.py b/code/main_fine_tuning_patch.py new file mode 100644 index 0000000..43d46cc --- /dev/null +++ b/code/main_fine_tuning_patch.py @@ -0,0 +1,278 @@ +### Section 1 - First, let's import everything we will be needing. + +from __future__ import print_function, division +import torch +import pretrainedmodels +import torch.hub +import torch.nn as nn +import torch.optim as optim +from torch.autograd import Variable +import numpy as np +import torchvision +from torchvision import datasets, models, transforms +import matplotlib.pyplot as plt +import time +import copy +import os +from PIL import ImageFile, Image +ImageFile.LOAD_TRUNCATED_IMAGES = True +from fine_tuning_config_file import * +import pdb +from tqdm import tqdm +from MobileNetV2 import MobileNetV2 +from torch.utils import data as D +from albumentations import ( + HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90, Cutout, + Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue, + IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, + IAASharpen, IAAEmboss, RandomContrast, RandomBrightness, Flip, OneOf, Compose, ToGray +) +from patchwise import Quadrant + + +## If you want to keep a track of your network on tensorboard, set USE_TENSORBOARD TO 1 in config file. + +if USE_TENSORBOARD: + from pycrayon import CrayonClient + cc = CrayonClient(hostname=TENSORBOARD_SERVER) + try: + cc.remove_experiment(EXP_NAME) + except: + pass + foo = cc.create_experiment(EXP_NAME) + + +## If you want to use the GPU, set GPU_MODE TO 1 in config file + +use_gpu = GPU_MODE +if use_gpu: + torch.cuda.set_device(CUDA_DEVICE) + +count=0 + +def strong_aug(p=.5): + return Compose([ + HorizontalFlip(p=0.5), + ToGray(p=0.1), + OneOf([ + IAAAdditiveGaussianNoise(), + GaussNoise(), + ], p=0.4), + OneOf([ + MotionBlur(p=.2), + MedianBlur(blur_limit=3, p=.1), + Blur(blur_limit=3, p=.1), + ], p=0.2), + OneOf([ + OpticalDistortion(p=0.3), + GridDistortion(p=.1), + IAAPiecewiseAffine(p=0.3), + ], p=0.2), + OneOf([ + CLAHE(clip_limit=2), + IAASharpen(), + RandomContrast(), + RandomBrightness(), + ], p=0.3), + HueSaturationValue(p=0.3), + ], p=p) + +def augment(aug, image): + return aug(image=image)['image'] + +class MyTransform(object): + def __call__(self, img): + aug = strong_aug(p=1.0) + return Image.fromarray(augment(aug, np.array(img))) + +inp_size = 224 + +data_transforms = { + 'train': transforms.Compose([ + MyTransform(), + transforms.RandomResizedCrop(inp_size), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]), + 'val': transforms.Compose([ + transforms.Resize(inp_size), + transforms.CenterCrop(inp_size), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]), +} + + + +data_dir = DATA_DIR +dsets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) + for x in ['train', 'val']} +dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=BATCH_SIZE, + shuffle=True, num_workers=25) + for x in ['train', 'val']} +dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']} +dset_classes = dsets['train'].classes + + +### SECTION 3 : Writing the functions that do training and validation phase. + +def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=100): + since = time.time() + + best_model = model + best_acc = 0.0 + + for epoch in range(num_epochs): + print('Epoch {}/{}'.format(epoch, num_epochs - 1)) + print('-' * 10) + + # Each epoch has a training and validation phase + for phase in ['train', 'val']: + if phase == 'train': + mode='train' + optimizer = lr_scheduler(optimizer, epoch) + model.train() # Set model to training mode + else: + model.eval() + mode='val' + + running_loss = 0.0 + running_corrects = 0 + + counter=0 + # Iterate over data. + for data in tqdm(dset_loaders[phase]): + inputs, labels = data + # print(inputs.size()) + # wrap them in Variable + if use_gpu: + try: + # pdb.set_trace() + inputs, labels = Variable(inputs.float().cuda()), Variable(labels.long().cuda()) + except: + pdb.set_trace() + print(inputs,labels) + else: + inputs, labels = Variable(inputs), Variable(labels) + + # Set gradient to zero to delete history of computations in previous epoch. Track operations so that differentiation can be done automatically. + + patch1 = inputs[:,:,0:112, 0:112] + patch2 = inputs[:,:,0:112, 113:] + patch3 = inputs[:,:,113:, 0:112] + patch4 = inputs[:,:,113:, 113:] + + optimizer.zero_grad() + outputs = model(patch1, patch2, patch3, patch4) + _, preds = torch.max(outputs.data, 1) + + loss = criterion(outputs, labels) + # print('loss done') + # Just so that you can keep track that something's happening and don't feel like the program isn't running. + # if counter%10==0: + # print("Reached iteration ",counter) + counter+=1 + + # backward + optimize only if in training phase + if phase == 'train': + # print('loss backward') + loss.backward() + # print('done loss backward') + optimizer.step() + # print('done optim') + # print evaluation statistics + try: + # running_loss += loss.data[0] + running_loss += loss.item() + # print(labels.data) + # print(preds) + running_corrects += torch.sum(preds == labels.data) + # print('running correct =',running_corrects) + except: + print('unexpected error, could not calculate loss or do a sum.') + print('trying epoch loss') + epoch_loss = running_loss / dset_sizes[phase] + epoch_acc = running_corrects.item() / float(dset_sizes[phase]) + print('{} Loss: {:.4f} Acc: {:.4f}'.format( + phase, epoch_loss, epoch_acc)) + + + # deep copy the model + if phase == 'val': + if USE_TENSORBOARD: + foo.add_scalar_value('epoch_loss',epoch_loss,step=epoch) + foo.add_scalar_value('epoch_acc',epoch_acc,step=epoch) + if epoch_acc > best_acc: + best_acc = epoch_acc + best_model = copy.deepcopy(model) + print('new best accuracy = ',best_acc) + time_elapsed = time.time() - since + print('Training complete in {:.0f}m {:.0f}s'.format( + time_elapsed // 60, time_elapsed % 60)) + print('Best val Acc: {:4f}'.format(best_acc)) + print('returning and looping back') + return best_model + +# This function changes the learning rate over the training model. +def exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR, lr_decay_epoch=EPOCH_DECAY): + """Decay learning rate by a factor of DECAY_WEIGHT every lr_decay_epoch epochs.""" + lr = init_lr * (DECAY_WEIGHT**(epoch // lr_decay_epoch)) + + if epoch % lr_decay_epoch == 0: + print('LR is set to {}'.format(lr)) + + for param_group in optimizer.param_groups: + param_group['lr'] = lr + + return optimizer + + +### SECTION 4 : DEFINING MODEL ARCHITECTURE. + +# We use Resnet18 here. If you have more computational power, feel free to swap it with Resnet50, Resnet100 or Resnet152. +# Since we are doing fine-tuning, or transfer learning we will use the pretrained net weights. In the last line, the number of classes has been specified. +# Set the number of classes in the config file by setting the right value for NUM_CLASSES. + +################ RESNET +# model_ft = models.resnet152(pretrained=True) +# model_ft = pretrainedmodels.resnext101_64x4d(num_classes=1000, pretrained='imagenet') +#model_ft = torch.hub.load( +# 'moskomule/senet.pytorch', +# 'se_resnet20', +# num_classes=6) +#for param in model_ft.parameters(): +# param.requires_grad = False +#num_ftrs = model_ft.fc.in_features +#model_ft.fc = nn.Linear(num_ftrs, NUM_CLASSES) +# num_ftrs = model_ft.classifier[6].in_features +# model_ft.classifier[6] = nn.Linear(num_ftrs, NUM_CLASSES) + +# num_ftrs = model_ft.last_linear.in_features +# model_ft.last_linear = nn.Linear(num_ftrs, NUM_CLASSES) + +################ MobileV2-Net +# model_ft = MobileNetV2(n_class=1000) +# state_dict = torch.load('mobilenet_v2.pth.tar') +# num_ftrs = model_ft.classifier[1].in_features +# model_ft.classifier[1] = nn.Linear(num_ftrs, NUM_CLASSES) + +model_ft = Quadrant() + +criterion = nn.CrossEntropyLoss() + +if use_gpu: + criterion.cuda() + model_ft.cuda() + +optimizer_ft = optim.RMSprop(model_ft.parameters(), lr=0.0001) + + + +# Run the functions and save the best model in the function model_ft. +model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, + num_epochs=30) + +# Save model +torch.save(model_ft.state_dict(), 'best_model_resnext64_cutout_aug.pt') +# model_ft.save_state_dict('fine_tuned_best_model.pt') diff --git a/code/patchwise.py b/code/patchwise.py new file mode 100644 index 0000000..630adbe --- /dev/null +++ b/code/patchwise.py @@ -0,0 +1,57 @@ +import torch +import torch.nn as nn +from torchvision import models +import pdb + +NUM_CLASSES = 6 + +class Quadrant(torch.nn.Module): + def __init__(self): + """ + In the constructor we instantiate two nn.Linear modules and assign them as + member variables. + """ + super(Quadrant, self).__init__() + model1 = models.densenet161(pretrained=True, num_classes=1000) + self.model1 = nn.Sequential(*list(model1.children())[:-1]) + + model2 = models.densenet161(pretrained=True, num_classes=1000) + self.model2 = nn.Sequential(*list(model2.children())[:-1]) + + model3 = models.densenet161(pretrained=True, num_classes=1000) + self.model3 = nn.Sequential(*list(model3.children())[:-1]) + + model4 = models.densenet161(pretrained=True, num_classes=1000) + self.model4 = nn.Sequential(*list(model4.children())[:-1]) + + num_ftrs = model1.classifier.in_features + \ + model2.classifier.in_features + \ + model3.classifier.in_features + \ + model4.classifier.in_features + + del model1 + del model2 + del model3 + del model4 + + self.avgpool = nn.AvgPool2d(3) + + self.classifier = nn.Linear(num_ftrs, NUM_CLASSES) + + def forward(self, p1, p2, p3, p4): + """ + In the forward function we accept a Tensor of input data and we must return + a Tensor of output data. We can use Modules defined in the constructor as + well as arbitrary operators on Tensors. + """ + f1 = self.model1(p1) + f2 = self.model1(p2) + f3 = self.model1(p3) + f4 = self.model1(p4) + feat = torch.cat((f1, f2, f3, f4),1) + + pooled = self.avgpool(feat).squeeze() + + out = self.classifier(pooled) + + return out diff --git a/code/main_fine_tuning_aug.py b/code/train_evaluate_scene_classification.py similarity index 87% rename from code/main_fine_tuning_aug.py rename to code/train_evaluate_scene_classification.py index 8ebff48..27974ea 100644 --- a/code/main_fine_tuning_aug.py +++ b/code/train_evaluate_scene_classification.py @@ -1,4 +1,7 @@ -### Section 1 - First, let's import everything we will be needing. +############################################### +# Intel Scene Classification challenge +# Written by: Yash Sanjay Bhalgat +############################################### from __future__ import print_function, division import torch @@ -23,27 +26,11 @@ from albumentations import ( HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90, Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue, - IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, + IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, ChannelShuffle, Cutout, IAASharpen, IAAEmboss, RandomContrast, RandomBrightness, Flip, OneOf, Compose ) -## If you want to keep a track of your network on tensorboard, set USE_TENSORBOARD TO 1 in config file. - -if USE_TENSORBOARD: - from pycrayon import CrayonClient - cc = CrayonClient(hostname=TENSORBOARD_SERVER) - try: - cc.remove_experiment(EXP_NAME) - except: - pass - foo = cc.create_experiment(EXP_NAME) - - -## If you want to use the GPU, set GPU_MODE TO 1 in config file - -use_gpu = GPU_MODE -if use_gpu: - torch.cuda.set_device(CUDA_DEVICE) +torch.cuda.set_device(CUDA_DEVICE) count=0 @@ -71,6 +58,8 @@ def strong_aug(p=.5): RandomBrightness(), ], p=0.3), HueSaturationValue(p=0.3), + ChannelShuffle(), + Cutout(num_holes=20, max_h_size=16, max_w_size=16) ], p=p) def augment(aug, image): @@ -235,22 +224,22 @@ def exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR, lr_decay_epoch=EPOCH_DEC # Set the number of classes in the config file by setting the right value for NUM_CLASSES. ################ RESNET -model_ft = models.squeezenet1_1(pretrained=True) +# model_ft = models.squeezenet1_1(pretrained=True) # model_ft = torch.hub.load( # 'moskomule/senet.pytorch', # 'se_resnet20', # num_classes=6) -# model_ft = pretrainedmodels.nasnetamobile(num_classes=1000, pretrained='imagenet') +model_ft = pretrainedmodels.xception(num_classes=1000, pretrained='imagenet') # for param in model_ft.parameters(): # param.requires_grad = False -model_ft.classifier = nn.Sequential( - nn.Dropout(p=0.5), - nn.Conv2d(512, NUM_CLASSES, kernel_size=1), - nn.ReLU(inplace=True), - nn.AdaptiveAvgPool2d((1, 1)) - ) -model_ft.num_classes = NUM_CLASSES +#model_ft.classifier = nn.Sequential( +# nn.Dropout(p=0.5), +# nn.Conv2d(512, NUM_CLASSES, kernel_size=1), +# nn.ReLU(inplace=True), +# nn.AdaptiveAvgPool2d((1, 1)) +# ) +#model_ft.num_classes = NUM_CLASSES # num_ftrs = model_ft.fc.in_features # model_ft.fc = nn.Linear(num_ftrs, NUM_CLASSES) @@ -258,8 +247,8 @@ def exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR, lr_decay_epoch=EPOCH_DEC # num_ftrs = model_ft.classifier[6].in_features # model_ft.classifier[6] = nn.Linear(num_ftrs, NUM_CLASSES) -# num_ftrs = model_ft.last_linear.in_features -# model_ft.last_linear = nn.Linear(num_ftrs, NUM_CLASSES) +num_ftrs = model_ft.last_linear.in_features +model_ft.last_linear = nn.Linear(num_ftrs, NUM_CLASSES) ################ MobileV2-Net # model_ft = MobileNetV2(n_class=1000) @@ -279,8 +268,8 @@ def exp_lr_scheduler(optimizer, epoch, init_lr=BASE_LR, lr_decay_epoch=EPOCH_DEC # Run the functions and save the best model in the function model_ft. model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, - num_epochs=30) + num_epochs=20) # Save model -torch.save(model_ft.state_dict(), 'best_model_squeeze1_1_cutout_aug.pt') +torch.save(model_ft.state_dict(), 'best_model_xcep_cutout_full.pt') # model_ft.save_state_dict('fine_tuned_best_model.pt')