From 9bb95d7c36f7ac38bfd64b5462c0e92a82c64c8e Mon Sep 17 00:00:00 2001 From: Shreyash Gupta <31446588+shreyashguptas@users.noreply.github.com> Date: Sun, 23 Jun 2024 15:01:11 +0000 Subject: [PATCH] chore: Update dataset paths in code files --- CAPTCHA Images/.gitignore | 2 -- combined_code.txt | 20 +++++++++++++++++--- dataset.py | 2 +- predict.py | 2 +- split_train_val_test.py | 2 +- train.py | 6 +++--- 6 files changed, 23 insertions(+), 11 deletions(-) delete mode 100644 CAPTCHA Images/.gitignore diff --git a/CAPTCHA Images/.gitignore b/CAPTCHA Images/.gitignore deleted file mode 100644 index a3a0c8b..0000000 --- a/CAPTCHA Images/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore \ No newline at end of file diff --git a/combined_code.txt b/combined_code.txt index 83c3dad..a4f124c 100644 --- a/combined_code.txt +++ b/combined_code.txt @@ -19,15 +19,25 @@ def split_image(image): return output +# class CaptchaImagesDataset(Dataset): +# def __init__(self, root, augment=False): +# super(CaptchaImagesDataset, self).__init__() +# self.root = root +# self.augment = augment + +# self.image_list = [] +# for ext in ('*.png', '*.jpg'): +# self.image_list.extend(glob.glob(os.path.join(root, ext))) + class CaptchaImagesDataset(Dataset): def __init__(self, root, augment=False): super(CaptchaImagesDataset, self).__init__() self.root = root self.augment = augment - self.image_list = [] for ext in ('*.png', '*.jpg'): self.image_list.extend(glob.glob(os.path.join(root, ext))) + print(f"Found {len(self.image_list)} images in {root}") def __len__(self): return len(self.image_list) @@ -54,8 +64,13 @@ class CaptchaImagesDataset(Dataset): def get_loader(root, batch_size): train_dataset = CaptchaImagesDataset(root + '/train', augment=True) val_dataset = CaptchaImagesDataset(root + '/val', augment=False) + + print(f"Number of training samples: {len(train_dataset)}") + print(f"Number of validation samples: {len(val_dataset)}") + train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True) val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, drop_last=True) + return train_loader, val_loader @@ -384,5 +399,4 @@ if __name__ == '__main__': if ground_truth == decoded_text: correct += 1 - print('accuracy =', correct/len(image_list)) - + print('accuracy =', correct/len(image_list)) \ No newline at end of file diff --git a/dataset.py b/dataset.py index 90a641e..5d06b6d 100644 --- a/dataset.py +++ b/dataset.py @@ -74,7 +74,7 @@ def get_loader(root, batch_size): if __name__ == '__main__': - train, val = get_loader('data/CAPTCHA Images/', batch_size=2) + train, val = get_loader('/home/dev/dev_work_shrey/playing_around/data/CAPTCHA Images/', batch_size=2) for image, labels in train: print() print() diff --git a/predict.py b/predict.py index 84e7945..ade2082 100644 --- a/predict.py +++ b/predict.py @@ -24,7 +24,7 @@ model.eval() correct = 0.0 - image_list = glob.glob('/app/data/CAPTCHA Images/test/*') + image_list = glob.glob('/home/dev/dev_work_shrey/playing_around/data/CAPTCHA Images/test/*') for image in tqdm(image_list): ground_truth = image.split('/')[-1].split('.')[0] image = Image.open(image).convert('RGB') diff --git a/split_train_val_test.py b/split_train_val_test.py index b2dd4a7..42ba03d 100644 --- a/split_train_val_test.py +++ b/split_train_val_test.py @@ -35,6 +35,6 @@ def split_train_val_test(root): if __name__ == '__main__': - split_train_val_test('data/CAPTCHA Images') + split_train_val_test('/home/dev/dev_work_shrey/playing_around/data/CAPTCHA Images') print() diff --git a/train.py b/train.py index 26c76f4..ab2ce24 100644 --- a/train.py +++ b/train.py @@ -64,7 +64,7 @@ def train(): model = CRNN(vocab_size=vocab_size).to(device) # model.load_state_dict(torch.load('output/weight.pth', map_location=device)) - train_loader, val_loader = get_loader('/app/data/CAPTCHA Images', batch_size=batch_size) + train_loader, val_loader = get_loader('/home/dev/dev_work_shrey/playing_around/data/CAPTCHA Images', batch_size=batch_size) optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min') @@ -77,7 +77,7 @@ def train(): print('-----------------------------------------') if epoch == 0 or val_epoch_loss <= np.min(val_losses): - torch.save(model.state_dict(), '/app/output/weight.pth') + torch.save(model.state_dict(), '/home/dev/dev_work_shrey/playing_around/output/weight.pth') train_losses.append(train_epoch_loss) val_losses.append(val_epoch_loss) @@ -90,4 +90,4 @@ def train(): if __name__ == "__main__": - train() + train() \ No newline at end of file