import sys
import os
import glob
import time
import warnings
from functools import wraps
from tqdm import tqdm
import numpy as np
from typing import Literal, Union, List, Dict, Tuple
import torch
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision
import torchmetrics
import pytorch_lightning as pl
try:
from deepspeed.ops.adam import DeepSpeedCPUAdam, FusedAdam
except ModuleNotFoundError as e:
print(e, "\n")
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import albumentations as A
BACKUP_SEED = 4222 # In case none is set
# Path where the GitHub repository will be cloned to; the cnw folder will live inside it
CNW_PATH = "./cropandweed-dataset/"
OPTUNA_DB_PATH = "./OptunaTrials/trials.db"
MODEL_PATH = os.environ.get("MODEL_PATH", "./models")
# Path where the downloaded files (images, segmentation masks, ...) will be stored
if os.getcwd().startswith("/ceph") or os.getcwd().startswith("/pfs"): # dws or bw server
DATA_PATH = os.path.join(CNW_PATH, "data")
else:
DATA_PATH = r"./data"
IMAGES_PATH = os.path.join(DATA_PATH, "images")
if CNW_PATH not in sys.path:
sys.path.append(CNW_PATH) # folder name contains a hyphen, so use this workaround
sys.path.append(CNW_PATH+"/cnw")
import cnw
from cnw.utilities.datasets import DATASETS
import utils
# for development
from utils import visualize_bbox
sys.path.append("./Yolov7-training")
#from ultralytics.yolo.utils.loss import BboxLoss
from yolov7 import create_yolov7_model, create_yolov7_loss
from yolov7.trainer import filter_eval_predictions
from yolov7.models.yolo import Yolov7Model
from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR, ChainedScheduler, ReduceLROnPlateau, SequentialLR, OneCycleLR
from torchmetrics.detection.mean_ap import MeanAveragePrecision
BBOX_PARAMS = A.BboxParams(format='pascal_voc', # Left, Top, Right, Bottom
#format="yolo",
label_fields=["class_labels"],
min_area=30, # Min amount of pixels
min_visibility=0.2) # ratio of the box area after the transform to the area before
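# A minimal sketch of how these bbox params behave (assumes a dummy 100x100
# image and one hypothetical pascal_voc box; not part of the pipeline):
#
# example_aug = A.Compose([A.HorizontalFlip(p=1.0)], bbox_params=BBOX_PARAMS)
# out = example_aug(image=np.zeros((100, 100, 3), np.float32),
# bboxes=[[10, 20, 60, 80]], class_labels=[1])
# out["bboxes"] # -> [(40.0, 20.0, 90.0, 80.0)], the box mirrored horizontally;
# boxes below min_area pixels, or keeping less than min_visibility of their
# area after a transform, are dropped entirely.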
##################
class CropDataset(torchvision.datasets.VisionDataset):
"""
Dataset Interface to load images and apply augmentations.
Allows using albumentations transformations on images and masks in parallel
"""
#target_transform : A.Compose
#transforms : torchvision.transforms.Compose
# TODO if stacking, downscale before stacking.
def __init__(self, root=DATA_PATH, *,dataset,
stage : Literal["train", "val", "test"],
image_size,
stack2_images,
train_val_test_indices : Dict[str, List[int]] = None, # manually choose which indices to use;
train_val_test_ratio=(0.7, 0.15, 0.15),
transforms=None,
transform =None,
target_transform=None,
normalize_images=False,
half_precision=False,
cache_bboxes=False,
cache_images=False,
_init_files=True, # files can instead be set globally by the datamodule, for less overhead
seed=None): # should be set in the transformations!
"""
From documentation:
transforms (callable, optional) – A function/transforms that takes in an image
and a label and returns the transformed versions of both.
transform (callable, optional) – A function/transform that takes in an PIL image
and returns a transformed version. E.g, transforms.RandomCrop
target_transform (callable, optional) – A function/transform that takes in
the target and transforms it.
"""
super().__init__(root, transforms, transform, target_transform)
self.dataset = dataset
self.stage = stage
self.normalize_images = normalize_images
self.data_dir = root
self.try_bbox_caching = cache_bboxes
self.try_image_caching = cache_images
if isinstance(root, dict):
self.image_root = root["images"]
self.bbox_root = root["bboxes"]
else:
self.image_root = root
self.bbox_root = root
self.stack2_images = stack2_images
self.image_size = image_size
self.half_precision = half_precision
self.downscale_transform = A.Compose([A.SmallestMaxSize(max(image_size)),
A.RandomSizedCrop(min_max_height=(max(image_size),max(image_size)),
w2h_ratio=1.0, height=max(image_size), width=max(image_size))],
bbox_params=BBOX_PARAMS)
# For performance and integrity this allows setting the specific values
# outside of the initialization.
# For example the indices can be assigned manually to be 100% sure
# that train, val, test data do not overlap.
if _init_files:
image_dir = os.path.join(self.image_root, 'images')
# If stage is val or test, load the eval boxes
if stage == "train":
bbox_dir = os.path.join(self.bbox_root, 'bboxes', dataset)
else:
bbox_dir = os.path.join(self.bbox_root, 'bboxes', dataset+"Eval") # allows more lenient predictions for validation
# TODO: need to change validation vegetation label id?
# NOTE: the "Eval" directory is immediately overridden here, so all stages currently use the plain dataset boxes
bbox_dir = os.path.join(self.bbox_root, 'bboxes', dataset)
# Extra data sadly unused
#self.annotation_dir = os.path.join(root, 'Annotations')
# Some overhead storage. Can be removed later
# This could be a view!
#self.all_masks = self.get_files() # masks are filtered for the subsets
self.all_bboxes = self.get_files() # bbox files; filtered for the subsets below
# Filter images based on stage TODO ??
self.all_images = np.asarray([os.path.join(image_dir, os.path.basename(file)[:-4]+".jpg") for file in self.all_bboxes],
dtype=str)
# verify that an image exists for each annotation file
#for i in self.all_images:
# if not os.path.exists(i):
# raise FileNotFoundError(i)
# Perform train_val_test split and get indices
if train_val_test_indices is None:
self.indices = utils.train_val_test_split(self.all_images, seed or BACKUP_SEED, train_val_test_ratio)
self.images = self.all_images[self.indices[self.stage]]
#self.masks = self.all_masks[self.indices[self.stage]]
else:
self.indices = train_val_test_indices
self.images = self.all_images[self.indices[self.stage]] # This should/could be a view
#self.masks = self.all_masks[self.indices[self.stage]]
#self.bboxes = [os.path.join(bbox_dir, os.path.basename(file)[:-4]+".csv") for file in self.masks ]
self.bboxes = self.all_bboxes[self.indices[self.stage]]
self._init_caches()
def _init_caches(self):
try:
if self.try_bbox_caching:
self.cache_bboxes()
self.bboxes_cached = self.try_bbox_caching # True or False
except Exception as e:
self.bboxes_cached = False
raise
try:
if self.try_image_caching:
self.cache_images()
self.images_cached = self.try_image_caching # True or False
except Exception as e:
self.images_cached = False
def get_all_bboxes(self):
"""Loads all bboxes into an array. NOTE: This function might not be up do date"""
print("Loading bboxes")
bbox_dir = os.path.join(self.bbox_root, 'bboxes', self.dataset)
# This should be quite fast but sometimes it seems to get stuck at certain files.
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UserWarning) # suppresses warnings for empty files
# .csv is pascal_voc format. NOTE: HARDCODED
files = (os.path.join(bbox_dir, os.path.basename(file)[:-4]+".csv") for file in self.bboxes )
bboxes = []
for file in tqdm(files, total=len(self.bboxes)):
bboxes.append(np.loadtxt(file, delimiter=",", dtype=int))
return np.array(bboxes, dtype=object)
def get_files(self, path=None):
"""Find images belonging to the chosen dataset via the bbox.csv files."""
path = path or self.data_dir
if isinstance(path, dict):
path = path["bboxes"]
files = np.array(glob.glob(os.path.join(path, "bboxes", self.dataset, "*.csv")))
if len(files) == 0:
print("path:", path)
raise ValueError("No files found in", os.path.join(os.path.join(path, "bboxes", self.dataset)))
return files
@staticmethod
def get_class_label(label_id):
"""Get the class name from a predicted label id"""
return DATASETS['CropAndWeed'].get_label_name(label_id)
def get_bbox_color(self, label_id):
return DATASETS[self.dataset].get_label_color(label_id)
# Alternative with id mapping:
#mapped_id = DATASETS[self.dataset].get_mapped_id(label_id)
#return DATASETS[self.dataset].get_label_color(mapped_id)
def __len__(self):
return len(self.indices[self.stage])
# Evaluated once at class-definition time
if BBOX_PARAMS.format == "pascal_voc":
@staticmethod
def load_bbox(path):
bbox = np.loadtxt(path, delimiter=",", dtype=np.int32).reshape(-1, 7)
return bbox[:,4], bbox[:,:4] # label, bbox
else:
@staticmethod
def load_bbox(path):
bbox = np.loadtxt(path, delimiter=" ", dtype=np.float32).reshape(-1, 5) # yolo format, assure not (5,) shaped
return bbox[:, 0], bbox[:, 1:]
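# Illustrative sketch of the pascal_voc branch (hypothetical row; the 7-column
# layout "Left,Top,Right,Bottom,LabelID,StemX,StemY" matches _getitem_normal):
#
# a CSV line "10,20,60,80,2,35,50" yields
# labels = array([2]) and bbox = array([[10, 20, 60, 80]]);
# the stem coordinates in the last two columns are read but discarded.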
def cache_bboxes(self):
"""caches all images and labels"""
# This might take some time on servers because files need to be transfered
# TODO could be parallies and then joined.
all_bboxes = []
all_labels = []
start = time.perf_counter()
for bbox_file in self.bboxes:
labels, bbox = self.load_bbox(bbox_file)
all_bboxes.append(bbox)
all_labels.append(labels)
self.cached_bboxes = np.array(all_bboxes, dtype=object)
self.cached_labels = np.array(all_labels, dtype=object)
print("BBoxes cached in", format(time.perf_counter() - start, ".1"), "s")
def cache_images(self):
"""Loads all images into RAM"""
# TODO: should be parallelized
print("Loading and caching all images - this will need a lot of RAM.")
print("Trying to allocate space...")
image = cv2.imread(self.images[0])
image_stack = np.empty((len(self.images), *image.shape), dtype=image.dtype) # keep cv2's uint8; avoids the 8x memory of float64
print("Allocated array of size", image_stack.shape, format(image_stack.nbytes / 1024 / 1024 / 1024, ".4f"), "GB")
image_stack[0] = image
try:
for idx, file in enumerate(tqdm(self.images[1:]), 1):
image_stack[idx] = cv2.imread(file)
except BaseException as e:
print("WARNING: Images not cached")
raise
self.cached_images = image_stack
# Would be great to differentiate between these at class-definition time, but not possible in the current setup
# TODO: make a new class or overwrite __getitem__ on the class (does not work on instances)
def __getitem__(self, index):
return self._getitem_stacked(index) if self.stack2_images else self._getitem_normal(index)
def _getitem_stacked(self, index):
"""
Stack two images vertically by choosing a second one at random.
Advantages: no blank color padding needed, more variation, and more bounding boxes per image.
"""
if self.images_cached:
image = self.cached_images[index].copy()
else:
image = cv2.imread(self.images[index])
if self.bboxes_cached:
labels = self.cached_labels[index].copy()
bbox = self.cached_bboxes[index].copy()
else:
bboxf = self.bboxes[index] # path only, no disk read yet
labels, bbox = self.load_bbox(bboxf)
if self.stage != "train":
# Return without stacking two images, transform should be resizing&ToTensor
transformed = self.target_transform(image=np.asarray(image, np.float32) / 255, # bring into 0...1 range
#mask=mask,
bboxes=bbox,
class_labels=labels)
return self._format_transformed(transformed, index)
# Load 2nd image to patch together
index2 = np.random.choice(len(self.images))
if self.images_cached:
image2 = self.cached_images[index2].copy()
else:
image2 = cv2.imread(self.images[index2])
if self.bboxes_cached:
bbox2 = self.cached_bboxes[index2].copy()
labels2 = self.cached_labels[index2].copy()
else:
bbox2f = self.bboxes[index2]
labels2, bbox2 = self.load_bbox(bbox2f)
if BBOX_PARAMS.format == "pascal_voc":
bbox2[:, [1,3]] += image.shape[0] # Adjust Y values of 1st image
else:
raise NotImplementedError("Non pascal_voc format")
full_img = np.vstack((image, image2)).astype(np.float32) / 255 #NOTE: numpy 1.24 allows dtype for vstack
del image, image2
bboxes = np.vstack([bbox, bbox2])
#Downscale
down_transforms = self.downscale_transform(image=full_img,
bboxes=bboxes,
class_labels=np.hstack([labels, labels2]))
del full_img
# TODO: join the transforms, otherwise there is overhead from bbox format conversions
transformed = self.target_transform(**down_transforms)
return self._format_transformed(transformed, index)
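# Worked example of the stacking offset above (hypothetical numbers):
# with a first image of height H=1088, a box [10, 20, 60, 80] from the
# second image becomes [10, 1108, 60, 1168] in the stacked image, since
# only the Top/Bottom (y) coordinates are shifted, by exactly H.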
def _getitem_normal(self, index):
"""
Bounding box logic: Left, Top, Right, Bottom, Label ID, Stem X, Stem Y
"""
image = cv2.imread(self.images[index])
#image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Theoretically RGB not necessary, some overhead
#SEGMENTATION:
#mask = cv2.imread(self.masks[index], cv2.IMREAD_UNCHANGED)
bboxf = self.bboxes[index] # path only, no disk read yet
labels, bbox = self.load_bbox(bboxf)
if self.target_transform is not None:
try:
transformed = self.target_transform(image=np.asarray(image, np.float32) / 255, # bring into 0...1 range
#mask=mask,
bboxes=bbox,
class_labels=labels)
except Exception:
print(bboxf, bbox)
raise
#mask = transformed["mask"]
#if mask.dtype != torch.int64:
# mask = mask.to(torch.int64)
# channel swap (BGR -> RGB) and bbox/label extraction happen in _format_transformed
return self._format_transformed(transformed, index)
return self._format_transformed({'image' : image, 'bboxes' : bbox, 'class_labels': labels}, index)
def _format_transformed(self, transformed, index):
image = transformed["image"][[2,1,0]] # Swap channel order cv2.COLOR_BGR2RGB
#mask = transformed["mask"]
bbox = transformed["bboxes"]
labels = transformed["class_labels"]
if len(bbox) > 0:
# bboxes to yolo format
if BBOX_PARAMS.format == "pascal_voc":
boxes = torchvision.ops.box_convert(
torch.as_tensor(bbox, dtype=torch.float32), "xyxy", "cxcywh" # if not self.half_precision else torch.float16
)
boxes[:, [1, 3]] /= self.image_size[0] # normalized height 0-1
boxes[:, [0, 2]] /= self.image_size[1] # normalized width 0-1
else:
boxes = bbox
classes = np.expand_dims(labels, 1)
labels_out = torch.hstack(
(torch.zeros((len(boxes), 1)),
torch.as_tensor(classes, dtype=torch.float32), # if not self.half_precision else torch.float16
boxes)
)
else:
labels_out = torch.zeros((0, 6))
#if self.half_precision:
# image = image.to(torch.float16)
# labels_out = labels_out.to(torch.float16)
return (image,
labels_out,
torch.as_tensor(index),
torch.as_tensor(self.image_size)) # height x width (array format)
@staticmethod
def bbox_collate(batch):
# torch expects the contents of a batch to have the same length;
# that behaviour is overridden here.
# As each image can have multiple bounding boxes, the labels are concatenated
# and each bounding box (in labels) gets its image's batch index assigned.
# TODO: add indices instead (or a reason why not to; parallelization issues?)
images, labels, indices, image_sizes = zip(*batch)
for i, l in enumerate(labels):
l[:, 0] = i # add target image index for build_targets() in loss fn
return (
torch.stack(images, 0),
torch.cat(labels, 0),
torch.stack(indices, 0),
torch.stack(image_sizes, 0),
)
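# Illustrative shapes (assumed batch of two samples with 3 and 1 boxes):
# images: stacked tensor of shape (2, C, H, W)
# labels: tensor of shape (4, 6) with rows [batch_idx, class, cx, cy, w, h],
# batch_idx being 0 for the first three rows and 1 for the last
# indices: shape (2,), image_sizes: shape (2, 2)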
class CropAndWeedDataModule(pl.LightningDataModule):
"""
From original documentation:
"A DataModule standardizes the training, val, test splits, data preparation and transforms. The main
advantage is consistent data splits, data preparation and transforms across models."
"""
def __init__(self, dataset, data_dir, *, seed=BACKUP_SEED, eval_batch_scale=2,
batch_size,
num_workers,
image_size,
train_transform,
test_transform,
stack2_images,
train_val_test_ratio=[0.7, 0.15, 0.15],
half_precision=False,
cache_images=False,
cache_bboxes=False):
super().__init__()
self.dataset = dataset
self.indices = None
self.train_transform = train_transform
self.test_transform = test_transform
self.stack2_images = stack2_images
self.image_size = image_size
self.half_precision = half_precision
self.cache_images = cache_images
self.cache_bboxes = cache_bboxes
self.batch_size = batch_size
self.eval_batch_scale = eval_batch_scale
self.num_workers = num_workers
# In case bbox and images are located at different locations
if isinstance(data_dir, dict):
self.image_root = data_dir["images"]
self.bbox_root = data_dir["bboxes"]
else:
self.image_root = data_dir
self.bbox_root = data_dir
self.data_dir = data_dir
self.train_val_test_ratio=train_val_test_ratio
self.seed =seed or BACKUP_SEED
def prepare_data(self, repository_path=CNW_PATH, *, with_mapping=True, check_exists=True):
"""NOTE: Dowloads ~10GB and mapping annotations will also take a while."""
# Todo currently no customization.
if check_exists:
# Custom paths may exist (e.g. an SSD and a HDD); check them
if isinstance(self.data_dir, dict):
# Both paths exist, so we can skip:
if os.path.exists(self.data_dir["bboxes"]) and os.path.exists(self.data_dir["images"]):
print("Data paths already exist. Skipping download and mask generation. No integrity check was performed.")
return
# One path does not exist.
# Check whether both entries are the same path (which we can handle), else raise an error
if self.data_dir["bboxes"] != self.data_dir["images"]:
raise FileNotFoundError("Image and BBox paths differ but one does not exist. Please specify one path only to download files or check the passed paths", self.data_dir["bboxes"], self.data_dir["images"])
# Both paths are the same but do not exist -> download files
# Only one path passed as a path-like object
elif os.path.exists(self.data_dir):
print("Data path already exists. Skipping download and mask generation. No integrity check was performed.")
return
print("Cloning github repository for data acquisition")
os.system(" ".join(["git clone", "https://github.com/cropandweed/cropandweed-dataset.git", repository_path]))
print("Downloading data. This can take a while.")
from cnw import setup
if isinstance(self.data_dir, str):
setup.setup(self.data_dir, with_mapping)
else: #dict of paths
print("Print ignoring data setup for now")
def setup(self, stage : str="fit"):
# Gather files and do train test val split
if self.indices is None:
self.all_bboxes = CropDataset.get_files(self, path=self.bbox_root) # reuse the unbound method; 'bboxes/<dataset>' will be appended
image_dir = os.path.join(self.image_root, 'images')
self.all_images = np.asarray([os.path.join(image_dir, os.path.basename(file)[:-4]+".jpg") for file in self.all_bboxes],
dtype=str)
# commented out for performance:
#verify that an image exists for each annotation file
#for i in self.all_images:
# if not os.path.exists(i):
# raise FileNotFoundError(i)
# Perform train_val_test split and get indices
self.indices = utils.train_val_test_split(self.all_images, self.seed or BACKUP_SEED, self.train_val_test_ratio)
#bbox_dir = os.path.join(self.bbox_root, 'bboxes', self.dataset)
#bbox_dir_eval = os.path.join(self.bbox_root, 'bboxes', self.dataset+"Eval")
if (stage == "fit" or stage is None):
if not hasattr(self, "data_train"):
self.data_train = CropDataset(self.data_dir, dataset=self.dataset, stage="train",
stack2_images = self.stack2_images,
image_size=self.image_size,
train_val_test_ratio=self.train_val_test_ratio, seed=self.seed,
target_transform=self.train_transform,
half_precision=self.half_precision,
cache_bboxes=self.cache_bboxes,
cache_images=self.cache_images,
_init_files=False)
# Ensure that the methods below produce the same results
# For assertion set _init_files=True
#assert (self.data_train.images == self.all_images[self.indices['train']]).all()
#assert (self.data_train.masks == self.all_masks[self.indices['train']]).all()
#assert self.data_train.bboxes == [os.path.join(bbox_dir, os.path.basename(file)[:-4]+".csv") for file in self.data_train.masks ]
#assert False, "all good"
self.data_train.images = self.all_images[self.indices['train']]
# Setup for doing segmentation
#self.data_train.masks = self.all_masks[self.indices['train']]
#self.data_train.bboxes = [os.path.join(bbox_dir, os.path.basename(file)[:-4]+".csv") for file in self.data_train.masks ]
self.data_train.bboxes = self.all_bboxes[self.indices['train']]
self.data_train.indices = self.indices
self.data_train._init_caches()
# No augmentation here
if (stage == "fit" or stage=="validate" or stage == "val" or stage == None):
if not hasattr(self, "data_val"):
self.data_val = CropDataset(self.data_dir, dataset=self.dataset, stage="val",
stack2_images=False,
image_size=self.image_size,
train_val_test_ratio=self.train_val_test_ratio, seed=self.seed,
target_transform=self.test_transform,
half_precision=self.half_precision,
cache_bboxes=self.cache_bboxes,
cache_images=False,
_init_files=False)
# For assertion set _init_files=True
#assert self.data_val.images == self.all_images[self.indices['val']]
#assert self.data_val.masks == self.all_masks[self.indices['val']]
#assert self.data_val.bboxes == [os.path.join(bbox_dir, os.path.basename(file)[:-4]+".csv") for file in self.data_val.masks ]
self.data_val.images = self.all_images[self.indices['val']]
#self.data_val.masks = self.all_masks[self.indices['val']]
#self.data_val.bboxes = [os.path.join(bbox_dir, os.path.basename(file)[:-4]+".csv") for file in self.data_val.masks ]
self.data_val.bboxes = self.all_bboxes[self.indices['val']]
self.data_val.indices = self.indices
self.data_val._init_caches()
elif stage== "test":
self.data_test = CropDataset(self.data_dir, dataset=self.dataset, stage="test",
stack2_images=False,
image_size=self.image_size,
train_val_test_ratio=self.train_val_test_ratio, seed=self.seed,
target_transform=self.test_transform,
half_precision=self.half_precision,
cache_bboxes=False,
cache_images=False,
_init_files=False)
# For assertion set _init_files=True
#assert self.data_test.images == self.all_images[self.indices['test']]
#assert self.data_test.masks == self.all_masks[self.indices['test']]
#assert self.data_test.bboxes == [os.path.join(bbox_dir, os.path.basename(file)[:-4]+".csv") for file in self.data_test.masks ]
self.data_test.images = self.all_images[self.indices['test']]
#self.data_test.masks = self.all_masks[self.indices['test']]
#self.data_test.bboxes = [os.path.join(bbox_dir, os.path.basename(file)[:-4]+".csv") for file in self.data_test.masks ]
self.data_test.bboxes = self.all_bboxes[self.indices['test']]
self.data_test.indices = self.indices
elif stage == "predict":
raise NotImplementedError("Setting stage to predict.")
else:
raise ValueError(f"Invalid stage '{stage}'", )
#@wraps(pl.core.hooks.DataHooks.train_dataloader)
def train_dataloader(self):
return DataLoader(self.data_train,
batch_size=self.batch_size,
num_workers=self.num_workers,
pin_memory=True,
collate_fn=CropDataset.bbox_collate,
shuffle=True)
#@wraps(pl.core.hooks.DataHooks.val_dataloader)
def val_dataloader(self):
return DataLoader(self.data_val,
batch_size=int(self.batch_size * self.eval_batch_scale),
num_workers=self.num_workers,
pin_memory=True,
collate_fn=CropDataset.bbox_collate,
shuffle=False)
#@wraps(pl.core.hooks.DataHooks.test_dataloader)
def test_dataloader(self):
return DataLoader(self.data_test,
batch_size=int(self.batch_size * self.eval_batch_scale),
num_workers=self.num_workers,
collate_fn=CropDataset.bbox_collate,
shuffle=False)
def check_dataloaders(self):
# Setup the variables
if not hasattr(self, "data_train") or self.data_train is None:
self.setup()
if not hasattr(self, "data_test") or self.data_test is None:
self.setup("test")
try:
# Change variables for testing
real_batchsize = self.batch_size
real_numworkers = self.num_workers
self.num_workers = 0
self.batch_size = 5
return self.train_dataloader(), self.val_dataloader(), self.test_dataloader()
finally:
# reset variables
self.batch_size = real_batchsize
self.num_workers = real_numworkers
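# Minimal usage sketch (hypothetical values; train_tf/test_tf are assumed to be
# albumentations Compose objects built with BBOX_PARAMS):
#
# dm = CropAndWeedDataModule("CropAndWeed", DATA_PATH,
# batch_size=16, num_workers=4,
# image_size=(640, 640),
# train_transform=train_tf, test_transform=test_tf,
# stack2_images=True)
# dm.prepare_data() # clones the repo and downloads ~10GB on first use
# dm.setup("fit")
# images, labels, indices, sizes = next(iter(dm.train_dataloader()))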
#def predict_dataloader(self): # This will raise an error from super
# raise NotImplementedError("predict_dataloader")
class YOLO_PL(pl.LightningModule):
#criterion = DetectionTrainer.criterion
#postprocess = Yolov7Model.postprocessing
def __init__(self, settings):
super().__init__()
# v8
#self.yolo_model = YOLO(yolo_model, task="detect")
#self.yolo_model.overrides = {} # default
#self.yolo_model([*np.zeros((2, 1088, 1920))]) # init some stuff
#self.model = self.yolo_model.model
assert 0 <= settings["dataset"]["use_extra_class"] <= 1, "No or only one extra class can be used."
assert settings["dataset"]["image_size"][0] == settings["dataset"]["image_size"][1], "Image size must be a square."
self.settings=settings
#self.example_input_array = torch.zeros((1, 3, *settings["dataset"]["image_size"]))
#if settings["trainer"]["precision"] == 16 or settings["trainer"]["precision"] == "16-mixed":
# self.example_input_array = self.example_input_array.to(torch.float16)
try:
self.model = create_yolov7_model(settings["model"]["name"],
num_classes=( len( DATASETS[settings["dataset"]["name"]].get_label_ids() )
+ settings["dataset"]["use_extra_class"]),
pretrained=settings["model"]["pretrained"])
except AttributeError:
# NOTE: There is sometimes an AttributeError that can be solved by repeating the process:
self.model = create_yolov7_model(settings["model"]["name"],
num_classes=( len( DATASETS[settings["dataset"]["name"]].get_label_ids() )
+ settings["dataset"]["use_extra_class"]),
pretrained=settings["model"]["pretrained"])
if settings["model"].get("load_anchors", False):
anchorparam = settings["model"]["load_anchors"]
if anchorparam is True: # auto
try:
anchors = torch.load("./anchors/new_anchors_"+settings["model"]["name"]+".pt").detach()
anchors.requires_grad = False
self.update_anchors(anchors)
except Exception as e:
print(e, "\n^^^^^^^^^^^^\n", "Cannot update anchors")
elif isinstance(anchorparam, str):
self.update_anchors(torch.load(anchorparam).detach()) # path
else:
self.update_anchors(anchorparam.detach()) # pass manually
print("Updated anchor settings")
size = max(settings["dataset"]["image_size"])
self.loss_func = create_yolov7_loss(self.model,
image_size=size,
**self.settings["loss"])
self.image_size=(size, size)
# Maybe adjust this because the predefined format turns into xyxy
self.mAP = MeanAveragePrecision(box_format="cxcywh", iou_type="bbox", class_metrics=False,
iou_thresholds=[settings["mAP"]["iou_threshold"]],
rec_thresholds=settings["mAP"].get("rec_thresholds", None)) # single IoU threshold
self.test_mAP = MeanAveragePrecision(box_format="cxcywh",
iou_type="bbox",
class_metrics=True)
self.save_hyperparameters()
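# Sketch of the settings keys read by this module (all values hypothetical):
#
# settings = {
# "model": {"name": "yolov7", "pretrained": True, "load_anchors": False},
# "dataset": {"name": "CropAndWeed", "image_size": (640, 640),
# "use_extra_class": 0, "batch_size": 16,
# "accumulate_batch_size": 64},
# "loss": {}, # forwarded to create_yolov7_loss
# "mAP": {"iou_threshold": 0.5, "min_confidence": 0.001,
# "max_detections": 300, "nms_threshold": 0.45},
# "optimizer": {"name": "SGD", "learning_rate": 0.01,
# "weight_decay": "auto", "lr_scheduler": "onecyclelr"},
# "lr_onecycle": {"base_momentum": 0.85, "max_momentum": 0.95,
# "pct_start": 0.3, "div_factor": 25, "final_div_factor": 1e4},
# "lr_scheduler": {"warmup_epochs": 3, "warmup_multiplier": 2}, # multistep branch
# "trainer": {"max_epochs": 100, "precision": "16-mixed"},
# }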
def forward(self, x):
#print(x.dtype)
#print(self.dtype)
return self.model(x)
@wraps(Yolov7Model.postprocess)
def postprocess(self, *args, **kwargs):
return self.model.postprocess(*args, **kwargs)
def on_fit_start(self):
self.loss_func.to(self.device)
print("Fitting", self.settings["model"]["name"], "on", self.settings["dataset"]["name"], "with", self.settings["optimizer"]["name"],end="\n\n")
def on_eval_start(self):
self.loss_func.to(self.device)
def on_test_start(self):
self.loss_func.to(self.device)
def on_train_epoch_start(self):
self.loss_func.train()
# For debugging
def on_train_epoch_end(self):
if self.trainer.current_epoch == 0:
# log memory usage
# for example for node training where this can not be called directly
print(os.system("nvidia-smi"))
def on_validation_epoch_start(self):
self.loss_func.eval()
def training_step(self, batch, _):
images, labels, image_id, image_size = batch
preds = self.model(images)
loss, _ = self.loss_func(
fpn_heads_outputs=preds, targets=labels, images=images
)
self.log("train_loss", loss, batch_size=images.shape[0], prog_bar=True)
return loss
def update_map(self, labels, preds, map):
filtered_preds : list = self.model.postprocess(preds,
conf_thres=self.settings["mAP"]["min_confidence"], # TODO conf thresholds should be same?
max_detections= self.settings["mAP"]["max_detections"],
multiple_labels_per_box=False)
target_lists = [{"boxes" : labels[(idx := labels[:,0]==i), 2:],
"labels" : labels[idx, 1]} for i in range(len(filtered_preds))]
pred_lists = [None] * len(filtered_preds)
for i, pred in enumerate(filtered_preds): # len of batch_size
pred = pred[pred[:, 4] > self.settings["mAP"]["nms_threshold"]].detach()
nms_idx = torchvision.ops.batched_nms(
boxes =pred[:, :4],
scores=pred[:, 4],
idxs =pred[:, 5],
iou_threshold=self.settings["mAP"]["iou_threshold"],
)
nms_preds = pred[nms_idx]
boxes = torchvision.ops.box_convert(
nms_preds[:, :4], "xyxy", "cxcywh" # ughhh, is transformed in preprocess, maybe convert these others
)
boxes[:, [1, 3]] /= self.image_size[0] # normalized height 0-1
boxes[:, [0, 2]] /= self.image_size[1] # normalized width 0-1
pred_lists[i]={"boxes" : boxes,
"scores" : nms_preds[:, 4],
"labels" : nms_preds[:, 5]}
#pred_list=[{"boxes" : boxes,
# "scores" : nms_preds[:, 4],
# "labels" : nms_preds[:, 5]}]
#map.update(pred_list, target_lists[i:i+1])
map.update(pred_lists, target_lists) # do not use forward else will compute & return
#TODO: Plot bounding boxes with NMS suppression.
# Has the form
"""
{'map': tensor(0.6000),
'map_50': tensor(1.),
'map_75': tensor(1.),
'map_large': tensor(0.6000),
'map_medium': tensor(-1.),
'map_per_class': tensor(-1.),
'map_small': tensor(-1.),
'mar_1': tensor(0.6000),
'mar_10': tensor(0.6000),
'mar_100': tensor(0.6000),
'mar_100_per_class': tensor(-1.),
'mar_large': tensor(0.6000),
'mar_medium': tensor(-1.),
'mar_small': tensor(-1.)}
"""
def _log_images(self, images, labels, image_id, predictions):
# plot the 10 bounding boxes with the highest confidence
# TODO: this relies on lost code and has not yet been restored
figures = []
IDX=slice(4)
pred = self.model.postprocess(predictions, conf_thres=0.00,
max_detections=30,
multiple_labels_per_box=False
)
#TODO: Plot bounding boxes with NMS suppression.
# get the 10 bounding boxes with highest confidence
#bboxes =pred[:, :4]
#scores=pred[:, 4]
#idxs =pred[:, 5]
# annotate with label and confidence
#print("Images shape", images.shape)
#imgs = np.ascontiguousarray(.permute(0, 2,3,1).cpu()) # old method
bbox_files = [os.path.split(f)[1] for f in self.trainer.val_dataloaders.dataset.bboxes[image_id[IDX].cpu()]] # filenames of the true bounding boxes
plots = utils.visualize_bboxes(DATASETS[self.settings["dataset"]["name"]],
images=images[IDX].cpu(),
bbox_files=bbox_files,
predictions=[ p.cpu() for p in pred[IDX]],
plot_true_bboxes= self.current_epoch==0,
data_path=DATA_PATH, # TODO: this is global!
dataset=self.settings["dataset"]["name"],
output_dir=None,
font_scale=0.55,
save_to_disk=False,
crop_borders=240 if self.settings["dataset"]["image_size"][0] > 1200 else 100, # crop vertically to restore the original format
target_width=640,
pbar=False)
stack = torch.tensor(np.array(plots)) # turn into array first for speed up
# Turn Batch x HxWxChannels into -> B x C x H x W
grid = torchvision.utils.make_grid(stack.permute(0,3,1,2), nrow=2, )
# Turn into HxWxC
self.logger.experiment.add_image('val_bboxes', grid.permute(1,2,0), self.current_epoch,
dataformats='HWC')
def validation_step(self, batch, batch_idx):
images, labels, image_id, image_size = batch
preds = self.model(images)
loss, _ = self.loss_func(
fpn_heads_outputs=preds, targets=labels, images=images
)
self.log("val_loss", loss, prog_bar=True, batch_size=len(images), sync_dist=True)
try:
if batch_idx == 0:
self._log_images(images, labels, image_id, preds)
except Exception as e:
print(e,"\n^^^^^^^^^^^^^^^^^^^^^^^^^^^\n Could not log images")
raise
self.update_map(labels, preds, self.mAP)
return loss
def test_step(self, batch, batch_idx):
images, labels, image_id, image_size = batch
preds = self.model(images)
loss, _ = self.loss_func(
fpn_heads_outputs=preds, targets=labels, images=images
)
self.log("test_loss", loss, prog_bar=True, batch_size=len(images), sync_dist=True)
self.update_map(labels, preds, self.test_mAP)
# See: https://torchmetrics.readthedocs.io/en/latest/pages/lightning.html
def on_validation_epoch_end(self):
#print("computing map-val", self.current_epoch)
results = self.mAP.compute()
#print("logging-val")
if results["map"] != -1:
self.log("hp_metric", results["map"], sync_dist=True)
self.log("map", results.pop("map"), prog_bar=True, sync_dist=True)
self.log_dict({k:v for k,v in results.items() if v != -1}, sync_dist=True)
self.mAP.reset()
def on_test_epoch_end(self):
results = self.test_mAP.compute()
map_per_class = results.pop('map_per_class')
mar_per_class = results.pop('mar_100_per_class')
if self.test_mAP.class_metrics:
# Not one element tensors
names = list(map(DATASETS[self.settings['dataset']['name']].get_label_name, self.test_mAP._get_classes()))
for i, name in enumerate(names):
if map_per_class[i] != -1:
self.log(f"test_map_{name}", map_per_class[i], sync_dist=True)
if mar_per_class[i] != -1:
self.log(f"test_mar100_{name}", mar_per_class[i], sync_dist=True)
print(f"test_map_{name}", map_per_class[i], f"\ntest_mar100_{name}", mar_per_class[i])
else:
print("not logging class metrics")
self.log_dict({"test_"+key:val for key,val in results.items() if val != -1}, sync_dist=True)
self.test_mAP.reset()
#DEBUG
#def on_before_batch_transfer(self, batch, dataloader_idx):
# print(batch.shape, batch.dtype)
# print("\n\ntranserfering")
# print(batch)
# #print(images.dtype, labels.dtype)
# return batch
def update_anchors(self, anchors):
try:
update_func = self.model.update_anchors # got renamed
except AttributeError:
update_func = self.model.update_model_anchors
update_func(anchors)
def configure_optimizers(self):
steps_per_epoch = len(self.trainer.datamodule.data_train.images) // self.settings["dataset"]["batch_size"] + 1
if self.settings["optimizer"]["weight_decay"] == "auto":
self.settings["dataset"]["accumulate_batch_size"]
num_accumulate_steps = max(round(self.settings["dataset"]["accumulate_batch_size"]
/ self.settings["dataset"]["batch_size"])
, 1)
base_weight_decay = 0.0005
weight_decay = (
base_weight_decay
* self.settings["dataset"]["batch_size"]
* num_accumulate_steps
/ self.settings["dataset"]["accumulate_batch_size"]
)
self.settings["optimizer"]["weight_decay"] = weight_decay
print("Auto: weight decay set to", weight_decay)
print("model type", self.dtype)
param_groups=self.model.get_parameter_groups()
if self.settings['optimizer']['name'] == "SGD":
optimizer = torch.optim.SGD(
param_groups["other_params"],
lr=self.settings["optimizer"]["learning_rate"],
momentum=0.937, nesterov=True,
)
optimizer.add_param_group(
{"params": param_groups["conv_weights"],
"weight_decay": self.settings["optimizer"]["weight_decay"]}
)
elif self.settings['optimizer']['name'] == "DeepSpeedCPUAdam":
optimizer = DeepSpeedCPUAdam(param_groups["other_params"],
lr=self.settings["optimizer"]["learning_rate"],
betas=(0.937, 0.999),
weight_decay=self.settings["optimizer"]["weight_decay"])
optimizer.add_param_group(
{"params": param_groups["conv_weights"],
"weight_decay": self.settings["optimizer"]["weight_decay"]}
)
elif self.settings['optimizer']['name'] == "FusedAdam":
optimizer = FusedAdam(param_groups["other_params"],
lr=self.settings["optimizer"]["learning_rate"],
betas=(0.937, 0.999),
adam_w_mode=True)
optimizer.add_param_group(
{"params": param_groups["conv_weights"],
"weight_decay": self.settings["optimizer"]["weight_decay"]}
)
#"""
#optimizer = torch.optim.RAdam(self.parameters(),
# lr=(self.settings["optimizer"]["learning_rate"]
# * ((1/self.settings["lr_scheduler"]["warmup_multiplier"]
# )** self.settings["lr_scheduler"]["warmup_epochs"])), # scale down for warmup
# eps=1e-08,
# betas= (0.937, 0.999),
# weight_decay=self.settings["optimizer"]["weight_decay"])
#"""
#lr_scheduler = CosineLrScheduler(optimizer,
# total_num_epochs=trainer.max_epochs,
# num_update_steps_per_epoch = num_batches,
# num_warmup_epochs=3,
# num_cooldown_epochs=3,
# k_decay=2,
#)
if self.settings["optimizer"].get("lr_scheduler", "").lower() == "onecyclelr":
print("Using OneCycleLR scheduler")
lr_scheduler = OneCycleLR(optimizer,
max_lr=self.settings["optimizer"]["learning_rate"],
# NOTE: TODO: self.trainer.estimated_stepping_batches is better here but not compatible with the current caching setup
# Probably a bug in lightning sets up the dataset twice :/
#total_steps=self.trainer.estimated_stepping_batches,
steps_per_epoch=steps_per_epoch,
epochs=self.settings["trainer"]["max_epochs"],
base_momentum = self.settings["lr_onecycle"]["base_momentum"],
max_momentum = self.settings["lr_onecycle"]["max_momentum"],
pct_start= self.settings["lr_onecycle"]["pct_start"],
div_factor=self.settings["lr_onecycle"]["div_factor"],
final_div_factor=self.settings["lr_onecycle"]["final_div_factor"],
)
return [optimizer], [{"scheduler": lr_scheduler, "interval": "step"}]
print("Using Multistep+CosineAnnealingLR scheduler")
scheduler1 = MultiStepLR(optimizer,
milestones=list(range(1, self.settings["lr_scheduler"]["warmup_epochs"]+1)),