diff --git a/run_ssd_example_output.jpg b/run_ssd_example_output.jpg deleted file mode 100644 index 3075c9b9..00000000 Binary files a/run_ssd_example_output.jpg and /dev/null differ diff --git a/run_ssd_live_caffe2.py b/run_ssd_live_caffe2.py index b1fa40fa..526a2802 100644 --- a/run_ssd_live_caffe2.py +++ b/run_ssd_live_caffe2.py @@ -83,12 +83,13 @@ def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.5 interval = timer.end() print('Inference Time: {:.2f}s.'.format(interval)) timer.start() - boxes, labels, probs = predict(orig_image.shape[1], orig_image.shape[0], confidences, boxes, 0.4) + boxes, labels, probs = predict(orig_image.shape[1], orig_image.shape[0], confidences, boxes, 0.55) interval = timer.end() print('NMS Time: {:.2f}s, Detect Objects: {:d}.'.format(interval, labels.shape[0])) for i in range(boxes.shape[0]): box = boxes[i, :] - label = class_names[labels[i]] + label = f"{class_names[labels[i]]}: {probs[i]:.2f}" + cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 4) cv2.putText(orig_image, label, diff --git a/train_ssd.py b/train_ssd.py index 6820fbff..43f6a050 100644 --- a/train_ssd.py +++ b/train_ssd.py @@ -13,6 +13,7 @@ from vision.ssd.vgg_ssd import create_vgg_ssd from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite +from vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite from vision.datasets.voc_dataset import VOCDataset from vision.datasets.open_images import OpenImagesDataset @@ -35,12 +36,15 @@ parser.add_argument('--net', default="vgg16-ssd", - help="The network architecture, it can be mb1-ssd, mb1-lite-ssd or vgg16-ssd.") + help="The network architecture, it can be mb1-ssd, mb1-lite-ssd, mb2-ssd-lite or vgg16-ssd.") parser.add_argument('--freeze_base_net', action='store_true', help="Freeze base net layers.") 
parser.add_argument('--freeze_net', action='store_true', help="Freeze all the layers except the prediction head.") +parser.add_argument('--mb2_width_mult', default=1.0, type=float, + help='Width Multiplier for MobilenetV2') + # Params for SGD parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, help='initial learning rate') @@ -179,11 +183,13 @@ def test(loader, net, criterion, device): elif args.net == 'sq-ssd-lite': create_net = create_squeezenet_ssd_lite config = squeezenet_ssd_config + elif args.net == 'mb2-ssd-lite': + create_net = lambda num: create_mobilenetv2_ssd_lite(num, width_mult=args.mb2_width_mult) + config = mobilenetv1_ssd_config else: logging.fatal("The net type is wrong.") parser.print_help(sys.stderr) sys.exit(1) - train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std) target_transform = MatchPrior(config.priors, config.center_variance, config.size_variance, 0.5) diff --git a/vision/ssd/ssd.py b/vision/ssd/ssd.py index d15ab75d..962b9a22 100644 --- a/vision/ssd/ssd.py +++ b/vision/ssd/ssd.py @@ -5,6 +5,8 @@ import torch.nn.functional as F from ..utils import box_utils +from collections import namedtuple +GraphPath = namedtuple("GraphPath", ['s0', 'name', 's1']) # class SSD(nn.Module): @@ -25,7 +27,8 @@ def __init__(self, num_classes: int, base_net: nn.ModuleList, source_layer_index self.config = config # register layers in source_layer_indexes by adding them to a module list - self.source_layer_add_ons = nn.ModuleList([t[1] for t in source_layer_indexes if isinstance(t, tuple)]) + self.source_layer_add_ons = nn.ModuleList([t[1] for t in source_layer_indexes + if isinstance(t, tuple) and not isinstance(t, GraphPath)]) if device: self.device = device else: @@ -40,19 +43,32 @@ def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: start_layer_index = 0 header_index = 0 for end_layer_index in self.source_layer_indexes: - - if isinstance(end_layer_index, tuple): + if 
isinstance(end_layer_index, GraphPath): + path = end_layer_index + end_layer_index = end_layer_index.s0 + added_layer = None + elif isinstance(end_layer_index, tuple): added_layer = end_layer_index[1] end_layer_index = end_layer_index[0] + path = None else: added_layer = None + path = None for layer in self.base_net[start_layer_index: end_layer_index]: x = layer(x) - start_layer_index = end_layer_index if added_layer: y = added_layer(x) else: y = x + if path: + sub = getattr(self.base_net[end_layer_index], path.name) + for layer in sub[:path.s1]: + x = layer(x) + y = x + for layer in sub[path.s1:]: + x = layer(x) + end_layer_index += 1 + start_layer_index = end_layer_index confidence, location = self.compute_header(header_index, y) header_index += 1 confidences.append(confidence)