Merge branch 'squeezenet-ssd-lite'

qfgaohao · Nov 1, 2018 · dca078f · dca078f
2 parents 57de5e4 + 3bb13d0
commit dca078f
Show file tree

Hide file tree

Showing 11 changed files with 384 additions and 14 deletions.
diff --git a/convert_to_caffe2_models.py b/convert_to_caffe2_models.py
@@ -1,7 +1,7 @@
 from vision.ssd.vgg_ssd import create_vgg_ssd
 from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd
-from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite, create_mobilenetv1_ssd_lite_predictor
-
+from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite
+from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite
 
 import sys
 import torch.onnx
@@ -26,6 +26,8 @@
     net = create_mobilenetv1_ssd(len(class_names), is_test=True)
 elif net_type == 'mb1-ssd-lite':
     net = create_mobilenetv1_ssd_lite(len(class_names), is_test=True)
+elif net_type == 'sq-ssd-lite':
+    net = create_squeezenet_ssd_lite(len(class_names), is_test=True)
 else:
     print("The net type is wrong. It should be one of vgg16-ssd, mb1-ssd and mb1-ssd-lite.")
     sys.exit(1)

diff --git a/draw_eval_results.py b/draw_eval_results.py
@@ -0,0 +1,35 @@
+import sys
+import cv2
+import pandas as pd
+import os
+
+eval_result_file = sys.argv[1]
+image_dir = sys.argv[2]
+output_dir = sys.argv[3]
+threshold = float(sys.argv[4])
+
+if not os.path.exists(output_dir):
+    os.mkdir(output_dir)
+
+r = pd.read_csv(eval_result_file, delimiter=" ", names=["ImageID", "Prob", "x1", "y1", "x2", "y2"])
+r['x1'] = r['x1'].astype(int)
+r['y1'] = r['y1'].astype(int)
+r['x2'] = r['x2'].astype(int)
+r['y2'] = r['y2'].astype(int)
+
+
+for image_id, g in r.groupby('ImageID'):
+    image = cv2.imread(os.path.join(image_dir, image_id + ".jpg"))
+    for row in g.itertuples():
+        if row.Prob < threshold:
+            continue
+        cv2.rectangle(image, (row.x1, row.y1), (row.x2, row.y2), (255, 255, 0), 4)
+        label = f"{row.Prob:.2f}"
+        cv2.putText(image, label,
+                    (row.x1 + 20, row.y1 + 40),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    1,  # font scale
+                    (255, 0, 255),
+                    2)  # thickness
+    cv2.imwrite(os.path.join(output_dir, image_id + ".jpg"), image)
+print(f"Task Done. Processed {r.shape[0]} bounding boxes.")
diff --git a/eval_ssd.py b/eval_ssd.py
@@ -2,7 +2,9 @@
 from vision.ssd.vgg_ssd import create_vgg_ssd, create_vgg_ssd_predictor
 from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor
 from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite, create_mobilenetv1_ssd_lite_predictor
+from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite, create_squeezenet_ssd_lite_predictor
 from vision.datasets.voc_dataset import VOCDataset
+from vision.datasets.open_images import OpenImagesDataset
 from vision.utils import box_utils, measurements
 from vision.utils.misc import str2bool, Timer
 import argparse
@@ -16,7 +18,10 @@
 parser.add_argument('--net', default="vgg16-ssd",
                     help="The network architecture, it should be of mb1-ssd, mb1-ssd-lite or vgg16-ssd.")
 parser.add_argument("--trained_model", type=str)
-parser.add_argument("--dataset", type=str, help="The root directory of the VOC dataset.")
+
+parser.add_argument("--dataset_type", default="voc", type=str,
+                    help='Specify dataset type. Currently support voc and open_images.')
+parser.add_argument("--dataset", type=str, help="The root directory of the VOC dataset or Open Images dataset.")
 parser.add_argument("--label_file", type=str, help="The label file path.")
 parser.add_argument("--use_cuda", type=str2bool, default=True)
 parser.add_argument("--use_2007_metric", type=str2bool, default=True)
@@ -118,14 +123,20 @@ def compute_average_precision_per_class(num_true_cases, gt_boxes, difficult_case
     timer = Timer()
     class_names = [name.strip() for name in open(args.label_file).readlines()]
 
-    dataset = VOCDataset(args.dataset, is_test=True)
+    if args.dataset_type == "voc":
+        dataset = VOCDataset(args.dataset, is_test=True)
+    elif args.dataset_type == 'open_images':
+        dataset = OpenImagesDataset(args.dataset, dataset_type="test")
+
     true_case_stat, all_gb_boxes, all_difficult_cases = group_annotation_by_class(dataset)
     if args.net == 'vgg16-ssd':
         net = create_vgg_ssd(len(class_names), is_test=True)
     elif args.net == 'mb1-ssd':
         net = create_mobilenetv1_ssd(len(class_names), is_test=True)
     elif args.net == 'mb1-ssd-lite':
         net = create_mobilenetv1_ssd_lite(len(class_names), is_test=True)
+    elif args.net == 'sq-ssd-lite':
+        net = create_squeezenet_ssd_lite(len(class_names), is_test=True)
     else:
         logging.fatal("The net type is wrong. It should be one of vgg16-ssd, mb1-ssd and mb1-ssd-lite.")
         parser.print_help(sys.stderr)
@@ -141,6 +152,8 @@ def compute_average_precision_per_class(num_true_cases, gt_boxes, difficult_case
         predictor = create_mobilenetv1_ssd_predictor(net, nms_method=args.nms_method, device=DEVICE)
     elif args.net == 'mb1-ssd-lite':
         predictor = create_mobilenetv1_ssd_lite_predictor(net, nms_method=args.nms_method, device=DEVICE)
+    elif args.net == 'sq-ssd-lite':
+        predictor = create_squeezenet_ssd_lite_predictor(net,nms_method=args.nms_method, device=DEVICE)
     else:
         logging.fatal("The net type is wrong. It should be one of vgg16-ssd, mb1-ssd and mb1-ssd-lite.")
         parser.print_help(sys.stderr)

diff --git a/models/EMPTY b/models/EMPTY
diff --git a/open_images_downloader.py b/open_images_downloader.py
@@ -78,6 +78,10 @@ def parse_args():
                         help="the classes you want to download.")
     parser.add_argument("--retry", type=int, default=10,
                         help="retry times when downloading.")
+    parser.add_argument("--filter_file", type=str, default="",
+                        help="This file specifies the image ids you want to exclude.")
+    parser.add_argument('--remove_overlapped', action='store_true',
+                        help="Remove single boxes covered by group boxes.")
     return parser.parse_args()
 
 
@@ -87,11 +91,33 @@ def parse_args():
 
     args = parse_args()
     bucket = "open-images-dataset"
-    class_names = [e.strip() for e in args.class_names.split(",")]
+    names = [e.strip() for e in args.class_names.split(",")]
+    class_names = []
+    group_filters = []
+    percentages = []
+    for name in names:
+        t = name.split(":")
+        class_names.append(t[0].strip())
+        if len(t) >= 2 and t[1].strip():
+            group_filters.append(t[1].strip())
+        else:
+            group_filters.append("")
+        if len(t) >= 3 and t[2].strip():
+            percentages.append(float(t[2].strip()))
+        else:
+            percentages.append(1.0)
 
     if not os.path.exists(args.root):
         os.makedirs(args.root)
 
+    excluded_images = set()
+    if args.filter_file:
+        for line in open(args.filter_file):
+            img_id = line.strip()
+            if not img_id:
+                continue
+            excluded_images.add(img_id)
+
     class_description_file = os.path.join(args.root, "class-descriptions-boxable.csv")
     if not os.path.exists(class_description_file):
         url = "https://storage.googleapis.com/openimages/2018_04/class-descriptions-boxable.csv"
@@ -118,14 +144,42 @@ def parse_args():
                                left_on="LabelName", right_on="id",
                                how="inner")
         if not args.include_depiction:
-            annotations = annotations.loc[:, annotations['IsDepiction'] != 1]
-        logging.warning(f"{dataset_type} data size: {annotations.shape[0]}")
+            annotations = annotations.loc[annotations['IsDepiction'] != 1, :]
+
+        # TODO: make this more efficient.
+        # Filter each requested class by its IsGroupOf setting and sample image ids to exclude.
+        filtered = []
+        for class_name, group_filter, percentage in zip(class_names, group_filters, percentages):
+            sub = annotations.loc[annotations['ClassName'] == class_name, :]
+            if group_filter == "group":
+                sub = sub.loc[sub['IsGroupOf'] == 1, :]
+            elif group_filter == '~group':
+                sub = sub.loc[sub['IsGroupOf'] == 0, :]
+            excluded_images |= set(sub['ImageID'].sample(frac=1 - percentage))
+            filtered.append(sub)
+
+        annotations = pd.concat(filtered)
+        annotations = annotations.loc[~annotations['ImageID'].isin(excluded_images), :]
+
+
+        if args.remove_overlapped:
+            images_with_group = annotations.loc[annotations['IsGroupOf'] == 1, 'ImageID']
+            annotations = annotations.loc[~(annotations['ImageID'].isin(set(images_with_group)) & (annotations['IsGroupOf'] == 0)), :]
+        annotations = annotations.sample(frac=1.0)
+
+        logging.warning(f"{dataset_type} bounding boxes size: {annotations.shape[0]}")
+        logging.warning("Approximate Image Stats: ")
+        log_counts(annotations.drop_duplicates(["ImageID", "ClassName"])["ClassName"])
+        logging.warning("Label distribution: ")
         log_counts(annotations['ClassName'])
 
+        logging.warning(f"Shuffle dataset.")
+
+
         sub_annotation_file = f"{args.root}/sub-{dataset_type}-annotations-bbox.csv"
         logging.warning(f"Save {dataset_type} data to {sub_annotation_file}.")
         annotations.to_csv(sub_annotation_file, index=False)
         image_files.extend(f"{dataset_type}/{id}.jpg" for id in set(annotations['ImageID']))
-    logging.warning(f"Start downloading {len(image_files)}images.")
+    logging.warning(f"Start downloading {len(image_files)} images.")
     batch_download(bucket, image_files, args.root, args.num_workers, args.retry)
     logging.warning("Task Done.")
diff --git a/run_ssd_live_demo.py b/run_ssd_live_demo.py
@@ -1,6 +1,7 @@
 from vision.ssd.vgg_ssd import create_vgg_ssd, create_vgg_ssd_predictor
 from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor
 from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite, create_mobilenetv1_ssd_lite_predictor
+from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite, create_squeezenet_ssd_lite_predictor
 
 from vision.utils.misc import Timer
 import cv2
@@ -30,6 +31,8 @@
     net = create_mobilenetv1_ssd(len(class_names), is_test=True)
 elif net_type == 'mb1-ssd-lite':
     net = create_mobilenetv1_ssd_lite(len(class_names), is_test=True)
+elif net_type == 'sq-ssd-lite':
+    net = create_squeezenet_ssd_lite(len(class_names), is_test=True)
 else:
     print("The net type is wrong. It should be one of vgg16-ssd, mb1-ssd and mb1-ssd-lite.")
     sys.exit(1)
@@ -41,6 +44,8 @@
     predictor = create_mobilenetv1_ssd_predictor(net, candidate_size=200)
 elif net_type == 'mb1-ssd-lite':
     predictor = create_mobilenetv1_ssd_lite_predictor(net, candidate_size=200)
+elif net_type == 'sq-ssd-lite':
+    predictor = create_squeezenet_ssd_lite_predictor(net, candidate_size=200)
 else:
     print("The net type is wrong. It should be one of vgg16-ssd, mb1-ssd and mb1-ssd-lite.")
     sys.exit(1)

diff --git a/train_ssd.py b/train_ssd.py
@@ -13,12 +13,13 @@
 from vision.ssd.vgg_ssd import create_vgg_ssd
 from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd
 from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite
-from vision.ssd.fpn_mobilenetv1_ssd import create_fpn_mobilenetv1_ssd
+from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite
 from vision.datasets.voc_dataset import VOCDataset
 from vision.datasets.open_images import OpenImagesDataset
 from vision.nn.multibox_loss import MultiboxLoss
 from vision.ssd.config import vgg_ssd_config
 from vision.ssd.config import mobilenetv1_ssd_config
+from vision.ssd.config import squeezenet_ssd_config
 from vision.ssd.data_preprocessing import TrainAugmentation, TestTransform
 
 parser = argparse.ArgumentParser(
@@ -92,10 +93,14 @@
                     help='Directory for saving checkpoint models')
 
 
+logging.basicConfig(stream=sys.stdout, level=logging.INFO,
+                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 args = parser.parse_args()
 DEVICE = torch.device("cuda:0" if torch.cuda.is_available() and args.use_cuda else "cpu")
+
 if args.use_cuda and torch.cuda.is_available():
     torch.backends.cudnn.benchmark = True
+    logging.info("Use Cuda.")
 
 
 def train(loader, net, criterion, optimizer, device, debug_steps=100, epoch=-1):
@@ -159,9 +164,6 @@ def test(loader, net, criterion, device):
 
 
 if __name__ == '__main__':
-    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
-                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-
     timer = Timer()
 
     logging.info(args)
@@ -174,6 +176,9 @@ def test(loader, net, criterion, device):
     elif args.net == 'mb1-ssd-lite':
         create_net = create_mobilenetv1_ssd_lite
         config = mobilenetv1_ssd_config
+    elif args.net == 'sq-ssd-lite':
+        create_net = create_squeezenet_ssd_lite
+        config = squeezenet_ssd_config
     else:
         logging.fatal("The net type is wrong.")
         parser.print_help(sys.stderr)
@@ -219,7 +224,7 @@ def test(loader, net, criterion, device):
     elif args.dataset_type == 'open_images':
         val_dataset = OpenImagesDataset(dataset_path,
                                         transform=test_transform, target_transform=target_transform,
-                                        dataset_type="validation")
+                                        dataset_type="test")
         logging.info(val_dataset)
     logging.info("validation dataset size: {}".format(len(val_dataset)))
 

diff --git a/vision/datasets/open_images.py b/vision/datasets/open_images.py
@@ -19,10 +19,11 @@ def __init__(self, root,
         self.min_image_num = -1
         if self.balance_data:
             self.data = self._balance_data()
+        self.ids = [info['image_id'] for info in self.data]
 
         self.class_stat = None
 
-    def __getitem__(self, index):
+    def _getitem(self, index):
         image_info = self.data[index]
         image = self._read_image(image_info['image_id'])
         boxes = image_info['boxes']
@@ -35,8 +36,25 @@ def __getitem__(self, index):
             image, boxes, labels = self.transform(image, boxes, labels)
         if self.target_transform:
             boxes, labels = self.target_transform(boxes, labels)
+        return image_info['image_id'], image, boxes, labels
+
+    def __getitem__(self, index):
+        _, image, boxes, labels = self._getitem(index)
         return image, boxes, labels
 
+    def get_annotation(self, index):
+        """Conform to the eval_ssd implementation, which is based on the VOC dataset."""
+        image_id, image, boxes, labels = self._getitem(index)
+        is_difficult = np.zeros(boxes.shape[0], dtype=np.uint8)
+        return image_id, (boxes, labels, is_difficult)
+
+    def get_image(self, index):
+        image_info = self.data[index]
+        image = self._read_image(image_info['image_id'])
+        if self.transform:
+            image, _ = self.transform(image)
+        return image
+
     def _read_data(self):
         annotation_file = f"{self.root}/sub-{self.dataset_type}-annotations-bbox.csv"
         annotations = pd.read_csv(annotation_file)