ijcv extension

vLAR-group · Oct 17, 2024 · ecd477b · ecd477b
1 parent f92b47b
commit ecd477b
Show file tree

Hide file tree

Showing 10 changed files with 668 additions and 12 deletions.
diff --git a/Complexity_Factors/Complexity_Factor_Evaluator.py b/Complexity_Factors/Complexity_Factor_Evaluator.py
@@ -16,6 +16,7 @@
 from utils.hausdorff_dist import calculate_hausdorff_distance
 from utils.mask_to_coordinates import mask_to_coordinates
 from utils.merge_dict import merge_with_old_dict
+from utils.max_inscribe_convex_hull import maximal_inscribed_convex_set
 EPS = 1e-5
 '''
 This class is to compute complexity factors for a dataset
@@ -222,6 +223,69 @@ def calculate_scene_level_factors(self):
 
         return result.copy()
 
+    def calculate_bg_factors(self):
+        """Compute per-image background complexity factors.
+
+        Images and masks are paired by position in ``self.image_filenames`` and
+        ``self.mask_filenames``. Pixels whose mask value is 0 are background,
+        all others are foreground. Images without any background pixel are
+        skipped. For every remaining image three factors are stored:
+
+        * ``'BG Color Gradient'``: mean of the gradient image over background
+          pixels that are not on the background boundary, divided by 255.
+        * ``'BG-FG Color Similarity (negative LSA)'``: one minus the mean
+          colour distance of the matched background/foreground pixel pairs,
+          normalised by ``255 * sqrt(3)``; computed on a 50x50 downsampled
+          image. It is 1 when either pixel set is empty after downsampling.
+        * ``'BG Shape Irregularity'``: mean over the connected components of
+          the foreground mask of ``1 - |inscribed convex set| / |component|``;
+          0.0 when the mask has no foreground component.
+
+        Returns:
+            dict mapping the image file name up to its first ``'.'`` to the
+            dict of factors for that image.
+        """
+        result = {}
+        for index, image_fname in enumerate(tqdm(self.image_filenames, ncols=120)):
+            ## read image and mask data
+            image = cv2.imread(os.path.join(self.image_root, image_fname))
+            mask = cv2.imread(os.path.join(self.mask_root, self.mask_filenames[index]), cv2.IMREAD_GRAYSCALE)
+            img_key = image_fname.split('.')[0]
+            bg_factors = {}
+            binary_bg_mask = np.array(mask==0).astype(np.uint8)
+            binary_fg_mask = np.array(mask!=0).astype(np.uint8)
+            if binary_bg_mask.sum() == 0:
+                continue
+
+            ## 1. Bg Color Gradient
+            grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
+            gradient_image = calculate_image_gradient(grayscale_image)
+            binary_bg_mask_without_boundary = binary_bg_mask - mask_to_boundary(binary_bg_mask)
+            # EPS keeps the ratio finite when the background consists of boundary pixels only
+            bg_color_gradient = (gradient_image * binary_bg_mask_without_boundary).sum() / (binary_bg_mask_without_boundary.sum() + EPS)
+            bg_factors['BG Color Gradient'] = bg_color_gradient / 255.0
+
+            ## 2. BG-FG Color Similarity with linear sum assignment
+            resized_image = cv2.resize(image, (50, 50), interpolation = cv2.INTER_AREA)
+            resized_binary_bg_mask = cv2.resize(binary_bg_mask, (50, 50), interpolation = cv2.INTER_AREA)
+            resized_binary_fg_mask = cv2.resize(binary_fg_mask, (50, 50), interpolation = cv2.INTER_AREA)
+            assert len(np.unique(resized_binary_bg_mask)) <= 2
+
+            # Masked-array entries with mask == 1 are dropped, so ``1 - mask`` keeps the pixels of that region
+            bg_rgb_points = np.ma.array(resized_image, mask=np.repeat(1-resized_binary_bg_mask[:,:,None], 3, axis=-1)).compressed()
+            bg_rgb_points = np.resize(bg_rgb_points, (int(len(bg_rgb_points)/3), 3))
+            fg_rgb_points = np.ma.array(resized_image, mask=np.repeat(1-resized_binary_fg_mask[:,:,None], 3, axis=-1)).compressed()
+            fg_rgb_points = np.resize(fg_rgb_points, (int(len(fg_rgb_points)/3), 3))
+            if len(bg_rgb_points) == 0 or len(fg_rgb_points) == 0:
+                bg_factors['BG-FG Color Similarity (negative LSA)'] = 1
+            else:
+                fg_bg_color_distance_matrix = sklearn.metrics.pairwise.euclidean_distances(bg_rgb_points, fg_rgb_points)
+                # The assignment runs on the negated distances, i.e. (presumably with
+                # scipy's cost-minimising solver) it pairs pixels to maximise total distance.
+                row_ind, col_ind = linear_sum_assignment(-fg_bg_color_distance_matrix)
+                mean_matched_dist = fg_bg_color_distance_matrix[row_ind, col_ind].mean()
+
+                bg_factors['BG-FG Color Similarity (negative LSA)'] = 1 - (mean_matched_dist / (255 * math.sqrt(3)))
+
+            ## 3. BG Shape Irregularity (measured on the foreground connected components)
+            connected_component_labels = skimage.measure.label(binary_fg_mask)
+            irregularity_score_list = []
+            for label in np.unique(connected_component_labels):
+                if label == 0:
+                    continue
+                binary_component = np.array(connected_component_labels==label).astype(np.uint8)
+                maximal_inscribed_convex = maximal_inscribed_convex_set(binary_component)
+                irregularity_score = (1 - maximal_inscribed_convex.sum() / binary_component.sum())
+                irregularity_score_list.append(irregularity_score)
+            # A mask without foreground yields no component; report 0.0 instead of
+            # dividing by the length of an empty list.
+            if irregularity_score_list:
+                bg_factors['BG Shape Irregularity'] = sum(irregularity_score_list) / len(irregularity_score_list)
+            else:
+                bg_factors['BG Shape Irregularity'] = 0.0
+
+            result[img_key] = bg_factors.copy()
+
+        return result.copy()
+
+
 if __name__ == "__main__":
     image_root = "/home/user/DATASET/Scannet/test/image"
     mask_root = "/home/user/DATASET/Scannet/test/mask"

diff --git a/Complexity_Factors/utils/max_inscribe_convex_hull.py b/Complexity_Factors/utils/max_inscribe_convex_hull.py
@@ -0,0 +1,110 @@
+
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+from scipy.ndimage import binary_fill_holes, distance_transform_edt
+
+def convex_hull(binary_mask):
+    """Compute the filled convex hull of a binary mask.
+
+    Args:
+        binary_mask: 2-D array; non-zero entries are treated as foreground.
+
+    Returns:
+        A ``np.uint8`` array of the same shape holding 1 inside the hull and 0
+        elsewhere. With fewer than three foreground pixels no hull can be
+        formed and a copy of ``binary_mask`` is returned instead.
+    """
+    points = np.argwhere(binary_mask)
+    if len(points) < 3:
+        return binary_mask.copy()  # Not enough points to form a convex hull
+    # np.argwhere yields (row, col) pairs, whereas OpenCV point sets are
+    # (x, y) = (col, row) and must be 32-bit; flip the columns and cast.
+    xy_points = np.ascontiguousarray(points[:, ::-1], dtype=np.int32)
+    hull = cv2.convexHull(xy_points)
+    hull_mask = np.zeros_like(binary_mask, dtype=np.uint8)
+    cv2.fillConvexPoly(hull_mask, hull, 1)
+    return hull_mask
+
+def find_deepest_concavity(distance_map):
+    """Return the index of the largest entry of ``distance_map``.
+
+    The result is a tuple with one coordinate per axis of the array.
+    """
+    flat_position = np.argmax(distance_map)
+    return np.unravel_index(flat_position, distance_map.shape)
+
+def cut_region(binary_mask, cut_point, direction):
+    """Return the part of a mask lying strictly on one side of a line.
+
+    The line passes through ``cut_point`` at angle ``direction``. With
+    ``(dy, dx) = (sin(direction), cos(direction))`` a pixel ``(i, j)`` is kept
+    when ``dy * (j - x) - dx * (i - y) > 0``; pixels on the line are excluded.
+    No size comparison is made: the result is simply that half-plane
+    intersected with the mask.
+
+    Args:
+        binary_mask: 2-D integer array (a bitwise AND is applied to it).
+        cut_point: ``(row, col)`` of a point on the cutting line.
+        direction: angle of the cutting line in radians.
+
+    Returns:
+        Array of the same shape: ``binary_mask`` AND-ed with a 0/1 ``uint8``
+        half-plane mask.
+    """
+    y, x = cut_point
+    height, width = binary_mask.shape
+
+    # Define the slope from the direction
+    dy, dx = np.sin(direction), np.cos(direction)
+
+    # Evaluate the side test for all pixels at once instead of looping in Python
+    rows = np.arange(height).reshape(-1, 1)
+    cols = np.arange(width).reshape(1, -1)
+    half_plane = ((dy * (cols - x) - dx * (rows - y)) > 0).astype(np.uint8)
+
+    return half_plane & binary_mask
+
+
+def maximal_inscribed_convex_set(binary_mask, max_concavity_depth=3, verbose=False):
+    """Greedily carve a mask until its convex deficiency is shallow.
+
+    Each round computes the convex hull of the current mask and the distance
+    transform of the deficiency (hull pixels not in the mask). If the largest
+    distance is at most ``max_concavity_depth`` the loop stops. Otherwise the
+    mask is cut through that deepest point along 8 directions (multiples of
+    45 degrees) and the smallest non-empty cut-off part is removed.
+
+    Args:
+        binary_mask: 2-D array; it is cast to ``np.uint8`` before processing.
+        max_concavity_depth: largest tolerated deficiency depth (default 3,
+            the previously hard-coded threshold).
+        verbose: when true, print the deepest concavity and candidate areas
+            of every round (these diagnostics used to be printed always).
+
+    Returns:
+        ``np.uint8`` array of the same shape containing the carved mask.
+    """
+    # Holes are intentionally not filled; the mask is used as given
+    filled_mask = binary_mask.astype(np.uint8)
+
+    while True:
+        hull = convex_hull(filled_mask)
+
+        # Convex deficiency D: hull pixels missing from the mask. Boolean logic
+        # is used because an unsigned subtraction would wrap to 255 wherever
+        # the hull fails to cover a mask pixel.
+        deficiency = np.logical_and(hull != 0, filled_mask == 0)
+        distance_map = distance_transform_edt(deficiency)
+
+        deepest_concavity = find_deepest_concavity(distance_map)
+        if verbose:
+            print('deepest_concavity', deepest_concavity, distance_map[deepest_concavity])
+
+        # Stop once the deepest concavity is within acceptable bounds
+        if distance_map[deepest_concavity] <= max_concavity_depth:
+            break
+
+        # Candidate parts cut off by lines in 8 directions through that point
+        sub_regions = [
+            filled_mask & cut_region(filled_mask, deepest_concavity, n * np.pi / 4)
+            for n in range(8)
+        ]
+        areas = [int(np.sum(region)) for region in sub_regions]
+
+        # Keep the original direction index next to each area so the minimum is
+        # looked up in ``sub_regions`` itself, not in a filtered, shifted list.
+        candidates = [(area, idx) for idx, area in enumerate(areas) if area > 0]
+        if verbose:
+            print('valid_areas', [area for area, _ in candidates])
+        if not candidates:
+            break  # Nothing can be removed any more
+
+        _, min_area_index = min(candidates)
+        filled_mask = filled_mask & ~sub_regions[min_area_index]
+
+    return filled_mask
+
+
+# Demo: carve a sample region and display it next to the result
+if __name__ == "__main__":
+    # Sample region: a filled square whose top-left part is blanked out again
+    region = np.zeros((1000, 1000))
+    cv2.rectangle(region, (30, 30), (600, 600), 1, -1)
+    region = cv2.rectangle(region, (30, 30), (400, 400), 0, -1)
+
+    convex_set = maximal_inscribed_convex_set(region)
+
+    # Side-by-side visualisation of input and output
+    panels = [
+        ("Original Region", region),
+        ("Maximal Inscribed Convex Set", convex_set),
+    ]
+    plt.figure(figsize=(10, 5))
+    for position, (title, picture) in enumerate(panels, start=1):
+        plt.subplot(1, 2, position)
+        plt.title(title)
+        plt.imshow(picture, cmap='gray')
+
+    plt.show()
diff --git a/Dataset_Generation/Ablation Dataset/bg_ablation.py b/Dataset_Generation/Ablation Dataset/bg_ablation.py
@@ -0,0 +1,157 @@
+
+import numpy as np
+import cv2
+import os
+import json
+import time
+import random
+import skimage.measure
+from tqdm import tqdm
+from sklearn.metrics.pairwise import euclidean_distances
+from skimage.morphology import convex_hull_image
+from utils.generate_convex_appearance import generate_convex_appearance_for_bg
+from utils.remove_small_object import remove_small_object 
+def create_bgC_dataset(
+        source_image_folder,
+        source_mask_folder, 
+        dest_image_folder
+    ):
+    """Write copies of the images whose background is a single flat colour.
+
+    For every file name found in ``source_mask_folder`` the image and mask of
+    that name are read. Background pixels (mask == 0) are replaced by the
+    per-channel mean background colour, truncated to an integer; foreground
+    pixels are kept. An image without background is written unchanged.
+    """
+    if not os.path.exists(dest_image_folder):
+        os.makedirs(dest_image_folder)
+    mask_fnames = os.listdir(source_mask_folder)
+    for fname in tqdm(mask_fnames, ncols=90, desc=dest_image_folder):
+        target_path = os.path.join(dest_image_folder, fname)
+        source_image = cv2.imread(os.path.join(source_image_folder, fname))
+        source_mask = cv2.imread(os.path.join(source_mask_folder, fname), cv2.IMREAD_GRAYSCALE)
+        is_bg = source_mask == 0
+        bg_pixels = np.count_nonzero(is_bg)
+        if bg_pixels == 0:
+            cv2.imwrite(target_path, source_image)
+            continue
+        # Zero out the foreground so channel sums only count background pixels
+        bg_only = source_image * is_bg[:, :, None]
+        flat_bg = np.empty_like(source_image)
+        for channel in range(3):
+            flat_bg[:, :, channel] = int(bg_only[:, :, channel].sum() / bg_pixels)
+        out_image = np.where(is_bg[:, :, None], flat_bg, source_image)
+        cv2.imwrite(target_path, out_image)
+
+
+
+def create_bgT_dataset(
+        source_image_folder,
+        source_mask_folder, 
+        dest_image_folder
+    ):
+    """Write copies of the images whose background is replaced by a texture.
+
+    Textures are read from ``replaced_texture/processed``. For every file name
+    in ``source_mask_folder`` the texture whose average colour is farthest
+    (Euclidean distance) from the average foreground colour is pasted onto the
+    background pixels (mask == 0); foreground pixels are kept. The texture
+    must have the same shape as the source image for the paste to work.
+
+    If an image has no foreground pixel there is no colour to contrast with,
+    so the first texture in sorted order is used.
+
+    Raises:
+        FileNotFoundError: if the texture folder contains no file.
+    """
+    texture_folder = 'replaced_texture/processed'
+    if not os.path.exists(dest_image_folder):
+        os.makedirs(dest_image_folder)
+    style_image_fname_list = os.listdir(texture_folder)
+    style_image_fname_list.sort()
+    if not style_image_fname_list:
+        raise FileNotFoundError('no texture images found in ' + texture_folder)
+
+    # Average texture colours do not depend on the source image: compute them
+    # once instead of re-reading every texture for every image.
+    style_avg_colors = []
+    for style_image_fname in style_image_fname_list:
+        style_image = cv2.imread(os.path.join(texture_folder, style_image_fname))
+        height, width = style_image.shape[:2]
+        style_avg_colors.append(style_image.sum(0).sum(0) / (height * width))
+    style_avg_colors = np.array(style_avg_colors)
+
+    for fname in tqdm(os.listdir(source_mask_folder), ncols=90, desc=dest_image_folder):
+        source_image = cv2.imread(os.path.join(source_image_folder, fname))
+        source_mask = cv2.imread(os.path.join(source_mask_folder, fname), cv2.IMREAD_GRAYSCALE)
+        is_fg = np.array(source_mask!=0)
+        fg_pixels = is_fg.sum()
+        if fg_pixels == 0:
+            # Avoid 0/0 (NaN distances), which previously left the selection
+            # undefined or carried over from the previous image.
+            selected_fname = style_image_fname_list[0]
+        else:
+            fg_image = source_image * is_fg[:,:,None]
+            fg_avg_color = fg_image.sum(0).sum(0) / fg_pixels
+            color_dists = euclidean_distances(style_avg_colors, [fg_avg_color])[:, 0]
+            # argmax returns the first maximum, matching a strict ">" scan
+            selected_fname = style_image_fname_list[int(np.argmax(color_dists))]
+        style_image = cv2.imread(os.path.join(texture_folder, selected_fname))
+        out_image = np.zeros_like(source_image)
+        out_image += style_image * np.array(source_mask==0)[:,:,None]
+        out_image += source_image * is_fg[:,:,None]
+        cv2.imwrite(os.path.join(dest_image_folder, fname), out_image)
+
+
+
+def create_bgCT_dataset(
+        source_image_folder,
+        source_mask_folder, 
+        dest_image_folder
+    ):
+    """Write copies of the images with a flat, high-contrast background.
+
+    For every file name in ``source_mask_folder`` the background pixels
+    (mask == 0) are filled with the corner of the 8-bit colour cube that is
+    farthest (Euclidean distance) from the average foreground colour;
+    foreground pixels are kept.
+
+    If an image has no foreground pixel there is no colour to contrast with,
+    so the first corner (black) is used.
+    """
+    # The eight corners of the colour cube; identical for every image
+    corner_color = [
+        [0, 0, 0],
+        [255, 0, 0],
+        [0, 255, 0],
+        [0, 0, 255],
+        [255, 255, 0],
+        [255, 0, 255],
+        [0, 255, 255],
+        [255, 255, 255],
+    ]
+    if not os.path.exists(dest_image_folder):
+        os.makedirs(dest_image_folder)
+    for fname in tqdm(os.listdir(source_mask_folder), ncols=90, desc=dest_image_folder):
+        source_image = cv2.imread(os.path.join(source_image_folder, fname))
+        source_mask = cv2.imread(os.path.join(source_mask_folder, fname), cv2.IMREAD_GRAYSCALE)
+        is_fg = np.array(source_mask!=0)
+        fg_pixels = is_fg.sum()
+        if fg_pixels == 0:
+            # Avoid 0/0 (NaN distances), which previously left the selection
+            # undefined or carried over from the previous image.
+            selected_color = corner_color[0]
+        else:
+            fg_image = source_image * is_fg[:,:,None]
+            fg_avg_color = fg_image.sum(0).sum(0) / fg_pixels
+            color_dists = euclidean_distances(corner_color, [fg_avg_color])[:, 0]
+            # argmax returns the first maximum, matching a strict ">" scan
+            selected_color = corner_color[int(np.argmax(color_dists))]
+        new_bg_img = np.ones_like(source_image) * np.array(selected_color)[None, None, :].astype(np.uint8)
+        out_image = np.zeros_like(source_image)
+        out_image += new_bg_img * np.array(source_mask==0)[:,:,None]
+        out_image += source_image * is_fg[:,:,None]
+        cv2.imwrite(os.path.join(dest_image_folder, fname), out_image)
+
+
+def create_bgS_dataset(
+        source_image_folder,
+        source_mask_folder, 
+        dest_image_folder,
+        dest_mask_folder,
+        image_dim=128
+    ):
+    # Builds an image/mask dataset in which each connected foreground component
+    # is grown towards its convex hull.
+    #
+    # For every file name in source_image_folder (sorted) the image and the
+    # mask of the same name are read, the foreground (mask != 0) is split into
+    # connected components, and each component is passed together with its
+    # convex hull to generate_convex_appearance_for_bg. The returned image is
+    # pasted over the output image and the returned mask is accumulated in
+    # out_mask. Results are written under the same file name to
+    # dest_image_folder and dest_mask_folder.
+    #
+    # image_dim: side length of the square out_mask buffer; the masks must be
+    # broadcast-compatible with (image_dim, image_dim).
+    if not os.path.exists(dest_image_folder):
+        os.makedirs(dest_image_folder)
+    if not os.path.exists(dest_mask_folder):
+        os.makedirs(dest_mask_folder)
+    fname_list = os.listdir(source_image_folder)
+    fname_list.sort()
+    for fname in tqdm(fname_list, ncols=90, desc=dest_image_folder):
+        source_image = cv2.imread(os.path.join(source_image_folder, fname))
+        source_mask = cv2.imread(os.path.join(source_mask_folder, fname), cv2.IMREAD_GRAYSCALE)
+        source_fg_mask = np.array(source_mask!=0).astype(np.uint8)
+        connected_component_mask = skimage.measure.label(source_fg_mask)
+
+        # out_image = np.zeros((image_dim, image_dim, 3))
+        out_image = source_image
+        out_mask = np.zeros((image_dim, image_dim))
+        for component_idx in np.unique(connected_component_mask):
+            # label 0 is the background of the labelling, not a component
+            if component_idx == 0:
+                continue
+            source_component_mask = np.array(connected_component_mask==component_idx).astype(np.uint8)
+            source_component_image = source_component_mask[:,:,None] * source_image
+            # NOTE(review): kernel, timeout and timeout_start are only referenced
+            # by the commented-out erosion loop below and are otherwise unused.
+            kernel = np.ones((3, 3), dtype=np.uint8)
+            convex_component_mask = convex_hull_image(source_component_mask).astype(np.uint8)
+            timeout = 5
+            timeout_start = time.time()
+            ## we erode the convex shape if it is too large
+            # while convex_component_mask.sum() > 128 * 128 * 0.3 and time.time() < timeout_start + timeout:
+            #     kernel = np.ones((3, 3), dtype=np.uint8)
+            #     convex_component_mask = cv2.erode(convex_component_mask, kernel, iterations=1)
+
+            # convex_component_mask is rebound to the mask returned by the helper;
+            # presumably the part of the hull that could be filled - confirm
+            # against generate_convex_appearance_for_bg.
+            convex_component_image, convex_component_mask, convex_obj_mask = generate_convex_appearance_for_bg(
+                source_component_image=source_component_image, 
+                source_component_mask=source_component_mask, 
+                source_obj_mask=source_component_mask * source_mask,
+                target_component_mask=convex_component_mask)
+
+            # overwrite the region covered by the returned mask with the generated appearance
+            out_image = out_image * (1-convex_component_mask[:, :, None]) + convex_component_image
+            # out_mask = out_mask * (1-convex_component_mask) + convex_obj_mask
+            out_mask = out_mask * (1-convex_component_mask) + convex_component_mask
+        # original labels are kept; accumulated pixels outside the original
+        # foreground receive the value 7
+        out_mask = source_mask + out_mask * (1-source_fg_mask) * (7)
+        cv2.imwrite(os.path.join(dest_image_folder, fname), out_image)
+        cv2.imwrite(os.path.join(dest_mask_folder, fname), out_mask)
+
+
+if __name__ == "__main__":
+    # Folders handed to create_bgCT_dataset when the file is run as a script
+    bgct_folders = dict(
+        source_image_folder='/media/HDD1/kubric/MOVi-C_128/train/image_bgS',
+        source_mask_folder='/media/HDD1/kubric/MOVi-C_128/train/mask_bgS',
+        dest_image_folder='/media/HDD1/kubric/MOVi-C_128/train/image_bgCST',
+    )
+    create_bgCT_dataset(**bgct_folders)
diff --git a/Dataset_Generation/Ablation Dataset/utils/generate_convex_appearance.py b/Dataset_Generation/Ablation Dataset/utils/generate_convex_appearance.py
@@ -35,4 +35,27 @@ def shift_image(X, dx, dy):
         X[:, :dx] = 0
     elif dx<0:
         X[:, dx:] = 0
-    return X
+    return X
+
+def generate_convex_appearance_for_bg(source_component_image, source_component_mask, source_obj_mask, target_component_mask):
+    """Fill a target mask with randomly shifted copies of a component.
+
+    Until every pixel of ``target_component_mask`` is covered, or 5 seconds
+    have passed, the component image, mask and object mask are shifted by a
+    random offset and the shifted content is written to target pixels that
+    are still uncovered.
+
+    NOTE: ``source_component_image``, ``source_component_mask`` and
+    ``source_obj_mask`` are updated in place (the accumulators alias them),
+    so later shifts operate on the already grown arrays.
+
+    Args:
+        source_component_image: image of the component, indexed (row, col, channel).
+        source_component_mask: 2-D mask whose pixels equal to 1 form the
+            component; must contain at least one such pixel.
+        source_obj_mask: 2-D mask shifted and accumulated like the component mask.
+        target_component_mask: 2-D mask whose pixels equal to 1 are to be filled.
+
+    Returns:
+        Tuple ``(image, mask, obj_mask)`` of the accumulated arrays.
+    """
+    component_pixels = np.argwhere(source_component_mask==1)  # (row, col) pairs
+    row_extent = int(component_pixels[:,0].max() - component_pixels[:,0].min())
+    col_extent = int(component_pixels[:,1].max() - component_pixels[:,1].min())
+    current_mask = source_component_mask
+    current_image = source_component_image
+    current_obj_mask = source_obj_mask
+    timeout = 5
+    timeout_start = time.time()
+    # while current_mask.sum() < target_component_mask.sum() and time.time() < timeout_start + timeout:
+    while ((1-current_mask) * target_component_mask).sum() != 0 and time.time() < timeout_start + timeout:
+        # shift_image(X, dx, dy) applies dx along the column axis (and
+        # presumably dy along rows), so each offset is bounded by the
+        # component's extent on the matching axis.
+        x_shift = random.randint(-col_extent, col_extent)
+        y_shift = random.randint(-row_extent, row_extent)
+        shifted_image = shift_image(source_component_image, x_shift, y_shift)
+        shifted_mask = shift_image(source_component_mask, x_shift, y_shift)
+        shifted_obj_mask = shift_image(source_obj_mask, x_shift, y_shift)
+        # target pixels hit by the shifted component that are not covered yet
+        new_mask = np.array(target_component_mask==1) * np.array(shifted_mask==1) * np.array(current_mask==0)
+        current_image += shifted_image * new_mask[:,:,None]
+        current_obj_mask += shifted_obj_mask * new_mask
+        current_mask += new_mask
+    return current_image, current_mask, current_obj_mask