diff --git a/README.md b/README.md index 8e45365..13efaa6 100644 --- a/README.md +++ b/README.md @@ -160,9 +160,9 @@ The default settings are optimized for the best result. However, the behavior of - `--processing_res`: the processing resolution; set as 0 to process the input resolution directly. When unassigned (`None`), will read default setting from model config. Default: ~~768~~ `None`. - `--output_processing_res`: produce output at the processing resolution instead of upsampling it to the input resolution. Default: False. - - `--resample_method`: resampling method used to resize images and depth predictions. This can be one of `bilinear`, `bicubic` or `nearest`. Default: `bilinear`. + - `--resample_method`: the resampling method used to resize images and depth predictions. This can be one of `bilinear`, `bicubic`, or `nearest`. Default: `bilinear`. -- `--half_precision` or `--fp16`: Run with half-precision (16-bit float) to reduce VRAM usage, might lead to suboptimal result. +- `--half_precision` or `--fp16`: Run with half-precision (16-bit float) to reduce VRAM usage, which might lead to suboptimal results. - `--seed`: Random seed can be set to ensure additional reproducibility. Default: None (unseeded). Note: forcing `--batch_size 1` helps to increase reproducibility. To ensure full reproducibility, [deterministic mode](https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms) needs to be used. - `--batch_size`: Batch size of repeated inference. Default: 0 (best value determined automatically). - `--color_map`: [Colormap](https://matplotlib.org/stable/users/explain/colors/colormaps.html) used to colorize the depth prediction. Default: Spectral. Set to `None` to skip colored depth map generation. 
@@ -196,7 +196,7 @@ python run.py \ --output_dir output/in-the-wild_example ``` -## 🦿 Evaluation on test datasets +## 🦿 Evaluation on test datasets Install additional dependencies: @@ -224,6 +224,43 @@ bash script/eval/12_eval_nyu.sh Note: although the seed has been set, the results might still be slightly different on different hardware. +## 🏋️ Training + +Based on the previously created environment, install extended requirements: + +```bash +pip install -r requirements++.txt -r requirements+.txt -r requirements.txt +``` + +Set environment variables for the data and checkpoint directories: + +```bash +export BASE_DATA_DIR=YOUR_DATA_DIR # directory of training data +export BASE_CKPT_DIR=YOUR_CHECKPOINT_DIR # directory of pretrained checkpoint +``` + +Download the Stable Diffusion v2 [checkpoint](https://huggingface.co/stabilityai/stable-diffusion-2) into `${BASE_CKPT_DIR}`. + +Prepare the [Hypersim](https://github.com/apple/ml-hypersim) and [Virtual KITTI 2](https://europe.naverlabs.com/research/computer-vision/proxy-virtual-worlds-vkitti-2/) datasets and save them into `${BASE_DATA_DIR}`. Please refer to [this README](script/dataset_preprocess/hypersim/README.md) for Hypersim preprocessing. + +Run the training script: + +```bash +python train.py --config config/train_marigold.yaml +``` + +Resume from a checkpoint, e.g.: + +```bash +python train.py --resume_from output/marigold_base/checkpoint/latest +``` + +Evaluating results + +Only the U-Net is updated and saved during training. To use the inference pipeline with your training result, replace the `unet` folder in the Marigold checkpoint with the one from the `checkpoint` output folder. Then refer to [this section](#evaluation) for evaluation. + +**Note**: Although random seeds have been set, the training result might be slightly different on different hardware. It's recommended to train without interruption. + ## ✏️ Contributing Please refer to [this](CONTRIBUTING.md) instruction. 
diff --git a/config/dataset/data_hypersim_train.yaml b/config/dataset/data_hypersim_train.yaml new file mode 100644 index 0000000..589a189 --- /dev/null +++ b/config/dataset/data_hypersim_train.yaml @@ -0,0 +1,4 @@ +name: hypersim +disp_name: hypersim_train +dir: hypersim/hypersim_processed_train.tar +filenames: data_split/hypersim/filename_list_train_filtered.txt \ No newline at end of file diff --git a/config/dataset/data_hypersim_val.yaml b/config/dataset/data_hypersim_val.yaml new file mode 100644 index 0000000..fe26bdc --- /dev/null +++ b/config/dataset/data_hypersim_val.yaml @@ -0,0 +1,4 @@ +name: hypersim +disp_name: hypersim_val +dir: hypersim/hypersim_processed_val.tar +filenames: data_split/hypersim/filename_list_val_filtered.txt \ No newline at end of file diff --git a/config/dataset/data_kitti_val.yaml b/config/dataset/data_kitti_val.yaml new file mode 100644 index 0000000..fc6c3a2 --- /dev/null +++ b/config/dataset/data_kitti_val.yaml @@ -0,0 +1,6 @@ +name: kitti +disp_name: kitti_val800_from_eigen_train +dir: kitti/kitti_sampled_val_800.tar +filenames: data_split/kitti/eigen_val_from_train_800.txt +kitti_bm_crop: true +valid_mask_crop: eigen \ No newline at end of file diff --git a/config/dataset/data_nyu_train.yaml b/config/dataset/data_nyu_train.yaml new file mode 100644 index 0000000..185ff58 --- /dev/null +++ b/config/dataset/data_nyu_train.yaml @@ -0,0 +1,5 @@ +name: nyu_v2 +disp_name: nyu_train_full +dir: nyuv2/nyu_labeled_extracted.tar +filenames: data_split/nyu/labeled/filename_list_train.txt +eigen_valid_mask: true \ No newline at end of file diff --git a/config/dataset/data_vkitti_train.yaml b/config/dataset/data_vkitti_train.yaml new file mode 100644 index 0000000..8c089c4 --- /dev/null +++ b/config/dataset/data_vkitti_train.yaml @@ -0,0 +1,6 @@ +name: vkitti +disp_name: vkitti_train +dir: vkitti/vkitti.tar +filenames: data_split/vkitti/vkitti_train.txt +kitti_bm_crop: true +valid_mask_crop: null # no valid_mask_crop for training \ No 
newline at end of file diff --git a/config/dataset/data_vkitti_val.yaml b/config/dataset/data_vkitti_val.yaml new file mode 100644 index 0000000..257e9f1 --- /dev/null +++ b/config/dataset/data_vkitti_val.yaml @@ -0,0 +1,6 @@ +name: vkitti +disp_name: vkitti_val +dir: vkitti/vkitti.tar +filenames: data_split/vkitti/vkitti_val.txt +kitti_bm_crop: true +valid_mask_crop: eigen \ No newline at end of file diff --git a/config/dataset/dataset_train.yaml b/config/dataset/dataset_train.yaml new file mode 100644 index 0000000..0381948 --- /dev/null +++ b/config/dataset/dataset_train.yaml @@ -0,0 +1,18 @@ +dataset: + train: + name: mixed + prob_ls: [0.9, 0.1] + dataset_list: + - name: hypersim + disp_name: hypersim_train + dir: hypersim/hypersim_processed_train.tar + filenames: data_split/hypersim/filename_list_train_filtered.txt + resize_to_hw: + - 480 + - 640 + - name: vkitti + disp_name: vkitti_train + dir: vkitti/vkitti.tar + filenames: data_split/vkitti/vkitti_train.txt + kitti_bm_crop: true + valid_mask_crop: null \ No newline at end of file diff --git a/config/dataset/dataset_val.yaml b/config/dataset/dataset_val.yaml new file mode 100644 index 0000000..f70d90d --- /dev/null +++ b/config/dataset/dataset_val.yaml @@ -0,0 +1,45 @@ +dataset: + val: + # - name: hypersim + # disp_name: hypersim_val + # dir: hypersim/hypersim_processed_val.tar + # filenames: data_split/hypersim/filename_list_val_filtered.txt + # resize_to_hw: + # - 480 + # - 640 + + # - name: nyu_v2 + # disp_name: nyu_train_full + # dir: nyuv2/nyu_labeled_extracted.tar + # filenames: data_split/nyu/labeled/filename_list_train.txt + # eigen_valid_mask: true + + # - name: kitti + # disp_name: kitti_val800_from_eigen_train + # dir: kitti/kitti_sampled_val_800.tar + # filenames: data_split/kitti/eigen_val_from_train_800.txt + # kitti_bm_crop: true + # valid_mask_crop: eigen + + # Smaller subsets for faster validation during training + # The first dataset is used to calculate main eval metric. 
+ - name: hypersim + disp_name: hypersim_val_small_80 + dir: hypersim/hypersim_processed_val.tar + filenames: data_split/hypersim/filename_list_val_filtered_small_80.txt + resize_to_hw: + - 480 + - 640 + + - name: nyu_v2 + disp_name: nyu_train_small_100 + dir: nyuv2/nyu_labeled_extracted.tar + filenames: data_split/nyu/labeled/filename_list_train_small_100.txt + eigen_valid_mask: true + + - name: kitti + disp_name: kitti_val_from_train_sub_100 + dir: kitti/kitti_sampled_val_800.tar + filenames: data_split/kitti/eigen_val_from_train_sub_100.txt + kitti_bm_crop: true + valid_mask_crop: eigen \ No newline at end of file diff --git a/config/dataset/dataset_vis.yaml b/config/dataset/dataset_vis.yaml new file mode 100644 index 0000000..c11dc8e --- /dev/null +++ b/config/dataset/dataset_vis.yaml @@ -0,0 +1,9 @@ +dataset: + vis: + - name: hypersim + disp_name: hypersim_vis + dir: hypersim/hypersim_processed_val.tar + filenames: data_split/hypersim/selected_vis_sample.txt + resize_to_hw: + - 480 + - 640 diff --git a/config/logging.yaml b/config/logging.yaml new file mode 100644 index 0000000..8cecbae --- /dev/null +++ b/config/logging.yaml @@ -0,0 +1,5 @@ +logging: + filename: logging.log + format: ' %(asctime)s - %(levelname)s -%(filename)s - %(funcName)s >> %(message)s' + console_level: 20 + file_level: 10 diff --git a/config/model_sdv2.yaml b/config/model_sdv2.yaml new file mode 100644 index 0000000..ce58c5e --- /dev/null +++ b/config/model_sdv2.yaml @@ -0,0 +1,4 @@ +model: + name: marigold_pipeline + pretrained_path: stable-diffusion-2 + latent_scale_factor: 0.18215 \ No newline at end of file diff --git a/config/train_debug.yaml b/config/train_debug.yaml new file mode 100644 index 0000000..7c21ed1 --- /dev/null +++ b/config/train_debug.yaml @@ -0,0 +1,12 @@ +base_config: +- config/train_marigold.yaml + + +# Training settings +trainer: + save_period: 5 + backup_period: 10 + validation_period: 5 + visualization_period: 5 + +max_iter: 50 \ No newline at end of file diff 
--git a/config/train_marigold.yaml b/config/train_marigold.yaml new file mode 100644 index 0000000..defb4c7 --- /dev/null +++ b/config/train_marigold.yaml @@ -0,0 +1,94 @@ +base_config: +- config/logging.yaml +- config/wandb.yaml +- config/dataset/dataset_train.yaml +- config/dataset/dataset_val.yaml +- config/dataset/dataset_vis.yaml +- config/model_sdv2.yaml + + +pipeline: + name: MarigoldPipeline + kwargs: + scale_invariant: true + shift_invariant: true + +depth_normalization: + type: scale_shift_depth + clip: true + norm_min: -1.0 + norm_max: 1.0 + min_max_quantile: 0.02 + +augmentation: + lr_flip_p: 0.5 + +dataloader: + num_workers: 2 + effective_batch_size: 32 + max_train_batch_size: 2 + seed: 2024 # to ensure continuity when resuming from checkpoint + +# Training settings +trainer: + name: MarigoldTrainer + training_noise_scheduler: + pretrained_path: stable-diffusion-2 + init_seed: 2024 # use null to train w/o seeding + save_period: 50 + backup_period: 2000 + validation_period: 2000 + visualization_period: 2000 + +multi_res_noise: + strength: 0.9 + annealed: true + downscale_strategy: original + +gt_depth_type: depth_raw_norm +gt_mask_type: valid_mask_raw + +max_epoch: 10000 # a large enough number +max_iter: 30000 # usually converges at around 20k + +optimizer: + name: Adam + +loss: + name: mse_loss + kwargs: + reduction: mean + +lr: 3.0e-05 +lr_scheduler: + name: IterExponential + kwargs: + total_iter: 25000 + final_ratio: 0.01 + warmup_steps: 100 + +# Validation (and visualization) settings +validation: + denoising_steps: 50 + ensemble_size: 1 # simplified setting for on-training validation + processing_res: 0 + match_input_res: false + resample_method: bilinear + main_val_metric: abs_relative_difference + main_val_metric_goal: minimize + init_seed: 2024 + +eval: + alignment: least_square + align_max_res: null + eval_metrics: + - abs_relative_difference + - squared_relative_difference + - rmse_linear + - rmse_log + - log10 + - delta1_acc + - delta2_acc + 
- delta3_acc + - i_rmse + - silog_rmse diff --git a/config/wandb.yaml b/config/wandb.yaml new file mode 100644 index 0000000..5631cfb --- /dev/null +++ b/config/wandb.yaml @@ -0,0 +1,3 @@ +wandb: + # entity: your_entity + project: marigold \ No newline at end of file diff --git a/data_split/hypersim/filename_list_val_filtered_small_80.txt b/data_split/hypersim/filename_list_val_filtered_small_80.txt new file mode 100644 index 0000000..b240c2d --- /dev/null +++ b/data_split/hypersim/filename_list_val_filtered_small_80.txt @@ -0,0 +1,80 @@ +ai_003_010/rgb_cam_00_fr0047.png ai_003_010/depth_plane_cam_00_fr0047.png +ai_003_010/rgb_cam_00_fr0048.png ai_003_010/depth_plane_cam_00_fr0048.png +ai_003_010/rgb_cam_01_fr0098.png ai_003_010/depth_plane_cam_01_fr0098.png +ai_004_003/rgb_cam_01_fr0008.png ai_004_003/depth_plane_cam_01_fr0008.png +ai_004_004/rgb_cam_00_fr0025.png ai_004_004/depth_plane_cam_00_fr0025.png +ai_004_004/rgb_cam_00_fr0046.png ai_004_004/depth_plane_cam_00_fr0046.png +ai_004_004/rgb_cam_00_fr0049.png ai_004_004/depth_plane_cam_00_fr0049.png +ai_004_004/rgb_cam_01_fr0023.png ai_004_004/depth_plane_cam_01_fr0023.png +ai_005_005/rgb_cam_00_fr0032.png ai_005_005/depth_plane_cam_00_fr0032.png +ai_006_007/rgb_cam_00_fr0022.png ai_006_007/depth_plane_cam_00_fr0022.png +ai_006_007/rgb_cam_00_fr0095.png ai_006_007/depth_plane_cam_00_fr0095.png +ai_007_001/rgb_cam_00_fr0044.png ai_007_001/depth_plane_cam_00_fr0044.png +ai_007_001/rgb_cam_00_fr0048.png ai_007_001/depth_plane_cam_00_fr0048.png +ai_009_007/rgb_cam_00_fr0017.png ai_009_007/depth_plane_cam_00_fr0017.png +ai_009_007/rgb_cam_00_fr0097.png ai_009_007/depth_plane_cam_00_fr0097.png +ai_009_009/rgb_cam_00_fr0094.png ai_009_009/depth_plane_cam_00_fr0094.png +ai_015_001/rgb_cam_00_fr0058.png ai_015_001/depth_plane_cam_00_fr0058.png +ai_015_001/rgb_cam_00_fr0089.png ai_015_001/depth_plane_cam_00_fr0089.png +ai_017_007/rgb_cam_01_fr0064.png ai_017_007/depth_plane_cam_01_fr0064.png 
+ai_018_005/rgb_cam_00_fr0014.png ai_018_005/depth_plane_cam_00_fr0014.png +ai_018_005/rgb_cam_00_fr0059.png ai_018_005/depth_plane_cam_00_fr0059.png +ai_022_010/rgb_cam_00_fr0097.png ai_022_010/depth_plane_cam_00_fr0097.png +ai_022_010/rgb_cam_00_fr0099.png ai_022_010/depth_plane_cam_00_fr0099.png +ai_023_003/rgb_cam_00_fr0013.png ai_023_003/depth_plane_cam_00_fr0013.png +ai_023_003/rgb_cam_00_fr0015.png ai_023_003/depth_plane_cam_00_fr0015.png +ai_023_003/rgb_cam_00_fr0036.png ai_023_003/depth_plane_cam_00_fr0036.png +ai_023_003/rgb_cam_00_fr0095.png ai_023_003/depth_plane_cam_00_fr0095.png +ai_023_003/rgb_cam_01_fr0029.png ai_023_003/depth_plane_cam_01_fr0029.png +ai_023_003/rgb_cam_01_fr0036.png ai_023_003/depth_plane_cam_01_fr0036.png +ai_023_003/rgb_cam_01_fr0071.png ai_023_003/depth_plane_cam_01_fr0071.png +ai_032_007/rgb_cam_00_fr0031.png ai_032_007/depth_plane_cam_00_fr0031.png +ai_032_007/rgb_cam_00_fr0040.png ai_032_007/depth_plane_cam_00_fr0040.png +ai_032_007/rgb_cam_00_fr0075.png ai_032_007/depth_plane_cam_00_fr0075.png +ai_035_003/rgb_cam_00_fr0054.png ai_035_003/depth_plane_cam_00_fr0054.png +ai_035_004/rgb_cam_00_fr0077.png ai_035_004/depth_plane_cam_00_fr0077.png +ai_038_009/rgb_cam_00_fr0031.png ai_038_009/depth_plane_cam_00_fr0031.png +ai_038_009/rgb_cam_01_fr0010.png ai_038_009/depth_plane_cam_01_fr0010.png +ai_038_009/rgb_cam_01_fr0088.png ai_038_009/depth_plane_cam_01_fr0088.png +ai_039_003/rgb_cam_01_fr0042.png ai_039_003/depth_plane_cam_01_fr0042.png +ai_039_003/rgb_cam_01_fr0097.png ai_039_003/depth_plane_cam_01_fr0097.png +ai_044_001/rgb_cam_00_fr0043.png ai_044_001/depth_plane_cam_00_fr0043.png +ai_044_001/rgb_cam_01_fr0018.png ai_044_001/depth_plane_cam_01_fr0018.png +ai_044_003/rgb_cam_01_fr0082.png ai_044_003/depth_plane_cam_01_fr0082.png +ai_044_003/rgb_cam_01_fr0087.png ai_044_003/depth_plane_cam_01_fr0087.png +ai_044_003/rgb_cam_02_fr0086.png ai_044_003/depth_plane_cam_02_fr0086.png +ai_044_003/rgb_cam_03_fr0022.png 
ai_044_003/depth_plane_cam_03_fr0022.png +ai_044_003/rgb_cam_03_fr0063.png ai_044_003/depth_plane_cam_03_fr0063.png +ai_045_008/rgb_cam_00_fr0015.png ai_045_008/depth_plane_cam_00_fr0015.png +ai_045_008/rgb_cam_00_fr0030.png ai_045_008/depth_plane_cam_00_fr0030.png +ai_045_008/rgb_cam_01_fr0029.png ai_045_008/depth_plane_cam_01_fr0029.png +ai_045_008/rgb_cam_01_fr0052.png ai_045_008/depth_plane_cam_01_fr0052.png +ai_045_008/rgb_cam_01_fr0088.png ai_045_008/depth_plane_cam_01_fr0088.png +ai_047_009/rgb_cam_00_fr0097.png ai_047_009/depth_plane_cam_00_fr0097.png +ai_048_001/rgb_cam_00_fr0014.png ai_048_001/depth_plane_cam_00_fr0014.png +ai_048_001/rgb_cam_00_fr0088.png ai_048_001/depth_plane_cam_00_fr0088.png +ai_048_001/rgb_cam_01_fr0045.png ai_048_001/depth_plane_cam_01_fr0045.png +ai_048_001/rgb_cam_02_fr0031.png ai_048_001/depth_plane_cam_02_fr0031.png +ai_048_001/rgb_cam_03_fr0005.png ai_048_001/depth_plane_cam_03_fr0005.png +ai_048_001/rgb_cam_03_fr0045.png ai_048_001/depth_plane_cam_03_fr0045.png +ai_048_001/rgb_cam_03_fr0054.png ai_048_001/depth_plane_cam_03_fr0054.png +ai_048_001/rgb_cam_03_fr0061.png ai_048_001/depth_plane_cam_03_fr0061.png +ai_050_002/rgb_cam_01_fr0016.png ai_050_002/depth_plane_cam_01_fr0016.png +ai_050_002/rgb_cam_02_fr0053.png ai_050_002/depth_plane_cam_02_fr0053.png +ai_050_002/rgb_cam_03_fr0082.png ai_050_002/depth_plane_cam_03_fr0082.png +ai_050_002/rgb_cam_04_fr0033.png ai_050_002/depth_plane_cam_04_fr0033.png +ai_051_004/rgb_cam_00_fr0028.png ai_051_004/depth_plane_cam_00_fr0028.png +ai_051_004/rgb_cam_01_fr0065.png ai_051_004/depth_plane_cam_01_fr0065.png +ai_051_004/rgb_cam_02_fr0054.png ai_051_004/depth_plane_cam_02_fr0054.png +ai_051_004/rgb_cam_02_fr0056.png ai_051_004/depth_plane_cam_02_fr0056.png +ai_051_004/rgb_cam_03_fr0037.png ai_051_004/depth_plane_cam_03_fr0037.png +ai_051_004/rgb_cam_04_fr0083.png ai_051_004/depth_plane_cam_04_fr0083.png +ai_051_004/rgb_cam_05_fr0003.png ai_051_004/depth_plane_cam_05_fr0003.png 
+ai_052_001/rgb_cam_00_fr0008.png ai_052_001/depth_plane_cam_00_fr0008.png +ai_052_003/rgb_cam_00_fr0097.png ai_052_003/depth_plane_cam_00_fr0097.png +ai_052_003/rgb_cam_01_fr0081.png ai_052_003/depth_plane_cam_01_fr0081.png +ai_052_007/rgb_cam_01_fr0001.png ai_052_007/depth_plane_cam_01_fr0001.png +ai_053_003/rgb_cam_00_fr0005.png ai_053_003/depth_plane_cam_00_fr0005.png +ai_053_005/rgb_cam_00_fr0080.png ai_053_005/depth_plane_cam_00_fr0080.png +ai_055_009/rgb_cam_01_fr0070.png ai_055_009/depth_plane_cam_01_fr0070.png +ai_055_009/rgb_cam_01_fr0086.png ai_055_009/depth_plane_cam_01_fr0086.png \ No newline at end of file diff --git a/data_split/hypersim/selected_vis_sample.txt b/data_split/hypersim/selected_vis_sample.txt new file mode 100644 index 0000000..0ebd78d --- /dev/null +++ b/data_split/hypersim/selected_vis_sample.txt @@ -0,0 +1,3 @@ +ai_015_004/rgb_cam_00_fr0002.png ai_015_004/depth_plane_cam_00_fr0002.png (val) +ai_044_003/rgb_cam_01_fr0063.png ai_044_003/depth_plane_cam_01_fr0063.png (val) +ai_052_003/rgb_cam_01_fr0076.png ai_052_003/depth_plane_cam_01_fr0076.png (val) \ No newline at end of file diff --git a/data_split/kitti/eigen_val_from_train_sub_100.txt b/data_split/kitti/eigen_val_from_train_sub_100.txt new file mode 100644 index 0000000..fa0ebc4 --- /dev/null +++ b/data_split/kitti/eigen_val_from_train_sub_100.txt @@ -0,0 +1,100 @@ +2011_09_26/2011_09_26_drive_0001_sync/image_02/data/0000000046.png 2011_09_26_drive_0001_sync/proj_depth/groundtruth/image_02/0000000046.png 721.5377 +2011_09_26/2011_09_26_drive_0005_sync/image_02/data/0000000148.png 2011_09_26_drive_0005_sync/proj_depth/groundtruth/image_02/0000000148.png 721.5377 +2011_09_26/2011_09_26_drive_0014_sync/image_02/data/0000000076.png 2011_09_26_drive_0014_sync/proj_depth/groundtruth/image_02/0000000076.png 721.5377 +2011_09_26/2011_09_26_drive_0015_sync/image_02/data/0000000019.png 2011_09_26_drive_0015_sync/proj_depth/groundtruth/image_02/0000000019.png 721.5377 
+2011_09_26/2011_09_26_drive_0015_sync/image_02/data/0000000194.png 2011_09_26_drive_0015_sync/proj_depth/groundtruth/image_02/0000000194.png 721.5377 +2011_09_26/2011_09_26_drive_0018_sync/image_02/data/0000000106.png 2011_09_26_drive_0018_sync/proj_depth/groundtruth/image_02/0000000106.png 721.5377 +2011_09_26/2011_09_26_drive_0019_sync/image_02/data/0000000263.png 2011_09_26_drive_0019_sync/proj_depth/groundtruth/image_02/0000000263.png 721.5377 +2011_09_26/2011_09_26_drive_0019_sync/image_02/data/0000000274.png 2011_09_26_drive_0019_sync/proj_depth/groundtruth/image_02/0000000274.png 721.5377 +2011_09_26/2011_09_26_drive_0022_sync/image_02/data/0000000015.png 2011_09_26_drive_0022_sync/proj_depth/groundtruth/image_02/0000000015.png 721.5377 +2011_09_26/2011_09_26_drive_0022_sync/image_02/data/0000000123.png 2011_09_26_drive_0022_sync/proj_depth/groundtruth/image_02/0000000123.png 721.5377 +2011_09_26/2011_09_26_drive_0022_sync/image_02/data/0000000149.png 2011_09_26_drive_0022_sync/proj_depth/groundtruth/image_02/0000000149.png 721.5377 +2011_09_26/2011_09_26_drive_0022_sync/image_02/data/0000000308.png 2011_09_26_drive_0022_sync/proj_depth/groundtruth/image_02/0000000308.png 721.5377 +2011_09_26/2011_09_26_drive_0022_sync/image_02/data/0000000553.png 2011_09_26_drive_0022_sync/proj_depth/groundtruth/image_02/0000000553.png 721.5377 +2011_09_26/2011_09_26_drive_0022_sync/image_02/data/0000000691.png 2011_09_26_drive_0022_sync/proj_depth/groundtruth/image_02/0000000691.png 721.5377 +2011_09_26/2011_09_26_drive_0028_sync/image_02/data/0000000270.png 2011_09_26_drive_0028_sync/proj_depth/groundtruth/image_02/0000000270.png 721.5377 +2011_09_26/2011_09_26_drive_0035_sync/image_02/data/0000000085.png 2011_09_26_drive_0035_sync/proj_depth/groundtruth/image_02/0000000085.png 721.5377 +2011_09_26/2011_09_26_drive_0039_sync/image_02/data/0000000326.png 2011_09_26_drive_0039_sync/proj_depth/groundtruth/image_02/0000000326.png 721.5377 
+2011_09_26/2011_09_26_drive_0051_sync/image_02/data/0000000429.png 2011_09_26_drive_0051_sync/proj_depth/groundtruth/image_02/0000000429.png 721.5377 +2011_09_26/2011_09_26_drive_0057_sync/image_02/data/0000000010.png 2011_09_26_drive_0057_sync/proj_depth/groundtruth/image_02/0000000010.png 721.5377 +2011_09_26/2011_09_26_drive_0060_sync/image_02/data/0000000020.png 2011_09_26_drive_0060_sync/proj_depth/groundtruth/image_02/0000000020.png 721.5377 +2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000223.png 2011_09_26_drive_0061_sync/proj_depth/groundtruth/image_02/0000000223.png 721.5377 +2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000262.png 2011_09_26_drive_0061_sync/proj_depth/groundtruth/image_02/0000000262.png 721.5377 +2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000291.png 2011_09_26_drive_0061_sync/proj_depth/groundtruth/image_02/0000000291.png 721.5377 +2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000523.png 2011_09_26_drive_0061_sync/proj_depth/groundtruth/image_02/0000000523.png 721.5377 +2011_09_26/2011_09_26_drive_0061_sync/image_02/data/0000000524.png 2011_09_26_drive_0061_sync/proj_depth/groundtruth/image_02/0000000524.png 721.5377 +2011_09_26/2011_09_26_drive_0070_sync/image_02/data/0000000063.png 2011_09_26_drive_0070_sync/proj_depth/groundtruth/image_02/0000000063.png 721.5377 +2011_09_26/2011_09_26_drive_0070_sync/image_02/data/0000000320.png 2011_09_26_drive_0070_sync/proj_depth/groundtruth/image_02/0000000320.png 721.5377 +2011_09_26/2011_09_26_drive_0087_sync/image_02/data/0000000313.png 2011_09_26_drive_0087_sync/proj_depth/groundtruth/image_02/0000000313.png 721.5377 +2011_09_26/2011_09_26_drive_0087_sync/image_02/data/0000000316.png 2011_09_26_drive_0087_sync/proj_depth/groundtruth/image_02/0000000316.png 721.5377 +2011_09_26/2011_09_26_drive_0087_sync/image_02/data/0000000363.png 2011_09_26_drive_0087_sync/proj_depth/groundtruth/image_02/0000000363.png 721.5377 
+2011_09_26/2011_09_26_drive_0087_sync/image_02/data/0000000438.png 2011_09_26_drive_0087_sync/proj_depth/groundtruth/image_02/0000000438.png 721.5377 +2011_09_26/2011_09_26_drive_0091_sync/image_02/data/0000000137.png 2011_09_26_drive_0091_sync/proj_depth/groundtruth/image_02/0000000137.png 721.5377 +2011_09_26/2011_09_26_drive_0091_sync/image_02/data/0000000143.png 2011_09_26_drive_0091_sync/proj_depth/groundtruth/image_02/0000000143.png 721.5377 +2011_09_26/2011_09_26_drive_0091_sync/image_02/data/0000000278.png 2011_09_26_drive_0091_sync/proj_depth/groundtruth/image_02/0000000278.png 721.5377 +2011_09_26/2011_09_26_drive_0091_sync/image_02/data/0000000312.png 2011_09_26_drive_0091_sync/proj_depth/groundtruth/image_02/0000000312.png 721.5377 +2011_09_26/2011_09_26_drive_0095_sync/image_02/data/0000000160.png 2011_09_26_drive_0095_sync/proj_depth/groundtruth/image_02/0000000160.png 721.5377 +2011_09_26/2011_09_26_drive_0104_sync/image_02/data/0000000011.png 2011_09_26_drive_0104_sync/proj_depth/groundtruth/image_02/0000000011.png 721.5377 +2011_09_26/2011_09_26_drive_0113_sync/image_02/data/0000000052.png 2011_09_26_drive_0113_sync/proj_depth/groundtruth/image_02/0000000052.png 721.5377 +2011_09_26/2011_09_26_drive_0113_sync/image_02/data/0000000055.png 2011_09_26_drive_0113_sync/proj_depth/groundtruth/image_02/0000000055.png 721.5377 +2011_09_29/2011_09_29_drive_0004_sync/image_02/data/0000000065.png 2011_09_29_drive_0004_sync/proj_depth/groundtruth/image_02/0000000065.png 718.3351 +2011_09_30/2011_09_30_drive_0020_sync/image_02/data/0000000325.png 2011_09_30_drive_0020_sync/proj_depth/groundtruth/image_02/0000000325.png 707.0912 +2011_09_30/2011_09_30_drive_0020_sync/image_02/data/0000000959.png 2011_09_30_drive_0020_sync/proj_depth/groundtruth/image_02/0000000959.png 707.0912 +2011_09_30/2011_09_30_drive_0020_sync/image_02/data/0000001004.png 2011_09_30_drive_0020_sync/proj_depth/groundtruth/image_02/0000001004.png 707.0912 
+2011_09_30/2011_09_30_drive_0020_sync/image_02/data/0000001054.png 2011_09_30_drive_0020_sync/proj_depth/groundtruth/image_02/0000001054.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000000545.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000000545.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000000920.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000000920.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000001593.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000001593.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000001692.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000001692.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000001806.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000001806.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000001905.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000001905.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000002714.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000002714.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000002812.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000002812.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000002838.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000002838.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000003402.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000003402.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000003700.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000003700.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000004016.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000004016.png 707.0912 
+2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000004276.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000004276.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000004664.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000004664.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000004772.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000004772.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000004782.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000004782.png 707.0912 +2011_09_30/2011_09_30_drive_0028_sync/image_02/data/0000005095.png 2011_09_30_drive_0028_sync/proj_depth/groundtruth/image_02/0000005095.png 707.0912 +2011_09_30/2011_09_30_drive_0033_sync/image_02/data/0000000319.png 2011_09_30_drive_0033_sync/proj_depth/groundtruth/image_02/0000000319.png 707.0912 +2011_09_30/2011_09_30_drive_0033_sync/image_02/data/0000000355.png 2011_09_30_drive_0033_sync/proj_depth/groundtruth/image_02/0000000355.png 707.0912 +2011_09_30/2011_09_30_drive_0033_sync/image_02/data/0000000500.png 2011_09_30_drive_0033_sync/proj_depth/groundtruth/image_02/0000000500.png 707.0912 +2011_09_30/2011_09_30_drive_0033_sync/image_02/data/0000000682.png 2011_09_30_drive_0033_sync/proj_depth/groundtruth/image_02/0000000682.png 707.0912 +2011_09_30/2011_09_30_drive_0033_sync/image_02/data/0000000710.png 2011_09_30_drive_0033_sync/proj_depth/groundtruth/image_02/0000000710.png 707.0912 +2011_09_30/2011_09_30_drive_0033_sync/image_02/data/0000000896.png 2011_09_30_drive_0033_sync/proj_depth/groundtruth/image_02/0000000896.png 707.0912 +2011_09_30/2011_09_30_drive_0033_sync/image_02/data/0000001197.png 2011_09_30_drive_0033_sync/proj_depth/groundtruth/image_02/0000001197.png 707.0912 +2011_09_30/2011_09_30_drive_0033_sync/image_02/data/0000001508.png 2011_09_30_drive_0033_sync/proj_depth/groundtruth/image_02/0000001508.png 707.0912 
+2011_09_30/2011_09_30_drive_0033_sync/image_02/data/0000001512.png 2011_09_30_drive_0033_sync/proj_depth/groundtruth/image_02/0000001512.png 707.0912 +2011_09_30/2011_09_30_drive_0034_sync/image_02/data/0000000029.png 2011_09_30_drive_0034_sync/proj_depth/groundtruth/image_02/0000000029.png 707.0912 +2011_09_30/2011_09_30_drive_0034_sync/image_02/data/0000000171.png 2011_09_30_drive_0034_sync/proj_depth/groundtruth/image_02/0000000171.png 707.0912 +2011_09_30/2011_09_30_drive_0034_sync/image_02/data/0000000193.png 2011_09_30_drive_0034_sync/proj_depth/groundtruth/image_02/0000000193.png 707.0912 +2011_09_30/2011_09_30_drive_0034_sync/image_02/data/0000000389.png 2011_09_30_drive_0034_sync/proj_depth/groundtruth/image_02/0000000389.png 707.0912 +2011_09_30/2011_09_30_drive_0034_sync/image_02/data/0000001141.png 2011_09_30_drive_0034_sync/proj_depth/groundtruth/image_02/0000001141.png 707.0912 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000000138.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000000138.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000000593.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000000593.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000001046.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000001046.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000001151.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000001151.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000001255.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000001255.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000001283.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000001283.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000001737.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000001737.png 718.856 
+2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000001999.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000001999.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000002012.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000002012.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000002089.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000002089.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000002324.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000002324.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000002902.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000002902.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000002971.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000002971.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000003299.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000003299.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000003366.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000003366.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000003427.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000003427.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000003440.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000003440.png 718.856 +2011_10_03/2011_10_03_drive_0034_sync/image_02/data/0000004060.png 2011_10_03_drive_0034_sync/proj_depth/groundtruth/image_02/0000004060.png 718.856 +2011_10_03/2011_10_03_drive_0042_sync/image_02/data/0000000525.png 2011_10_03_drive_0042_sync/proj_depth/groundtruth/image_02/0000000525.png 718.856 +2011_10_03/2011_10_03_drive_0042_sync/image_02/data/0000000538.png 2011_10_03_drive_0042_sync/proj_depth/groundtruth/image_02/0000000538.png 718.856 
+2011_10_03/2011_10_03_drive_0042_sync/image_02/data/0000000648.png 2011_10_03_drive_0042_sync/proj_depth/groundtruth/image_02/0000000648.png 718.856 +2011_10_03/2011_10_03_drive_0042_sync/image_02/data/0000000776.png 2011_10_03_drive_0042_sync/proj_depth/groundtruth/image_02/0000000776.png 718.856 +2011_10_03/2011_10_03_drive_0042_sync/image_02/data/0000000779.png 2011_10_03_drive_0042_sync/proj_depth/groundtruth/image_02/0000000779.png 718.856 +2011_10_03/2011_10_03_drive_0042_sync/image_02/data/0000001087.png 2011_10_03_drive_0042_sync/proj_depth/groundtruth/image_02/0000001087.png 718.856 +2011_10_03/2011_10_03_drive_0042_sync/image_02/data/0000001107.png 2011_10_03_drive_0042_sync/proj_depth/groundtruth/image_02/0000001107.png 718.856 \ No newline at end of file diff --git a/data_split/nyu/labeled/filename_list_train_small_100.txt b/data_split/nyu/labeled/filename_list_train_small_100.txt new file mode 100644 index 0000000..61103d4 --- /dev/null +++ b/data_split/nyu/labeled/filename_list_train_small_100.txt @@ -0,0 +1,100 @@ +train/bathroom_0007/rgb_0649.png train/bathroom_0007/depth_0649.png train/bathroom_0007/filled_0649.png +train/bathroom_0010/rgb_0653.png train/bathroom_0010/depth_0653.png train/bathroom_0010/filled_0653.png +train/bathroom_0041/rgb_0719.png train/bathroom_0041/depth_0719.png train/bathroom_0041/filled_0719.png +train/bathroom_0045/rgb_0729.png train/bathroom_0045/depth_0729.png train/bathroom_0045/filled_0729.png +train/bathroom_0048/rgb_0736.png train/bathroom_0048/depth_0736.png train/bathroom_0048/filled_0736.png +train/bathroom_0056/rgb_0505.png train/bathroom_0056/depth_0505.png train/bathroom_0056/filled_0505.png +train/bedroom_0004/rgb_0178.png train/bedroom_0004/depth_0178.png train/bedroom_0004/filled_0178.png +train/bedroom_0016/rgb_0071.png train/bedroom_0016/depth_0071.png train/bedroom_0016/filled_0071.png +train/bedroom_0025/rgb_0910.png train/bedroom_0025/depth_0910.png train/bedroom_0025/filled_0910.png 
+train/bedroom_0026/rgb_0914.png train/bedroom_0026/depth_0914.png train/bedroom_0026/filled_0914.png +train/bedroom_0031/rgb_0929.png train/bedroom_0031/depth_0929.png train/bedroom_0031/filled_0929.png +train/bedroom_0034/rgb_0939.png train/bedroom_0034/depth_0939.png train/bedroom_0034/filled_0939.png +train/bedroom_0040/rgb_0954.png train/bedroom_0040/depth_0954.png train/bedroom_0040/filled_0954.png +train/bedroom_0042/rgb_0958.png train/bedroom_0042/depth_0958.png train/bedroom_0042/filled_0958.png +train/bedroom_0050/rgb_0978.png train/bedroom_0050/depth_0978.png train/bedroom_0050/filled_0978.png +train/bedroom_0051/rgb_0984.png train/bedroom_0051/depth_0984.png train/bedroom_0051/filled_0984.png +train/bedroom_0056/rgb_0997.png train/bedroom_0056/depth_0997.png train/bedroom_0056/filled_0997.png +train/bedroom_0060/rgb_1008.png train/bedroom_0060/depth_1008.png train/bedroom_0060/filled_1008.png +train/bedroom_0067/rgb_1029.png train/bedroom_0067/depth_1029.png train/bedroom_0067/filled_1029.png +train/bedroom_0072/rgb_1045.png train/bedroom_0072/depth_1045.png train/bedroom_0072/filled_1045.png +train/bedroom_0072/rgb_1046.png train/bedroom_0072/depth_1046.png train/bedroom_0072/filled_1046.png +train/bedroom_0079/rgb_1062.png train/bedroom_0079/depth_1062.png train/bedroom_0079/filled_1062.png +train/bedroom_0081/rgb_1072.png train/bedroom_0081/depth_1072.png train/bedroom_0081/filled_1072.png +train/bedroom_0096/rgb_1112.png train/bedroom_0096/depth_1112.png train/bedroom_0096/filled_1112.png +train/bedroom_0118/rgb_1173.png train/bedroom_0118/depth_1173.png train/bedroom_0118/filled_1173.png +train/bedroom_0129/rgb_1197.png train/bedroom_0129/depth_1197.png train/bedroom_0129/filled_1197.png +train/bedroom_0136/rgb_0527.png train/bedroom_0136/depth_0527.png train/bedroom_0136/filled_0527.png +train/bookstore_0000/rgb_0105.png train/bookstore_0000/depth_0105.png train/bookstore_0000/filled_0105.png +train/bookstore_0000/rgb_0107.png 
train/bookstore_0000/depth_0107.png train/bookstore_0000/filled_0107.png +train/bookstore_0002/rgb_0101.png train/bookstore_0002/depth_0101.png train/bookstore_0002/filled_0101.png +train/bookstore_0002/rgb_0103.png train/bookstore_0002/depth_0103.png train/bookstore_0002/filled_0103.png +train/classroom_0010/rgb_0305.png train/classroom_0010/depth_0305.png train/classroom_0010/filled_0305.png +train/classroom_0012/rgb_0309.png train/classroom_0012/depth_0309.png train/classroom_0012/filled_0309.png +train/conference_room_0001/rgb_0339.png train/conference_room_0001/depth_0339.png train/conference_room_0001/filled_0339.png +train/conference_room_0001/rgb_0341.png train/conference_room_0001/depth_0341.png train/conference_room_0001/filled_0341.png +train/dining_room_0002/rgb_1346.png train/dining_room_0002/depth_1346.png train/dining_room_0002/filled_1346.png +train/dining_room_0008/rgb_1363.png train/dining_room_0008/depth_1363.png train/dining_room_0008/filled_1363.png +train/dining_room_0012/rgb_1371.png train/dining_room_0012/depth_1371.png train/dining_room_0012/filled_1371.png +train/dining_room_0014/rgb_1377.png train/dining_room_0014/depth_1377.png train/dining_room_0014/filled_1377.png +train/dining_room_0015/rgb_1379.png train/dining_room_0015/depth_1379.png train/dining_room_0015/filled_1379.png +train/dining_room_0016/rgb_1382.png train/dining_room_0016/depth_1382.png train/dining_room_0016/filled_1382.png +train/dining_room_0031/rgb_1425.png train/dining_room_0031/depth_1425.png train/dining_room_0031/filled_1425.png +train/dining_room_0031/rgb_1426.png train/dining_room_0031/depth_1426.png train/dining_room_0031/filled_1426.png +train/dining_room_0033/rgb_1436.png train/dining_room_0033/depth_1436.png train/dining_room_0033/filled_1436.png +train/dining_room_0037/rgb_0548.png train/dining_room_0037/depth_0548.png train/dining_room_0037/filled_0548.png +train/furniture_store_0001/rgb_0224.png train/furniture_store_0001/depth_0224.png 
train/furniture_store_0001/filled_0224.png +train/furniture_store_0001/rgb_0237.png train/furniture_store_0001/depth_0237.png train/furniture_store_0001/filled_0237.png +train/furniture_store_0002/rgb_0249.png train/furniture_store_0002/depth_0249.png train/furniture_store_0002/filled_0249.png +train/home_office_0005/rgb_0368.png train/home_office_0005/depth_0368.png train/home_office_0005/filled_0368.png +train/home_office_0006/rgb_0374.png train/home_office_0006/depth_0374.png train/home_office_0006/filled_0374.png +train/home_office_0008/rgb_0380.png train/home_office_0008/depth_0380.png train/home_office_0008/filled_0380.png +train/home_office_0013/rgb_0554.png train/home_office_0013/depth_0554.png train/home_office_0013/filled_0554.png +train/kitchen_0010/rgb_0138.png train/kitchen_0010/depth_0138.png train/kitchen_0010/filled_0138.png +train/kitchen_0019/rgb_0750.png train/kitchen_0019/depth_0750.png train/kitchen_0019/filled_0750.png +train/kitchen_0019/rgb_0757.png train/kitchen_0019/depth_0757.png train/kitchen_0019/filled_0757.png +train/kitchen_0028/rgb_0788.png train/kitchen_0028/depth_0788.png train/kitchen_0028/filled_0788.png +train/kitchen_0028/rgb_0793.png train/kitchen_0028/depth_0793.png train/kitchen_0028/filled_0793.png +train/kitchen_0029/rgb_0799.png train/kitchen_0029/depth_0799.png train/kitchen_0029/filled_0799.png +train/kitchen_0033/rgb_0815.png train/kitchen_0033/depth_0815.png train/kitchen_0033/filled_0815.png +train/kitchen_0033/rgb_0816.png train/kitchen_0033/depth_0816.png train/kitchen_0033/filled_0816.png +train/kitchen_0037/rgb_0832.png train/kitchen_0037/depth_0832.png train/kitchen_0037/filled_0832.png +train/kitchen_0041/rgb_0849.png train/kitchen_0041/depth_0849.png train/kitchen_0041/filled_0849.png +train/kitchen_0047/rgb_0875.png train/kitchen_0047/depth_0875.png train/kitchen_0047/filled_0875.png +train/kitchen_0050/rgb_0887.png train/kitchen_0050/depth_0887.png train/kitchen_0050/filled_0887.png 
+train/kitchen_0051/rgb_0892.png train/kitchen_0051/depth_0892.png train/kitchen_0051/filled_0892.png +train/kitchen_0051/rgb_0893.png train/kitchen_0051/depth_0893.png train/kitchen_0051/filled_0893.png +train/kitchen_0052/rgb_0899.png train/kitchen_0052/depth_0899.png train/kitchen_0052/filled_0899.png +train/kitchen_0059/rgb_0573.png train/kitchen_0059/depth_0573.png train/kitchen_0059/filled_0573.png +train/living_room_0000/rgb_0050.png train/living_room_0000/depth_0050.png train/living_room_0000/filled_0050.png +train/living_room_0010/rgb_0156.png train/living_room_0010/depth_0156.png train/living_room_0010/filled_0156.png +train/living_room_0010/rgb_0158.png train/living_room_0010/depth_0158.png train/living_room_0010/filled_0158.png +train/living_room_0010/rgb_0159.png train/living_room_0010/depth_0159.png train/living_room_0010/filled_0159.png +train/living_room_0011/rgb_0162.png train/living_room_0011/depth_0162.png train/living_room_0011/filled_0162.png +train/living_room_0019/rgb_0258.png train/living_room_0019/depth_0258.png train/living_room_0019/filled_0258.png +train/living_room_0042/rgb_1251.png train/living_room_0042/depth_1251.png train/living_room_0042/filled_1251.png +train/living_room_0046/rgb_1268.png train/living_room_0046/depth_1268.png train/living_room_0046/filled_1268.png +train/living_room_0047/rgb_1272.png train/living_room_0047/depth_1272.png train/living_room_0047/filled_1272.png +train/living_room_0058/rgb_1301.png train/living_room_0058/depth_1301.png train/living_room_0058/filled_1301.png +train/living_room_0062/rgb_1310.png train/living_room_0062/depth_1310.png train/living_room_0062/filled_1310.png +train/living_room_0063/rgb_1313.png train/living_room_0063/depth_1313.png train/living_room_0063/filled_1313.png +train/living_room_0083/rgb_0588.png train/living_room_0083/depth_0588.png train/living_room_0083/filled_0588.png +train/living_room_0086/rgb_0601.png train/living_room_0086/depth_0601.png 
train/living_room_0086/filled_0601.png +train/office_0003/rgb_0004.png train/office_0003/depth_0004.png train/office_0003/filled_0004.png +train/office_0023/rgb_0623.png train/office_0023/depth_0623.png train/office_0023/filled_0623.png +train/office_0024/rgb_0627.png train/office_0024/depth_0627.png train/office_0024/filled_0627.png +train/office_kitchen_0003/rgb_0415.png train/office_kitchen_0003/depth_0415.png train/office_kitchen_0003/filled_0415.png +train/playroom_0002/rgb_0418.png train/playroom_0002/depth_0418.png train/playroom_0002/filled_0418.png +train/playroom_0003/rgb_0423.png train/playroom_0003/depth_0423.png train/playroom_0003/filled_0423.png +train/playroom_0003/rgb_0424.png train/playroom_0003/depth_0424.png train/playroom_0003/filled_0424.png +train/playroom_0003/rgb_0425.png train/playroom_0003/depth_0425.png train/playroom_0003/filled_0425.png +train/playroom_0004/rgb_0426.png train/playroom_0004/depth_0426.png train/playroom_0004/filled_0426.png +train/printer_room_0001/rgb_0451.png train/printer_room_0001/depth_0451.png train/printer_room_0001/filled_0451.png +train/reception_room_0001/rgb_0456.png train/reception_room_0001/depth_0456.png train/reception_room_0001/filled_0456.png +train/reception_room_0002/rgb_0459.png train/reception_room_0002/depth_0459.png train/reception_room_0002/filled_0459.png +train/reception_room_0004/rgb_0468.png train/reception_room_0004/depth_0468.png train/reception_room_0004/filled_0468.png +train/student_lounge_0001/rgb_0641.png train/student_lounge_0001/depth_0641.png train/student_lounge_0001/filled_0641.png +train/study_0003/rgb_0478.png train/study_0003/depth_0478.png train/study_0003/filled_0478.png +train/study_0005/rgb_0485.png train/study_0005/depth_0485.png train/study_0005/filled_0485.png +train/study_0008/rgb_0646.png train/study_0008/depth_0646.png train/study_0008/filled_0646.png +train/study_room_0004/rgb_0274.png train/study_room_0004/depth_0274.png train/study_room_0004/filled_0274.png \ No 
newline at end of file diff --git a/infer.py b/infer.py index 5db976f..4f88721 100644 --- a/infer.py +++ b/infer.py @@ -31,7 +31,7 @@ from tqdm.auto import tqdm from marigold import MarigoldPipeline -from src.util.seed_all import seed_all +from src.util.seeding import seed_all from src.dataset import ( BaseDepthDataset, DatasetMode, diff --git a/requirements++.txt b/requirements++.txt new file mode 100644 index 0000000..492dd06 --- /dev/null +++ b/requirements++.txt @@ -0,0 +1,6 @@ +h5py +opencv-python +tensorboard +wandb +xformers==0.0.21 + diff --git a/script/eval/11_infer_nyu.sh b/script/eval/11_infer_nyu.sh index 193d302..ad36be5 100644 --- a/script/eval/11_infer_nyu.sh +++ b/script/eval/11_infer_nyu.sh @@ -2,12 +2,16 @@ set -e set -x +# Use specified checkpoint path, otherwise, default value +ckpt=${1:-"prs-eth/marigold-v1-0"} +subfolder=${2:-"eval"} python infer.py \ + --checkpoint $ckpt \ --seed 1234 \ --base_data_dir $BASE_DATA_DIR \ --denoise_steps 50 \ --ensemble_size 10 \ --processing_res 0 \ --dataset_config config/dataset/data_nyu_test.yaml \ - --output_dir output/nyu_test/prediction \ + --output_dir output/${subfolder}/nyu_test/prediction \ diff --git a/script/eval/12_eval_nyu.sh b/script/eval/12_eval_nyu.sh index 61f7d30..69e52b9 100644 --- a/script/eval/12_eval_nyu.sh +++ b/script/eval/12_eval_nyu.sh @@ -2,10 +2,11 @@ set -e set -x +subfolder=${1:-"eval"} python eval.py \ --base_data_dir $BASE_DATA_DIR \ --dataset_config config/dataset/data_nyu_test.yaml \ --alignment least_square \ - --prediction_dir output/nyu_test/prediction \ - --output_dir output/nyu_test/eval_metric \ + --prediction_dir output/${subfolder}/nyu_test/prediction \ + --output_dir output/${subfolder}/nyu_test/eval_metric \ diff --git a/script/eval/21_infer_kitti.sh b/script/eval/21_infer_kitti.sh index 924ccf6..0eb6634 100644 --- a/script/eval/21_infer_kitti.sh +++ b/script/eval/21_infer_kitti.sh @@ -2,12 +2,16 @@ set -e set -x +# Use specified checkpoint path, otherwise, default 
value +ckpt=${1:-"prs-eth/marigold-v1-0"} +subfolder=${2:-"eval"} -python infer.py \ +python infer.py \ + --checkpoint $ckpt \ --seed 1234 \ --base_data_dir $BASE_DATA_DIR \ --denoise_steps 50 \ --ensemble_size 10 \ --processing_res 0 \ --dataset_config config/dataset/data_kitti_eigen_test.yaml \ - --output_dir output/kitti_eigen_test/prediction \ + --output_dir output/${subfolder}/kitti_eigen_test/prediction \ diff --git a/script/eval/22_eval_kitti.sh b/script/eval/22_eval_kitti.sh index 42793d2..69828e7 100644 --- a/script/eval/22_eval_kitti.sh +++ b/script/eval/22_eval_kitti.sh @@ -2,10 +2,11 @@ set -e set -x +subfolder=${1:-"eval"} python eval.py \ --base_data_dir $BASE_DATA_DIR \ --dataset_config config/dataset/data_kitti_eigen_test.yaml \ --alignment least_square \ - --prediction_dir output/kitti_eigen_test/prediction \ - --output_dir output/kitti_eigen_test/eval_metric \ + --prediction_dir output/${subfolder}/kitti_eigen_test/prediction \ + --output_dir output/${subfolder}/kitti_eigen_test/eval_metric \ diff --git a/script/eval/31_infer_eth3d.sh b/script/eval/31_infer_eth3d.sh index ff7ea2f..1dc7efa 100644 --- a/script/eval/31_infer_eth3d.sh +++ b/script/eval/31_infer_eth3d.sh @@ -2,13 +2,17 @@ set -e set -x +# Use specified checkpoint path, otherwise, default value +ckpt=${1:-"prs-eth/marigold-v1-0"} +subfolder=${2:-"eval"} -python infer.py \ +python infer.py \ + --checkpoint $ckpt \ --seed 1234 \ --base_data_dir $BASE_DATA_DIR \ --denoise_steps 50 \ --ensemble_size 10 \ --dataset_config config/dataset/data_eth3d.yaml \ - --output_dir output/eth3d/prediction \ + --output_dir output/${subfolder}/eth3d/prediction \ --processing_res 756 \ --resample_method bilinear \ \ No newline at end of file diff --git a/script/eval/32_eval_eth3d.sh b/script/eval/32_eval_eth3d.sh index a1aa554..f25c346 100644 --- a/script/eval/32_eval_eth3d.sh +++ b/script/eval/32_eval_eth3d.sh @@ -2,11 +2,12 @@ set -e set -x +subfolder=${1:-"eval"} python eval.py \ --base_data_dir 
$BASE_DATA_DIR \ --dataset_config config/dataset/data_eth3d.yaml \ --alignment least_square \ - --prediction_dir output/eth3d/prediction \ - --output_dir output/eth3d/eval_metric \ + --prediction_dir output/${subfolder}/eth3d/prediction \ + --output_dir output/${subfolder}/eth3d/eval_metric \ --alignment_max_res 1024 \ \ No newline at end of file diff --git a/script/eval/41_infer_scannet.sh b/script/eval/41_infer_scannet.sh index 0734bae..15006ea 100644 --- a/script/eval/41_infer_scannet.sh +++ b/script/eval/41_infer_scannet.sh @@ -2,12 +2,16 @@ set -e set -x +# Use specified checkpoint path, otherwise, default value +ckpt=${1:-"prs-eth/marigold-v1-0"} +subfolder=${2:-"eval"} -python infer.py \ +python infer.py \ + --checkpoint $ckpt \ --seed 1234 \ --base_data_dir $BASE_DATA_DIR \ --denoise_steps 50 \ --ensemble_size 10 \ --processing_res 0 \ --dataset_config config/dataset/data_scannet_val.yaml \ - --output_dir output/scannet/prediction \ + --output_dir output/${subfolder}/scannet/prediction \ diff --git a/script/eval/42_eval_scannet.sh b/script/eval/42_eval_scannet.sh index ea78c9a..da4c784 100644 --- a/script/eval/42_eval_scannet.sh +++ b/script/eval/42_eval_scannet.sh @@ -2,10 +2,11 @@ set -e set -x +subfolder=${1:-"eval"} python eval.py \ --base_data_dir $BASE_DATA_DIR \ --dataset_config config/dataset/data_scannet_val.yaml \ --alignment least_square \ - --prediction_dir output/scannet/prediction \ - --output_dir output/scannet/eval_metric \ + --prediction_dir output/${subfolder}/scannet/prediction \ + --output_dir output/${subfolder}/scannet/eval_metric \ diff --git a/script/eval/51_infer_diode.sh b/script/eval/51_infer_diode.sh index d0c9fca..2ec6cc2 100644 --- a/script/eval/51_infer_diode.sh +++ b/script/eval/51_infer_diode.sh @@ -2,13 +2,17 @@ set -e set -x +# Use specified checkpoint path, otherwise, default value +ckpt=${1:-"prs-eth/marigold-v1-0"} +subfolder=${2:-"eval"} -python infer.py \ +python infer.py \ + --checkpoint $ckpt \ --seed 1234 \ 
--base_data_dir $BASE_DATA_DIR \ --denoise_steps 50 \ --ensemble_size 10 \ --dataset_config config/dataset/data_diode_all.yaml \ - --output_dir output/diode/prediction \ + --output_dir output/${subfolder}/diode/prediction \ --processing_res 640 \ --resample_method bilinear \ diff --git a/script/eval/52_eval_diode.sh b/script/eval/52_eval_diode.sh index 9674353..c10c672 100644 --- a/script/eval/52_eval_diode.sh +++ b/script/eval/52_eval_diode.sh @@ -2,10 +2,11 @@ set -e set -x +subfolder=${1:-"eval"} python eval.py \ --base_data_dir $BASE_DATA_DIR \ --dataset_config config/dataset/data_diode_all.yaml \ --alignment least_square \ - --prediction_dir output/diode/prediction \ - --output_dir output/diode/eval_metric \ + --prediction_dir output/${subfolder}/diode/prediction \ + --output_dir output/${subfolder}/diode/eval_metric \ diff --git a/src/dataset/__init__.py b/src/dataset/__init__.py index dd7d502..6c57add 100644 --- a/src/dataset/__init__.py +++ b/src/dataset/__init__.py @@ -1,17 +1,40 @@ -# Author: Bingxin Ke -# Last modified: 2024-03-30 +# Last modified: 2024-04-16 +# +# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. 
+# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. +# More information about the method can be found at https://marigoldmonodepth.github.io +# -------------------------------------------------------------------------- import os from .base_depth_dataset import BaseDepthDataset, get_pred_name, DatasetMode # noqa: F401 from .diode_dataset import DIODEDataset from .eth3d_dataset import ETH3DDataset +from .hypersim_dataset import HypersimDataset from .kitti_dataset import KITTIDataset from .nyu_dataset import NYUDataset from .scannet_dataset import ScanNetDataset +from .vkitti_dataset import VirtualKITTIDataset dataset_name_class_dict = { + "hypersim": HypersimDataset, + "vkitti": VirtualKITTIDataset, "nyu_v2": NYUDataset, "kitti": KITTIDataset, "eth3d": ETH3DDataset, @@ -23,7 +46,14 @@ def get_dataset( cfg_data_split, base_data_dir: str, mode: DatasetMode, **kwargs ) -> BaseDepthDataset: - if cfg_data_split.name in dataset_name_class_dict.keys(): + if "mixed" == cfg_data_split.name: + assert DatasetMode.TRAIN == mode, "Only training mode supports mixed datasets." + dataset_ls = [ + get_dataset(_cfg, base_data_dir, mode, **kwargs) + for _cfg in cfg_data_split.dataset_list + ] + return dataset_ls + elif cfg_data_split.name in dataset_name_class_dict.keys(): dataset_class = dataset_name_class_dict[cfg_data_split.name] dataset = dataset_class( mode=mode, diff --git a/src/dataset/base_depth_dataset.py b/src/dataset/base_depth_dataset.py index 878c03a..11efb7f 100644 --- a/src/dataset/base_depth_dataset.py +++ b/src/dataset/base_depth_dataset.py @@ -1,11 +1,31 @@ -# Author: Bingxin Ke -# Last modified: 2024-04-15 +# Last modified: 2024-04-30 +# +# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. +# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. +# More information about the method can be found at https://marigoldmonodepth.github.io +# -------------------------------------------------------------------------- import io import os import random import tarfile from enum import Enum +from typing import Union import numpy as np import torch @@ -13,6 +33,8 @@ from torch.utils.data import Dataset from torchvision.transforms import InterpolationMode, Resize +from src.util.depth_transform import DepthNormalizerBase + class DatasetMode(Enum): RGB_ONLY = "rgb_only" @@ -20,6 +42,15 @@ class DatasetMode(Enum): TRAIN = "train" +class DepthFileNameMode(Enum): + """Prediction file naming modes""" + + id = 1 # id.png + rgb_id = 2 # rgb_id.png + i_d_rgb = 3 # i_d_1_rgb.png + rgb_i_d = 4 + + def read_image_from_tar(tar_obj, img_rel_path): image = tar_obj.extractfile("./" + img_rel_path) image = image.read() @@ -33,11 +64,11 @@ def __init__( filename_ls_path: str, dataset_dir: str, disp_name: str, - min_depth, - max_depth, - has_filled_depth, - name_mode, - depth_transform=None, + min_depth: float, + max_depth: float, + has_filled_depth: bool, + name_mode: DepthFileNameMode, + depth_transform: Union[DepthNormalizerBase, None] = None, augmentation_args: dict = None, resize_to_hw=None, 
move_invalid_to_far_plane: bool = True, @@ -49,6 +80,9 @@ def __init__( # dataset info self.filename_ls_path = filename_ls_path self.dataset_dir = dataset_dir + assert os.path.exists( + self.dataset_dir + ), f"Dataset does not exist at: {self.dataset_dir}" self.disp_name = disp_name self.has_filled_depth = has_filled_depth self.name_mode: DepthFileNameMode = name_mode @@ -56,7 +90,7 @@ def __init__( self.max_depth = max_depth # training arguments - self.depth_transform = depth_transform + self.depth_transform: DepthNormalizerBase = depth_transform self.augm_args = augmentation_args self.resize_to_hw = resize_to_hw self.rgb_transform = rgb_transform @@ -118,9 +152,11 @@ def _get_data_item(self, index): def _load_rgb_data(self, rgb_rel_path): # Read RGB data rgb = self._read_rgb_file(rgb_rel_path) + rgb_norm = rgb / 255.0 * 2.0 - 1.0 # [0, 255] -> [-1, 1] outputs = { "rgb_int": torch.from_numpy(rgb).int(), + "rgb_norm": torch.from_numpy(rgb_norm).float(), } return outputs @@ -157,12 +193,12 @@ def _read_image(self, img_rel_path) -> np.ndarray: if self.is_tar: if self.tar_obj is None: self.tar_obj = tarfile.open(self.dataset_dir) - image = self.tar_obj.extractfile("./" + img_rel_path) - image = image.read() - image = Image.open(io.BytesIO(image)) # [H, W, rgb] + image_to_read = self.tar_obj.extractfile("./" + img_rel_path) + image_to_read = image_to_read.read() + image_to_read = io.BytesIO(image_to_read) else: - img_path = os.path.join(self.dataset_dir, img_rel_path) - image = Image.open(img_path) + image_to_read = os.path.join(self.dataset_dir, img_rel_path) + image = Image.open(image_to_read) # [H, W, rgb] image = np.asarray(image) return image @@ -226,19 +262,11 @@ def _augment_data(self, rasters_dict): return rasters_dict def __del__(self): - if self.tar_obj is not None: + if hasattr(self, "tar_obj") and self.tar_obj is not None: self.tar_obj.close() self.tar_obj = None -# Prediction file naming modes -class DepthFileNameMode(Enum): - id = 1 # id.png - rgb_id = 2 
# rgb_id.png - i_d_rgb = 3 # i_d_1_rgb.png - rgb_i_d = 4 - - def get_pred_name(rgb_basename, name_mode, suffix=".png"): if DepthFileNameMode.rgb_id == name_mode: pred_basename = "pred_" + rgb_basename.split("_")[1] diff --git a/src/dataset/diode_dataset.py b/src/dataset/diode_dataset.py index 81dc62f..509fb10 100644 --- a/src/dataset/diode_dataset.py +++ b/src/dataset/diode_dataset.py @@ -1,5 +1,24 @@ -# Author: Bingxin Ke # Last modified: 2024-02-26 +# +# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. +# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. +# More information about the method can be found at https://marigoldmonodepth.github.io +# -------------------------------------------------------------------------- import os import tarfile diff --git a/src/dataset/eth3d_dataset.py b/src/dataset/eth3d_dataset.py index 34acce0..02810b8 100644 --- a/src/dataset/eth3d_dataset.py +++ b/src/dataset/eth3d_dataset.py @@ -1,5 +1,24 @@ -# Author: Bingxin Ke # Last modified: 2024-02-08 +# +# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. +# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. +# More information about the method can be found at https://marigoldmonodepth.github.io +# -------------------------------------------------------------------------- import torch import tarfile diff --git a/src/dataset/hypersim_dataset.py b/src/dataset/hypersim_dataset.py new file mode 100644 index 0000000..886a2be --- /dev/null +++ b/src/dataset/hypersim_dataset.py @@ -0,0 +1,45 @@ +# Last modified: 2024-02-08 +# +# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. +# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. +# More information about the method can be found at https://marigoldmonodepth.github.io +# -------------------------------------------------------------------------- + + +from .base_depth_dataset import BaseDepthDataset, DepthFileNameMode + + +class HypersimDataset(BaseDepthDataset): + def __init__( + self, + **kwargs, + ) -> None: + super().__init__( + # Hypersim data parameter + min_depth=1e-5, + max_depth=65.0, + has_filled_depth=False, + name_mode=DepthFileNameMode.rgb_i_d, + **kwargs, + ) + + def _read_depth_file(self, rel_path): + depth_in = self._read_image(rel_path) + # Decode Hypersim depth + depth_decoded = depth_in / 1000.0 + return depth_decoded diff --git a/src/dataset/kitti_dataset.py b/src/dataset/kitti_dataset.py index 7cde0bb..5daa760 100644 --- a/src/dataset/kitti_dataset.py +++ b/src/dataset/kitti_dataset.py @@ -1,5 +1,24 @@ -# Author: Bingxin Ke # Last modified: 2024-02-08 +# +# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. +# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. +# More information about the method can be found at https://marigoldmonodepth.github.io +# -------------------------------------------------------------------------- import torch diff --git a/src/dataset/mixed_sampler.py b/src/dataset/mixed_sampler.py new file mode 100644 index 0000000..3abc60f --- /dev/null +++ b/src/dataset/mixed_sampler.py @@ -0,0 +1,149 @@ +# Last modified: 2024-04-18 +# +# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. +# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. 
class MixedBatchSampler(BatchSampler):
    """Sample one batch from a selected dataset with given probability.
    Compatible with datasets at different resolution

    Every yielded batch holds indices of a single source dataset, shifted by the
    total length of the datasets listed before it, so the indices address a
    ``ConcatDataset`` built from ``src_dataset_ls`` in the same order.

    Args:
        src_dataset_ls: list of sized datasets to draw batches from.
        batch_size: number of indices per batch.
        drop_last: forwarded to the per-dataset ``BatchSampler``.
        shuffle: use ``RandomSampler`` (without replacement) when true,
            ``SequentialSampler`` otherwise.
        prob: per-dataset sampling weights. When ``None``, each dataset is
            weighted by its number of batches. Integer weights are accepted and
            converted to float.
        generator: ``torch.Generator`` used by the random samplers and by the
            dataset selection.
    """

    def __init__(
        self, src_dataset_ls, batch_size, drop_last, shuffle, prob=None, generator=None
    ):
        self.base_sampler = None
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.generator = generator

        self.src_dataset_ls = src_dataset_ls
        self.n_dataset = len(self.src_dataset_ls)

        # Dataset length
        self.dataset_length = [len(ds) for ds in self.src_dataset_ls]
        self.cum_dataset_length = [
            sum(self.dataset_length[:i]) for i in range(self.n_dataset)
        ]  # cumulative dataset length

        # BatchSamplers for each source dataset
        self.src_batch_samplers = [
            BatchSampler(
                sampler=self._make_index_sampler(ds),
                batch_size=self.batch_size,
                drop_last=self.drop_last,
            )
            for ds in self.src_dataset_ls
        ]
        self.raw_batches = [
            list(bs) for bs in self.src_batch_samplers
        ]  # index in original dataset
        self.n_batches = [len(b) for b in self.raw_batches]
        self.n_total_batch = sum(self.n_batches)

        # sampling probability
        if prob is None:
            # if not given, decide by dataset length
            self.prob = torch.tensor(self.n_batches) / self.n_total_batch
        else:
            prob_tensor = torch.as_tensor(prob)
            # torch.multinomial rejects integer tensors; floating-point input
            # (including an existing float64 tensor) is passed through untouched.
            if not torch.is_floating_point(prob_tensor):
                prob_tensor = prob_tensor.float()
            self.prob = prob_tensor

    def _make_index_sampler(self, dataset):
        """Return the per-dataset index sampler selected by ``self.shuffle``."""
        if self.shuffle:
            return RandomSampler(dataset, replacement=False, generator=self.generator)
        return SequentialSampler(dataset)

    def __iter__(self):
        """Yield ``len(self)`` batches, choosing the source dataset of each at random.

        Yields:
            list(int): a batch of indices, corresponding to ConcatDataset of src_dataset_ls
        """
        for _ in range(self.n_total_batch):
            idx_ds = torch.multinomial(
                self.prob, 1, replacement=True, generator=self.generator
            ).item()
            # if batch list is empty, generate new list
            if 0 == len(self.raw_batches[idx_ds]):
                self.raw_batches[idx_ds] = list(self.src_batch_samplers[idx_ds])
            # get a batch from list
            batch_raw = self.raw_batches[idx_ds].pop()
            # shift by cumulative dataset length
            shift = self.cum_dataset_length[idx_ds]
            batch = [n + shift for n in batch_raw]

            yield batch

    def __len__(self):
        """Return the total number of batches over all source datasets."""
        return self.n_total_batch
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. +# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. +# More information about the method can be found at https://marigoldmonodepth.github.io +# -------------------------------------------------------------------------- import torch diff --git a/src/dataset/scannet_dataset.py b/src/dataset/scannet_dataset.py index 251ba66..c401f92 100644 --- a/src/dataset/scannet_dataset.py +++ b/src/dataset/scannet_dataset.py @@ -1,5 +1,24 @@ -# Author: Bingxin Ke # Last modified: 2024-02-08 +# +# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. 
+# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. +# More information about the method can be found at https://marigoldmonodepth.github.io +# -------------------------------------------------------------------------- from .base_depth_dataset import BaseDepthDataset, DepthFileNameMode diff --git a/src/dataset/vkitti_dataset.py b/src/dataset/vkitti_dataset.py new file mode 100644 index 0000000..ceb7903 --- /dev/null +++ b/src/dataset/vkitti_dataset.py @@ -0,0 +1,98 @@ +# Last modified: 2024-02-08 +# +# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. +# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. 
class VirtualKITTIDataset(BaseDepthDataset):
    """Virtual KITTI depth dataset with optional KITTI benchmark crop and evaluation mask.

    Args:
        kitti_bm_crop: when truthy, RGB and depth data are cropped with
            ``KITTIDataset.kitti_benchmark_crop``.
        valid_mask_crop: evaluation mask type, one of ``None``, ``"garg"`` or
            ``"eigen"``.
        **kwargs: forwarded to ``BaseDepthDataset``.
    """

    def __init__(
        self,
        kitti_bm_crop,  # Crop to KITTI benchmark size
        valid_mask_crop,  # Evaluation mask. [None, garg or eigen]
        **kwargs,
    ) -> None:
        super().__init__(
            # virtual KITTI data parameter
            min_depth=1e-5,
            max_depth=80,  # 655.35
            has_filled_depth=False,
            name_mode=DepthFileNameMode.id,
            **kwargs,
        )
        self.kitti_bm_crop = kitti_bm_crop
        self.valid_mask_crop = valid_mask_crop
        assert self.valid_mask_crop in [
            None,
            "garg",  # set evaluation mask according to Garg ECCV16
            "eigen",  # set evaluation mask according to Eigen NIPS14
        ], f"Unknown crop type: {self.valid_mask_crop}"

        # Filter out empty depth
        # NOTE(review): presumably f[1] is the depth path column of the file list -- confirm.
        self.filenames = [f for f in self.filenames if "None" != f[1]]

    def _read_depth_file(self, rel_path):
        """Read the depth image at ``rel_path`` and decode it by dividing by 100."""
        depth_in = self._read_image(rel_path)
        # Decode vKITTI depth
        depth_decoded = depth_in / 100.0
        return depth_decoded

    def _load_rgb_data(self, rgb_rel_path):
        """Load RGB data via the base class and apply the benchmark crop if enabled."""
        rgb_data = super()._load_rgb_data(rgb_rel_path)
        if self.kitti_bm_crop:
            rgb_data = {
                k: KITTIDataset.kitti_benchmark_crop(v) for k, v in rgb_data.items()
            }
        return rgb_data

    def _load_depth_data(self, depth_rel_path, filled_rel_path):
        """Load depth data via the base class and apply the benchmark crop if enabled."""
        depth_data = super()._load_depth_data(depth_rel_path, filled_rel_path)
        if self.kitti_bm_crop:
            depth_data = {
                k: KITTIDataset.kitti_benchmark_crop(v) for k, v in depth_data.items()
            }
        return depth_data

    def _get_valid_mask(self, depth: torch.Tensor):
        """Return the base valid mask, restricted to the Garg/Eigen window if configured."""
        # reference: https://github.com/cleinc/bts/blob/master/pytorch/bts_eval.py
        valid_mask = super()._get_valid_mask(depth)  # [1, H, W]

        if self.valid_mask_crop is not None:
            eval_mask = torch.zeros_like(valid_mask.squeeze()).bool()
            gt_height, gt_width = eval_mask.shape

            if "garg" == self.valid_mask_crop:
                eval_mask[
                    int(0.40810811 * gt_height) : int(0.99189189 * gt_height),
                    int(0.03594771 * gt_width) : int(0.96405229 * gt_width),
                ] = 1
            elif "eigen" == self.valid_mask_crop:
                eval_mask[
                    int(0.3324324 * gt_height) : int(0.91351351 * gt_height),
                    int(0.0359477 * gt_width) : int(0.96405229 * gt_width),
                ] = 1

            # reshape() is not in-place: keep its result so both masks have the
            # same shape instead of relying on broadcasting.
            eval_mask = eval_mask.reshape(valid_mask.shape)
            valid_mask = torch.logical_and(valid_mask, eval_mask)
        return valid_mask
+# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. +# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation +# If you use or adapt this code, please attribute to https://github.com/prs-eth/marigold. +# More information about the method can be found at https://marigoldmonodepth.github.io +# -------------------------------------------------------------------------- + + +import logging +import os +import shutil +from datetime import datetime +from typing import List, Union + +import numpy as np +import torch +from diffusers import DDPMScheduler +from omegaconf import OmegaConf +from torch.nn import Conv2d +from torch.nn.parameter import Parameter +from torch.optim import Adam +from torch.optim.lr_scheduler import LambdaLR +from torch.utils.data import DataLoader +from tqdm import tqdm +from PIL import Image + +from marigold.marigold_pipeline import MarigoldPipeline, MarigoldDepthOutput +from src.util import metric +from src.util.data_loader import skip_first_batches +from src.util.logging_util import tb_logger, eval_dic_to_text +from src.util.loss import get_loss +from src.util.lr_scheduler import IterExponential +from src.util.metric import MetricTracker +from src.util.multi_res_noise import multi_res_noise_like +from src.util.alignment import align_depth_least_square +from src.util.seeding import generate_seed_sequence + + +class MarigoldTrainer: + def __init__( + self, + cfg: OmegaConf, + model: MarigoldPipeline, + train_dataloader: DataLoader, + device, + base_ckpt_dir, + out_dir_ckpt, + out_dir_eval, + out_dir_vis, + accumulation_steps: int, + val_dataloaders: List[DataLoader] = None, + vis_dataloaders: List[DataLoader] = None, + ): + self.cfg: OmegaConf = cfg + self.model: MarigoldPipeline = model + self.device = device + self.seed: Union[int, None] = ( + self.cfg.trainer.init_seed + ) # used to generate seed sequence, set to `None` to 
train w/o seeding + self.out_dir_ckpt = out_dir_ckpt + self.out_dir_eval = out_dir_eval + self.out_dir_vis = out_dir_vis + self.train_loader: DataLoader = train_dataloader + self.val_loaders: List[DataLoader] = val_dataloaders + self.vis_loaders: List[DataLoader] = vis_dataloaders + self.accumulation_steps: int = accumulation_steps + + # Adapt input layers + if 8 != self.model.unet.config["in_channels"]: + self._replace_unet_conv_in() + + # Encode empty text prompt + self.model.encode_empty_text() + self.empty_text_embed = self.model.empty_text_embed.detach().clone().to(device) + + self.model.unet.enable_xformers_memory_efficient_attention() + + # Trainability + self.model.vae.requires_grad_(False) + self.model.text_encoder.requires_grad_(False) + self.model.unet.requires_grad_(True) + + # Optimizer !should be defined after input layer is adapted + lr = self.cfg.lr + self.optimizer = Adam(self.model.unet.parameters(), lr=lr) + + # LR scheduler + lr_func = IterExponential( + total_iter_length=self.cfg.lr_scheduler.kwargs.total_iter, + final_ratio=self.cfg.lr_scheduler.kwargs.final_ratio, + warmup_steps=self.cfg.lr_scheduler.kwargs.warmup_steps, + ) + self.lr_scheduler = LambdaLR(optimizer=self.optimizer, lr_lambda=lr_func) + + # Loss + self.loss = get_loss(loss_name=self.cfg.loss.name, **self.cfg.loss.kwargs) + + # Training noise scheduler + self.training_noise_scheduler: DDPMScheduler = DDPMScheduler.from_pretrained( + os.path.join( + base_ckpt_dir, + cfg.trainer.training_noise_scheduler.pretrained_path, + "scheduler", + ) + ) + self.prediction_type = self.training_noise_scheduler.config.prediction_type + assert ( + self.prediction_type == self.model.scheduler.config.prediction_type + ), "Different prediction types" + self.scheduler_timesteps = ( + self.training_noise_scheduler.config.num_train_timesteps + ) + + # Eval metrics + self.metric_funcs = [getattr(metric, _met) for _met in cfg.eval.eval_metrics] + self.train_metrics = MetricTracker(*["loss"]) + 
self.val_metrics = MetricTracker(*[m.__name__ for m in self.metric_funcs]) + # main metric for best checkpoint saving + self.main_val_metric = cfg.validation.main_val_metric + self.main_val_metric_goal = cfg.validation.main_val_metric_goal + assert ( + self.main_val_metric in cfg.eval.eval_metrics + ), f"Main eval metric `{self.main_val_metric}` not found in evaluation metrics." + self.best_metric = 1e8 if "minimize" == self.main_val_metric_goal else -1e8 + + # Settings + self.max_epoch = self.cfg.max_epoch + self.max_iter = self.cfg.max_iter + self.gradient_accumulation_steps = accumulation_steps + self.gt_depth_type = self.cfg.gt_depth_type + self.gt_mask_type = self.cfg.gt_mask_type + self.save_period = self.cfg.trainer.save_period + self.backup_period = self.cfg.trainer.backup_period + self.val_period = self.cfg.trainer.validation_period + self.vis_period = self.cfg.trainer.visualization_period + + # Multi-resolution noise + self.apply_multi_res_noise = self.cfg.multi_res_noise is not None + if self.apply_multi_res_noise: + self.mr_noise_strength = self.cfg.multi_res_noise.strength + self.annealed_mr_noise = self.cfg.multi_res_noise.annealed + self.mr_noise_downscale_strategy = ( + self.cfg.multi_res_noise.downscale_strategy + ) + + # Internal variables + self.epoch = 1 + self.n_batch_in_epoch = 0 # batch index in the epoch, used when resume training + self.effective_iter = 0 # how many times optimizer.step() is called + self.in_evaluation = False + self.global_seed_sequence: List = [] # consistent global seed sequence, used to seed random generator, to ensure consistency when resuming + + def _replace_unet_conv_in(self): + # replace the first layer to accept 8 in_channels + _weight = self.model.unet.conv_in.weight.clone() # [320, 4, 3, 3] + _bias = self.model.unet.conv_in.bias.clone() # [320] + _weight = _weight.repeat((1, 2, 1, 1)) # Keep selected channel(s) + # half the activation magnitude + _weight *= 0.5 + # new conv_in channel + _n_convin_out_channel = 
self.model.unet.conv_in.out_channels + _new_conv_in = Conv2d( + 8, _n_convin_out_channel, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) + ) + _new_conv_in.weight = Parameter(_weight) + _new_conv_in.bias = Parameter(_bias) + self.model.unet.conv_in = _new_conv_in + logging.info("Unet conv_in layer is replaced") + # replace config + self.model.unet.config["in_channels"] = 8 + logging.info("Unet config is updated") + return + + def train(self, t_end=None): + logging.info("Start training") + + device = self.device + self.model.to(device) + + if self.in_evaluation: + logging.info( + "Last evaluation was not finished, will do evaluation before continue training." + ) + self.validate() + + self.train_metrics.reset() + accumulated_step = 0 + + for epoch in range(self.epoch, self.max_epoch + 1): + self.epoch = epoch + logging.debug(f"epoch: {self.epoch}") + + # Skip previous batches when resume + for batch in skip_first_batches(self.train_loader, self.n_batch_in_epoch): + self.model.unet.train() + + # globally consistent random generators + if self.seed is not None: + local_seed = self._get_next_seed() + rand_num_generator = torch.Generator(device=device) + rand_num_generator.manual_seed(local_seed) + else: + rand_num_generator = None + + # >>> With gradient accumulation >>> + + # Get data + rgb = batch["rgb_norm"].to(device) + depth_gt_for_latent = batch[self.gt_depth_type].to(device) + + if self.gt_mask_type is not None: + valid_mask_for_latent = batch[self.gt_mask_type].to(device) + invalid_mask = ~valid_mask_for_latent + valid_mask_down = ~torch.max_pool2d( + invalid_mask.float(), 8, 8 + ).bool() + valid_mask_down = valid_mask_down.repeat((1, 4, 1, 1)) + else: + raise NotImplementedError + + batch_size = rgb.shape[0] + + with torch.no_grad(): + # Encode image + rgb_latent = self.model.encode_rgb(rgb) # [B, 4, h, w] + # Encode GT depth + gt_depth_latent = self.encode_depth( + depth_gt_for_latent + ) # [B, 4, h, w] + + # Sample a random timestep for each image + 
timesteps = torch.randint( + 0, + self.scheduler_timesteps, + (batch_size,), + device=device, + generator=rand_num_generator, + ).long() # [B] + + # Sample noise + if self.apply_multi_res_noise: + strength = self.mr_noise_strength + if self.annealed_mr_noise: + # calculate strength depending on t + strength = strength * (timesteps / self.scheduler_timesteps) + noise = multi_res_noise_like( + gt_depth_latent, + strength=strength, + downscale_strategy=self.mr_noise_downscale_strategy, + generator=rand_num_generator, + device=device, + ) + else: + noise = torch.randn( + gt_depth_latent.shape, + device=device, + generator=rand_num_generator, + ) # [B, 4, h, w] + + # Add noise to the latents (diffusion forward process) + noisy_latents = self.training_noise_scheduler.add_noise( + gt_depth_latent, noise, timesteps + ) # [B, 4, h, w] + + # Text embedding + text_embed = self.empty_text_embed.to(device).repeat( + (batch_size, 1, 1) + ) # [B, 77, 1024] + + # Concat rgb and depth latents + cat_latents = torch.cat( + [rgb_latent, noisy_latents], dim=1 + ) # [B, 8, h, w] + cat_latents = cat_latents.float() + + # Predict the noise residual + model_pred = self.model.unet( + cat_latents, timesteps, text_embed + ).sample # [B, 4, h, w] + if torch.isnan(model_pred).any(): + logging.warning("model_pred contains NaN.") + + # Get the target for loss depending on the prediction type + if "sample" == self.prediction_type: + target = gt_depth_latent + elif "epsilon" == self.prediction_type: + target = noise + elif "v_prediction" == self.prediction_type: + target = self.training_noise_scheduler.get_velocity( + gt_depth_latent, noise, timesteps + ) # [B, 4, h, w] + else: + raise ValueError(f"Unknown prediction type {self.prediction_type}") + + # Masked latent loss + if self.gt_mask_type is not None: + latent_loss = self.loss( + model_pred[valid_mask_down].float(), + target[valid_mask_down].float(), + ) + else: + latent_loss = self.loss(model_pred.float(), target.float()) + + loss = 
@staticmethod
def stack_depth_images(depth_in):
    """Repeat a depth batch three times along the channel axis.

    Args:
        depth_in: depth tensor of rank 4 (``[B, C, H, W]``, channel axis
            repeated three times, so ``C == 1`` gives ``[B, 3, H, W]``) or
            rank 3 (``[B, H, W]``, a channel axis is inserted first).

    Returns:
        The stacked tensor; ``[B, 3, H, W]`` for single-channel input.

    Raises:
        ValueError: if ``depth_in`` is neither rank 3 nor rank 4.
    """
    n_dims = depth_in.dim()
    if 4 == n_dims:
        stacked = depth_in.repeat(1, 3, 1, 1)
    elif 3 == n_dims:
        # Repeat the unsqueezed tensor, not the rank-3 input: repeating the
        # input would prepend an axis and yield [1, 3B, H, W].
        stacked = depth_in.unsqueeze(1).repeat(1, 3, 1, 1)
    else:
        raise ValueError(
            f"Expected a 3D or 4D depth tensor, got shape {tuple(depth_in.shape)}"
        )
    return stacked
def visualize(self):
    """Run every visualization loader and save its predictions.

    For each loader in ``self.vis_loaders`` an output directory
    ``<out_dir_vis>/<backup checkpoint name>/<dataset disp_name>`` is created
    and passed to ``validate_single_dataset`` as ``save_to_dir``. The returned
    metrics are discarded.
    """
    for loader in self.vis_loaders:
        dataset_label = loader.dataset.disp_name
        target_dir = os.path.join(
            self.out_dir_vis, self._get_backup_ckpt_name(), dataset_label
        )
        os.makedirs(target_dir, exist_ok=True)
        # Only the saved files matter here; the metric result is ignored.
        self.validate_single_dataset(
            data_loader=loader,
            metric_tracker=self.val_metrics,
            save_to_dir=target_dir,
        )
data_loader.batch_size + # Read input image + rgb_int = batch["rgb_int"].squeeze() # [3, H, W] + # GT depth + depth_raw_ts = batch["depth_raw_linear"].squeeze() + depth_raw = depth_raw_ts.numpy() + depth_raw_ts = depth_raw_ts.to(self.device) + valid_mask_ts = batch["valid_mask_raw"].squeeze() + valid_mask = valid_mask_ts.numpy() + valid_mask_ts = valid_mask_ts.to(self.device) + + # Random number generator + seed = val_seed_ls.pop() + if seed is None: + generator = None + else: + generator = torch.Generator(device=self.device) + generator.manual_seed(seed) + + # Predict depth + pipe_out: MarigoldDepthOutput = self.model( + rgb_int, + denoising_steps=self.cfg.validation.denoising_steps, + ensemble_size=self.cfg.validation.ensemble_size, + processing_res=self.cfg.validation.processing_res, + match_input_res=self.cfg.validation.match_input_res, + generator=generator, + batch_size=1, # use batch size 1 to increase reproducibility + color_map=None, + show_progress_bar=False, + resample_method=self.cfg.validation.resample_method, + ) + + depth_pred: np.ndarray = pipe_out.depth_np + + if "least_square" == self.cfg.eval.alignment: + depth_pred, scale, shift = align_depth_least_square( + gt_arr=depth_raw, + pred_arr=depth_pred, + valid_mask_arr=valid_mask, + return_scale_shift=True, + max_resolution=self.cfg.eval.align_max_res, + ) + else: + raise RuntimeError(f"Unknown alignment type: {self.cfg.eval.alignment}") + + # Clip to dataset min max + depth_pred = np.clip( + depth_pred, + a_min=data_loader.dataset.min_depth, + a_max=data_loader.dataset.max_depth, + ) + + # clip to d > 0 for evaluation + depth_pred = np.clip(depth_pred, a_min=1e-6, a_max=None) + + # Evaluate + sample_metric = [] + depth_pred_ts = torch.from_numpy(depth_pred).to(self.device) + + for met_func in self.metric_funcs: + _metric_name = met_func.__name__ + _metric = met_func(depth_pred_ts, depth_raw_ts, valid_mask_ts).item() + sample_metric.append(_metric.__str__()) + metric_tracker.update(_metric_name, 
def save_checkpoint(self, ckpt_name, save_train_state):
    """Write the UNet (and optionally the trainer state) to ``out_dir_ckpt/ckpt_name``.

    An existing checkpoint directory of the same name is first renamed to
    ``_old_<ckpt_name>`` and only deleted after the new checkpoint is written.

    Args:
        ckpt_name: name of the checkpoint directory inside ``self.out_dir_ckpt``.
        save_train_state: when true, also save ``trainer.ckpt`` (optimizer,
            LR scheduler, config, counters, seed sequence) and an empty file
            named after the current iteration.
    """
    target_dir = os.path.join(self.out_dir_ckpt, ckpt_name)
    logging.info(f"Saving checkpoint to: {target_dir}")

    # Backup previous checkpoint
    backup_dir = None
    if os.path.isdir(target_dir):
        backup_dir = os.path.join(
            os.path.dirname(target_dir), f"_old_{os.path.basename(target_dir)}"
        )
        if os.path.exists(backup_dir):
            shutil.rmtree(backup_dir, ignore_errors=True)
        os.rename(target_dir, backup_dir)
        logging.debug(f"Old checkpoint is backed up at: {backup_dir}")

    # Save UNet
    # (load_checkpoint of this class reads unet/diffusion_pytorch_model.bin)
    unet_dir = os.path.join(target_dir, "unet")
    self.model.unet.save_pretrained(unet_dir, safe_serialization=False)
    logging.info(f"UNet is saved to: {unet_dir}")

    if save_train_state:
        trainer_state = dict(
            optimizer=self.optimizer.state_dict(),
            lr_scheduler=self.lr_scheduler.state_dict(),
            config=self.cfg,
            effective_iter=self.effective_iter,
            epoch=self.epoch,
            n_batch_in_epoch=self.n_batch_in_epoch,
            best_metric=self.best_metric,
            in_evaluation=self.in_evaluation,
            global_seed_sequence=self.global_seed_sequence,
        )
        train_state_path = os.path.join(target_dir, "trainer.ckpt")
        torch.save(trainer_state, train_state_path)
        # iteration indicator: an empty file named after the current iteration
        with open(os.path.join(target_dir, self._get_backup_ckpt_name()), "w"):
            pass

        logging.info(f"Trainer state is saved to: {train_state_path}")

    # Remove temp ckpt
    if backup_dir is not None and os.path.exists(backup_dir):
        shutil.rmtree(backup_dir, ignore_errors=True)
        logging.debug("Old checkpoint backup is removed.")
Resume from iteration {self.effective_iter} (epoch {self.epoch})" + ) + return + + def _get_backup_ckpt_name(self): + return f"iter_{self.effective_iter:06d}" diff --git a/src/util/config_util.py b/src/util/config_util.py new file mode 100644 index 0000000..0e9fa45 --- /dev/null +++ b/src/util/config_util.py @@ -0,0 +1,49 @@ +# Author: Bingxin Ke +# Last modified: 2024-02-14 + +import omegaconf +from omegaconf import OmegaConf + + +def recursive_load_config(config_path: str) -> OmegaConf: + conf = OmegaConf.load(config_path) + + output_conf = OmegaConf.create({}) + + # Load base config. Later configs on the list will overwrite previous + base_configs = conf.get("base_config", default_value=None) + if base_configs is not None: + assert isinstance(base_configs, omegaconf.listconfig.ListConfig) + for _path in base_configs: + assert ( + _path != config_path + ), "Circulate merging, base_config should not include itself." + _base_conf = recursive_load_config(_path) + output_conf = OmegaConf.merge(output_conf, _base_conf) + + # Merge configs and overwrite values + output_conf = OmegaConf.merge(output_conf, conf) + + return output_conf + + +def find_value_in_omegaconf(search_key, config): + result_list = [] + + if isinstance(config, omegaconf.DictConfig): + for key, value in config.items(): + if key == search_key: + result_list.append(value) + elif isinstance(value, (omegaconf.DictConfig, omegaconf.ListConfig)): + result_list.extend(find_value_in_omegaconf(search_key, value)) + elif isinstance(config, omegaconf.ListConfig): + for item in config: + if isinstance(item, (omegaconf.DictConfig, omegaconf.ListConfig)): + result_list.extend(find_value_in_omegaconf(search_key, item)) + + return result_list + + +if "__main__" == __name__: + conf = recursive_load_config("config/train_base.yaml") + print(OmegaConf.to_yaml(conf)) diff --git a/src/util/data_loader.py b/src/util/data_loader.py new file mode 100644 index 0000000..0fe42ab --- /dev/null +++ b/src/util/data_loader.py @@ 
-0,0 +1,111 @@ +# Copied from https://github.com/huggingface/accelerate/blob/e2ae254008061b3e53fc1c97f88d65743a857e75/src/accelerate/data_loader.py + +from torch.utils.data import BatchSampler, DataLoader, IterableDataset + +# kwargs of the DataLoader in min version 1.4.0. +_PYTORCH_DATALOADER_KWARGS = { + "batch_size": 1, + "shuffle": False, + "sampler": None, + "batch_sampler": None, + "num_workers": 0, + "collate_fn": None, + "pin_memory": False, + "drop_last": False, + "timeout": 0, + "worker_init_fn": None, + "multiprocessing_context": None, + "generator": None, + "prefetch_factor": 2, + "persistent_workers": False, +} + + +class SkipBatchSampler(BatchSampler): + """ + A `torch.utils.data.BatchSampler` that skips the first `n` batches of another `torch.utils.data.BatchSampler`. + """ + + def __init__(self, batch_sampler, skip_batches=0): + self.batch_sampler = batch_sampler + self.skip_batches = skip_batches + + def __iter__(self): + for index, samples in enumerate(self.batch_sampler): + if index >= self.skip_batches: + yield samples + + @property + def total_length(self): + return len(self.batch_sampler) + + def __len__(self): + return len(self.batch_sampler) - self.skip_batches + + +class SkipDataLoader(DataLoader): + """ + Subclass of a PyTorch `DataLoader` that will skip the first batches. + + Args: + dataset (`torch.utils.data.dataset.Dataset`): + The dataset to use to build this datalaoder. + skip_batches (`int`, *optional*, defaults to 0): + The number of batches to skip at the beginning. + kwargs: + All other keyword arguments to pass to the regular `DataLoader` initialization. 
+ """ + + def __init__(self, dataset, skip_batches=0, **kwargs): + super().__init__(dataset, **kwargs) + self.skip_batches = skip_batches + + def __iter__(self): + for index, batch in enumerate(super().__iter__()): + if index >= self.skip_batches: + yield batch + + +# Adapted from https://github.com/huggingface/accelerate +def skip_first_batches(dataloader, num_batches=0): + """ + Creates a `torch.utils.data.DataLoader` that will efficiently skip the first `num_batches`. + """ + dataset = dataloader.dataset + sampler_is_batch_sampler = False + if isinstance(dataset, IterableDataset): + new_batch_sampler = None + else: + sampler_is_batch_sampler = isinstance(dataloader.sampler, BatchSampler) + batch_sampler = ( + dataloader.sampler if sampler_is_batch_sampler else dataloader.batch_sampler + ) + new_batch_sampler = SkipBatchSampler(batch_sampler, skip_batches=num_batches) + + # We ignore all of those since they are all dealt with by our new_batch_sampler + ignore_kwargs = [ + "batch_size", + "shuffle", + "sampler", + "batch_sampler", + "drop_last", + ] + + kwargs = { + k: getattr(dataloader, k, _PYTORCH_DATALOADER_KWARGS[k]) + for k in _PYTORCH_DATALOADER_KWARGS + if k not in ignore_kwargs + } + + # Need to provide batch_size as batch_sampler is None for Iterable dataset + if new_batch_sampler is None: + kwargs["drop_last"] = dataloader.drop_last + kwargs["batch_size"] = dataloader.batch_size + + if new_batch_sampler is None: + # Need to manually skip batches in the dataloader + dataloader = SkipDataLoader(dataset, skip_batches=num_batches, **kwargs) + else: + dataloader = DataLoader(dataset, batch_sampler=new_batch_sampler, **kwargs) + + return dataloader diff --git a/src/util/depth_transform.py b/src/util/depth_transform.py index 6062f59..ac9d626 100644 --- a/src/util/depth_transform.py +++ b/src/util/depth_transform.py @@ -1,7 +1,8 @@ # Author: Bingxin Ke -# Last modified: 2024-02-08 +# Last modified: 2024-04-18 import torch +import logging def 
get_depth_normalizer(cfg_normalizer): @@ -12,8 +13,8 @@ def identical(x): depth_transform = identical - elif "near_far_metric" == cfg_normalizer.type: - depth_transform = NearFarMetricNormalizer( + elif "scale_shift_depth" == cfg_normalizer.type: + depth_transform = ScaleShiftDepthNormalizer( norm_min=cfg_normalizer.norm_min, norm_max=cfg_normalizer.norm_max, min_max_quantile=cfg_normalizer.min_max_quantile, @@ -25,7 +26,7 @@ def identical(x): class DepthNormalizerBase: - is_relative = None + is_absolute = None far_plane_at_max = None def __init__( @@ -46,12 +47,15 @@ def denormalize(self, depth_norm, **kwargs): raise NotImplementedError -class NearFarMetricNormalizer(DepthNormalizerBase): +class ScaleShiftDepthNormalizer(DepthNormalizerBase): """ - depth in [0, d_max] -> [-1, 1] + Use near and far plane to linearly normalize depth, + i.e. d' = d * s + t, + where near plane is mapped to `norm_min`, and far plane is mapped to `norm_max` + Near and far planes are determined by taking quantile values. 
""" - is_relative = True + is_absolute = False far_plane_at_max = True def __init__( @@ -95,4 +99,5 @@ def scale_back(self, depth_norm): return depth_linear def denormalize(self, depth_norm, **kwargs): + logging.warning(f"{self.__class__} is not revertible without GT") return self.scale_back(depth_norm=depth_norm) diff --git a/src/util/logging_util.py b/src/util/logging_util.py new file mode 100644 index 0000000..37dd103 --- /dev/null +++ b/src/util/logging_util.py @@ -0,0 +1,102 @@ +# Author: Bingxin Ke +# Last modified: 2024-03-12 + +import logging +import os +import sys +import wandb +from tabulate import tabulate +from torch.utils.tensorboard import SummaryWriter + + +def config_logging(cfg_logging, out_dir=None): + file_level = cfg_logging.get("file_level", 10) + console_level = cfg_logging.get("console_level", 10) + + log_formatter = logging.Formatter(cfg_logging["format"]) + + root_logger = logging.getLogger() + root_logger.handlers.clear() + + root_logger.setLevel(min(file_level, console_level)) + + if out_dir is not None: + _logging_file = os.path.join( + out_dir, cfg_logging.get("filename", "logging.log") + ) + file_handler = logging.FileHandler(_logging_file) + file_handler.setFormatter(log_formatter) + file_handler.setLevel(file_level) + root_logger.addHandler(file_handler) + + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setFormatter(log_formatter) + console_handler.setLevel(console_level) + root_logger.addHandler(console_handler) + + # Avoid pollution by packages + logging.getLogger("PIL").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + + +class MyTrainingLogger: + """Tensorboard + wandb logger""" + + writer: SummaryWriter + is_initialized = False + + def __init__(self) -> None: + pass + + def set_dir(self, tb_log_dir): + if self.is_initialized: + raise ValueError("Do not initialize writer twice") + self.writer = SummaryWriter(tb_log_dir) + self.is_initialized = True + + def log_dic(self, 
scalar_dic, global_step, walltime=None): + for k, v in scalar_dic.items(): + self.writer.add_scalar(k, v, global_step=global_step, walltime=walltime) + return + + +# global instance +tb_logger = MyTrainingLogger() + + +# -------------- wandb tools -------------- +def init_wandb(enable: bool, **kwargs): + if enable: + run = wandb.init(sync_tensorboard=True, **kwargs) + else: + run = wandb.init(mode="disabled") + return run + + +def log_slurm_job_id(step): + global tb_logger + _jobid = os.getenv("SLURM_JOB_ID") + if _jobid is None: + _jobid = -1 + tb_logger.writer.add_scalar("job_id", int(_jobid), global_step=step) + logging.debug(f"Slurm job_id: {_jobid}") + + +def load_wandb_job_id(out_dir): + with open(os.path.join(out_dir, "WANDB_ID"), "r") as f: + wandb_id = f.read() + return wandb_id + + +def save_wandb_job_id(run, out_dir): + with open(os.path.join(out_dir, "WANDB_ID"), "w+") as f: + f.write(run.id) + + +def eval_dic_to_text(val_metrics: dict, dataset_name: str, sample_list_path: str): + eval_text = f"Evaluation metrics:\n\ + on dataset: {dataset_name}\n\ + over samples in: {sample_list_path}\n" + + eval_text += tabulate([val_metrics.keys(), val_metrics.values()]) + return eval_text diff --git a/src/util/loss.py b/src/util/loss.py new file mode 100644 index 0000000..ee6dace --- /dev/null +++ b/src/util/loss.py @@ -0,0 +1,124 @@ +# Author: Bingxin Ke +# Last modified: 2024-02-22 + +import torch + + +def get_loss(loss_name, **kwargs): + if "silog_mse" == loss_name: + criterion = SILogMSELoss(**kwargs) + elif "silog_rmse" == loss_name: + criterion = SILogRMSELoss(**kwargs) + elif "mse_loss" == loss_name: + criterion = torch.nn.MSELoss(**kwargs) + elif "l1_loss" == loss_name: + criterion = torch.nn.L1Loss(**kwargs) + elif "l1_loss_with_mask" == loss_name: + criterion = L1LossWithMask(**kwargs) + elif "mean_abs_rel" == loss_name: + criterion = MeanAbsRelLoss() + else: + raise NotImplementedError + + return criterion + + +class L1LossWithMask: + def __init__(self, 
batch_reduction=False): + self.batch_reduction = batch_reduction + + def __call__(self, depth_pred, depth_gt, valid_mask=None): + diff = depth_pred - depth_gt + if valid_mask is not None: + diff[~valid_mask] = 0 + n = valid_mask.sum((-1, -2)) + else: + n = depth_gt.shape[-2] * depth_gt.shape[-1] + + loss = torch.sum(torch.abs(diff)) / n + if self.batch_reduction: + loss = loss.mean() + return loss + + +class MeanAbsRelLoss: + def __init__(self) -> None: + # super().__init__() + pass + + def __call__(self, pred, gt): + diff = pred - gt + rel_abs = torch.abs(diff / gt) + loss = torch.mean(rel_abs, dim=0) + return loss + + +class SILogMSELoss: + def __init__(self, lamb, log_pred=True, batch_reduction=True): + """Scale Invariant Log MSE Loss + + Args: + lamb (_type_): lambda, lambda=1 -> scale invariant, lambda=0 -> L2 loss + log_pred (bool, optional): True if model prediction is logarithmic depht. Will not do log for depth_pred + """ + super(SILogMSELoss, self).__init__() + self.lamb = lamb + self.pred_in_log = log_pred + self.batch_reduction = batch_reduction + + def __call__(self, depth_pred, depth_gt, valid_mask=None): + log_depth_pred = ( + depth_pred if self.pred_in_log else torch.log(torch.clip(depth_pred, 1e-8)) + ) + log_depth_gt = torch.log(depth_gt) + + diff = log_depth_pred - log_depth_gt + if valid_mask is not None: + diff[~valid_mask] = 0 + n = valid_mask.sum((-1, -2)) + else: + n = depth_gt.shape[-2] * depth_gt.shape[-1] + + diff2 = torch.pow(diff, 2) + + first_term = torch.sum(diff2, (-1, -2)) / n + second_term = self.lamb * torch.pow(torch.sum(diff, (-1, -2)), 2) / (n**2) + loss = first_term - second_term + if self.batch_reduction: + loss = loss.mean() + return loss + + +class SILogRMSELoss: + def __init__(self, lamb, alpha, log_pred=True): + """Scale Invariant Log RMSE Loss + + Args: + lamb (_type_): lambda, lambda=1 -> scale invariant, lambda=0 -> L2 loss + alpha: + log_pred (bool, optional): True if model prediction is logarithmic depht. 
Will not do log for depth_pred + """ + super(SILogRMSELoss, self).__init__() + self.lamb = lamb + self.alpha = alpha + self.pred_in_log = log_pred + + def __call__(self, depth_pred, depth_gt, valid_mask): + log_depth_pred = depth_pred if self.pred_in_log else torch.log(depth_pred) + log_depth_gt = torch.log(depth_gt) + # borrowed from https://github.com/aliyun/NeWCRFs + # diff = log_depth_pred[valid_mask] - log_depth_gt[valid_mask] + # return torch.sqrt((diff ** 2).mean() - self.lamb * (diff.mean() ** 2)) * self.alpha + + diff = log_depth_pred - log_depth_gt + if valid_mask is not None: + diff[~valid_mask] = 0 + n = valid_mask.sum((-1, -2)) + else: + n = depth_gt.shape[-2] * depth_gt.shape[-1] + + diff2 = torch.pow(diff, 2) + first_term = torch.sum(diff2, (-1, -2)) / n + second_term = self.lamb * torch.pow(torch.sum(diff, (-1, -2)), 2) / (n**2) + loss = torch.sqrt(first_term - second_term).mean() * self.alpha + return loss diff --git a/src/util/lr_scheduler.py b/src/util/lr_scheduler.py new file mode 100644 index 0000000..cd2d67f --- /dev/null +++ b/src/util/lr_scheduler.py @@ -0,0 +1,48 @@ +# Author: Bingxin Ke +# Last modified: 2024-02-22 + +import numpy as np + + +class IterExponential: + def __init__(self, total_iter_length, final_ratio, warmup_steps=0) -> None: + """ + Customized iteration-wise exponential scheduler. 
+ Re-calculate for every step, to reduce error accumulation + + Args: + total_iter_length (int): Expected total iteration number + final_ratio (float): Expected LR ratio at n_iter = total_iter_length + """ + self.total_length = total_iter_length + self.effective_length = total_iter_length - warmup_steps + self.final_ratio = final_ratio + self.warmup_steps = warmup_steps + + def __call__(self, n_iter) -> float: + if n_iter < self.warmup_steps: + alpha = 1.0 * n_iter / self.warmup_steps + elif n_iter >= self.total_length: + alpha = self.final_ratio + else: + actual_iter = n_iter - self.warmup_steps + alpha = np.exp( + actual_iter / self.effective_length * np.log(self.final_ratio) + ) + return alpha + + +if "__main__" == __name__: + lr_scheduler = IterExponential( + total_iter_length=50000, final_ratio=0.01, warmup_steps=200 + ) + lr_scheduler = IterExponential( + total_iter_length=50000, final_ratio=0.01, warmup_steps=0 + ) + + x = np.arange(100000) + alphas = [lr_scheduler(i) for i in x] + import matplotlib.pyplot as plt + + plt.plot(alphas) + plt.savefig("lr_scheduler.png") diff --git a/src/util/multi_res_noise.py b/src/util/multi_res_noise.py new file mode 100644 index 0000000..e4d0ee0 --- /dev/null +++ b/src/util/multi_res_noise.py @@ -0,0 +1,75 @@ +# Author: Bingxin Ke +# Last modified: 2024-04-18 + +import torch +import math + + +# adapted from: https://wandb.ai/johnowhitaker/multires_noise/reports/Multi-Resolution-Noise-for-Diffusion-Model-Training--VmlldzozNjYyOTU2?s=31 +def multi_res_noise_like( + x, strength=0.9, downscale_strategy="original", generator=None, device=None +): + if torch.is_tensor(strength): + strength = strength.reshape((-1, 1, 1, 1)) + b, c, w, h = x.shape + + if device is None: + device = x.device + + up_sampler = torch.nn.Upsample(size=(w, h), mode="bilinear") + noise = torch.randn(x.shape, device=x.device, generator=generator) + + if "original" == downscale_strategy: + for i in range(10): + r = ( + torch.rand(1, generator=generator, 
device=device) * 2 + 2 + ) # Rather than always going 2x, + w, h = max(1, int(w / (r**i))), max(1, int(h / (r**i))) + noise += ( + up_sampler( + torch.randn(b, c, w, h, generator=generator, device=device).to(x) + ) + * strength**i + ) + if w == 1 or h == 1: + break # Lowest resolution is 1x1 + elif "every_layer" == downscale_strategy: + for i in range(int(math.log2(min(w, h)))): + w, h = max(1, int(w / 2)), max(1, int(h / 2)) + noise += ( + up_sampler( + torch.randn(b, c, w, h, generator=generator, device=device).to(x) + ) + * strength**i + ) + elif "power_of_two" == downscale_strategy: + for i in range(10): + r = 2 + w, h = max(1, int(w / (r**i))), max(1, int(h / (r**i))) + noise += ( + up_sampler( + torch.randn(b, c, w, h, generator=generator, device=device).to(x) + ) + * strength**i + ) + if w == 1 or h == 1: + break # Lowest resolution is 1x1 + elif "random_step" == downscale_strategy: + for i in range(10): + r = ( + torch.rand(1, generator=generator, device=device) * 2 + 2 + ) # Rather than always going 2x, + w, h = max(1, int(w / (r))), max(1, int(h / (r))) + noise += ( + up_sampler( + torch.randn(b, c, w, h, generator=generator, device=device).to(x) + ) + * strength**i + ) + if w == 1 or h == 1: + break # Lowest resolution is 1x1 + else: + raise ValueError(f"unknown downscale strategy: {downscale_strategy}") + + noise = noise / noise.std() # Scaled back to roughly unit variance + return noise diff --git a/src/util/seed_all.py b/src/util/seeding.py similarity index 72% rename from src/util/seed_all.py rename to src/util/seeding.py index 9579565..b63a778 100644 --- a/src/util/seed_all.py +++ b/src/util/seeding.py @@ -21,6 +21,7 @@ import numpy as np import random import torch +import logging def seed_all(seed: int = 0): @@ -31,3 +32,23 @@ def seed_all(seed: int = 0): np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) + + +def generate_seed_sequence( + initial_seed: int, + length: int, + min_val=-0x8000_0000_0000_0000, + 
max_val=0xFFFF_FFFF_FFFF_FFFF, +): + if initial_seed is None: + logging.warning("initial_seed is None, reproducibility is not guaranteed") + random.seed(initial_seed) + + seed_sequence = [] + + for _ in range(length): + seed = random.randint(min_val, max_val) + + seed_sequence.append(seed) + + return seed_sequence diff --git a/src/util/slurm_util.py b/src/util/slurm_util.py new file mode 100644 index 0000000..a983d86 --- /dev/null +++ b/src/util/slurm_util.py @@ -0,0 +1,15 @@ +# Author: Bingxin Ke +# Last modified: 2024-02-22 + +import os + + +def is_on_slurm(): + cluster_name = os.getenv("SLURM_CLUSTER_NAME") + is_on_slurm = cluster_name is not None + return is_on_slurm + + +def get_local_scratch_dir(): + local_scratch_dir = os.getenv("TMPDIR") + return local_scratch_dir diff --git a/train.py b/train.py new file mode 100644 index 0000000..47bd87d --- /dev/null +++ b/train.py @@ -0,0 +1,363 @@ +# An official reimplemented version of Marigold training script +# Last modified: 2024-05-17 +# +# Copyright 2023 Bingxin Ke, ETH Zurich. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -------------------------------------------------------------------------- +# If you find this code useful, we kindly ask you to cite our paper in your work. 
# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
# More information about the method can be found at https://marigoldmonodepth.github.io
# --------------------------------------------------------------------------

import argparse
import logging
import os
import shlex
import shutil
from datetime import datetime, timedelta
from typing import List

import torch
from omegaconf import OmegaConf
from torch.utils.data import ConcatDataset, DataLoader
from tqdm import tqdm

from marigold.marigold_pipeline import MarigoldPipeline
from src.dataset import BaseDepthDataset, DatasetMode, get_dataset
from src.dataset.mixed_sampler import MixedBatchSampler
from src.trainer import get_trainer_cls
from src.util.config_util import (
    find_value_in_omegaconf,
    recursive_load_config,
)
from src.util.depth_transform import (
    DepthNormalizerBase,
    get_depth_normalizer,
)
from src.util.logging_util import (
    config_logging,
    init_wandb,
    load_wandb_job_id,
    log_slurm_job_id,
    save_wandb_job_id,
    tb_logger,
)
from src.util.slurm_util import get_local_scratch_dir, is_on_slurm

# Training entry point. In order, the script:
#   1. parses command-line arguments,
#   2. resolves the run directory (fresh run or resumed run) and loads the config,
#   3. sets up file/console logging, wandb and tensorboard,
#   4. snapshots config and code on the first run,
#   5. optionally copies the datasets to the local scratch dir on Slurm,
#   6. builds the train / validation / visualization data loaders,
#   7. loads the pretrained pipeline, builds the trainer and starts training.
if "__main__" == __name__:
    t_start = datetime.now()
    print(f"start at {t_start}")

    # -------------------- Arguments --------------------
    parser = argparse.ArgumentParser(description="Train your cute model!")
    parser.add_argument(
        "--config",
        type=str,
        default="config/train_marigold.yaml",
        help="Path to config file.",
    )
    parser.add_argument(
        "--resume_run",
        action="store",
        default=None,
        help="Path of checkpoint to be resumed. If given, will ignore --config, and checkpoint in the config",
    )
    parser.add_argument(
        "--output_dir", type=str, default=None, help="directory to save checkpoints"
    )
    parser.add_argument("--no_cuda", action="store_true", help="Do not use cuda.")
    parser.add_argument(
        "--exit_after",
        type=int,
        default=-1,
        help="Save checkpoint and exit after X minutes.",
    )
    parser.add_argument("--no_wandb", action="store_true", help="run without wandb")
    parser.add_argument(
        "--do_not_copy_data",
        action="store_true",
        help="On Slurm cluster, do not copy data to local scratch",
    )
    parser.add_argument(
        "--base_data_dir", type=str, default=None, help="directory of training data"
    )
    parser.add_argument(
        "--base_ckpt_dir",
        type=str,
        default=None,
        help="directory of pretrained checkpoint",
    )
    parser.add_argument(
        "--add_datetime_prefix",
        action="store_true",
        help="Add datetime to the output folder name",
    )

    args = parser.parse_args()
    resume_run = args.resume_run
    output_dir = args.output_dir
    # Command-line values take precedence; otherwise the environment variables
    # are required (a missing variable raises KeyError).
    base_data_dir = (
        args.base_data_dir
        if args.base_data_dir is not None
        else os.environ["BASE_DATA_DIR"]
    )
    base_ckpt_dir = (
        args.base_ckpt_dir
        if args.base_ckpt_dir is not None
        else os.environ["BASE_CKPT_DIR"]
    )

    # -------------------- Initialization --------------------
    # Resume previous run
    if resume_run is not None:
        print(f"Resume run: {resume_run}")
        # The run directory is two levels above the checkpoint folder
        # (<run>/checkpoint/<ckpt_name>). Normalize first so that a trailing
        # path separator does not shift the result by one level.
        out_dir_run = os.path.dirname(
            os.path.dirname(os.path.normpath(resume_run))
        )
        job_name = os.path.basename(out_dir_run)
        # Resume config file
        cfg = OmegaConf.load(os.path.join(out_dir_run, "config.yaml"))
    else:
        # Run from start
        cfg = recursive_load_config(args.config)
        # Full job name
        pure_job_name = os.path.basename(args.config).split(".")[0]
        # Add time prefix
        if args.add_datetime_prefix:
            job_name = f"{t_start.strftime('%y_%m_%d-%H_%M_%S')}-{pure_job_name}"
        else:
            job_name = pure_job_name

        # Output dir
        if output_dir is not None:
            out_dir_run = os.path.join(output_dir, job_name)
        else:
            out_dir_run = os.path.join("./output", job_name)
        # A fresh run must not reuse an existing run directory
        os.makedirs(out_dir_run, exist_ok=False)

    cfg_data = cfg.dataset

    # Other directories
    out_dir_ckpt = os.path.join(out_dir_run, "checkpoint")
    out_dir_tb = os.path.join(out_dir_run, "tensorboard")
    out_dir_eval = os.path.join(out_dir_run, "evaluation")
    out_dir_vis = os.path.join(out_dir_run, "visualization")
    for _sub_dir in (out_dir_ckpt, out_dir_tb, out_dir_eval, out_dir_vis):
        os.makedirs(_sub_dir, exist_ok=True)

    # -------------------- Logging settings --------------------
    config_logging(cfg.logging, out_dir=out_dir_run)
    logging.debug(f"config: {cfg}")

    # Initialize wandb
    if not args.no_wandb:
        if resume_run is not None:
            # Continue the wandb run whose id was stored in the run directory
            wandb_id = load_wandb_job_id(out_dir_run)
            wandb_cfg_dic = {
                "id": wandb_id,
                "resume": "must",
                **cfg.wandb,
            }
        else:
            wandb_cfg_dic = {
                "config": dict(cfg),
                "name": job_name,
                "mode": "online",
                **cfg.wandb,
            }
        wandb_cfg_dic.update({"dir": out_dir_run})
        wandb_run = init_wandb(enable=True, **wandb_cfg_dic)
        save_wandb_job_id(wandb_run, out_dir_run)
    else:
        init_wandb(enable=False)

    # Tensorboard (should be initialized after wandb)
    tb_logger.set_dir(out_dir_tb)

    log_slurm_job_id(step=0)

    # -------------------- Device --------------------
    cuda_avail = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if cuda_avail else "cpu")
    logging.info(f"device = {device}")

    # -------------------- Snapshot of code and config --------------------
    if resume_run is None:
        _output_path = os.path.join(out_dir_run, "config.yaml")
        with open(_output_path, "w+") as f:
            OmegaConf.save(config=cfg, f=f)
        logging.info(f"Config saved to {_output_path}")
        # Copy and tar code on the first run
        _temp_code_dir = os.path.join(out_dir_run, "code_tar")
        _code_snapshot_path = os.path.join(out_dir_run, "code_snapshot.tar")
        # Paths are shell-quoted so that output directories containing spaces
        # or shell metacharacters cannot break (or redirect) the commands.
        os.system(
            "rsync --relative -arhvz --quiet --filter=':- .gitignore' --exclude '.git' . "
            f"{shlex.quote(_temp_code_dir)}"
        )
        os.system(
            f"tar -cf {shlex.quote(_code_snapshot_path)} {shlex.quote(_temp_code_dir)}"
        )
        # Best-effort cleanup of the temporary copy, without going through a shell
        shutil.rmtree(_temp_code_dir, ignore_errors=True)
        logging.info(f"Code snapshot saved to: {_code_snapshot_path}")

    # -------------------- Copy data to local scratch (Slurm) --------------------
    if is_on_slurm() and (not args.do_not_copy_data):
        _local_scratch_dir = get_local_scratch_dir()
        if _local_scratch_dir is None:
            # Without a scratch location there is nowhere to copy to; keep
            # reading from the original data directory instead of crashing.
            logging.warning(
                "Local scratch directory is not available, "
                f"data will be read from: {base_data_dir}"
            )
        else:
            # local scratch dir
            original_data_dir = base_data_dir
            base_data_dir = os.path.join(_local_scratch_dir, "Marigold_data")
            # copy data: every value stored under a `dir` key of the dataset config
            required_data_list = find_value_in_omegaconf("dir", cfg_data)
            required_data_list = list(set(required_data_list))
            logging.info(f"Required_data_list: {required_data_list}")
            for d in tqdm(required_data_list, desc="Copy data to local scratch"):
                ori_dir = os.path.join(original_data_dir, d)
                dst_dir = os.path.join(base_data_dir, d)
                os.makedirs(os.path.dirname(dst_dir), exist_ok=True)
                if os.path.isfile(ori_dir):
                    shutil.copyfile(ori_dir, dst_dir)
                elif os.path.isdir(ori_dir):
                    shutil.copytree(ori_dir, dst_dir)
                else:
                    logging.warning(f"Required data not found, skipped: {ori_dir}")
            logging.info(f"Data copied to: {base_data_dir}")

    # -------------------- Gradient accumulation steps --------------------
    eff_bs = cfg.dataloader.effective_batch_size
    # The effective batch size must be a whole multiple of the per-step batch size
    if eff_bs % cfg.dataloader.max_train_batch_size != 0:
        raise ValueError(
            f"effective_batch_size ({eff_bs}) must be divisible by "
            f"max_train_batch_size ({cfg.dataloader.max_train_batch_size})"
        )
    accumulation_steps = int(eff_bs // cfg.dataloader.max_train_batch_size)

    logging.info(
        f"Effective batch size: {eff_bs}, accumulation steps: {accumulation_steps}"
    )

    # -------------------- Data --------------------
    loader_seed = cfg.dataloader.seed
    if loader_seed is None:
        loader_generator = None
    else:
        loader_generator = torch.Generator().manual_seed(loader_seed)

    # Training dataset
    depth_transform: DepthNormalizerBase = get_depth_normalizer(
        cfg_normalizer=cfg.depth_normalization
    )
    train_dataset: BaseDepthDataset = get_dataset(
        cfg_data.train,
        base_data_dir=base_data_dir,
        mode=DatasetMode.TRAIN,
        augmentation_args=cfg.augmentation,
        depth_transform=depth_transform,
    )
    logging.debug(f"Augmentation: {cfg.augmentation}")
    if "mixed" == cfg_data.train.name:
        # For the "mixed" dataset, `get_dataset` result is used as a list of
        # datasets that are sampled according to `prob_ls`.
        dataset_ls = train_dataset
        assert len(cfg_data.train.prob_ls) == len(
            dataset_ls
        ), "Lengths don't match: `prob_ls` and `dataset_list`"
        concat_dataset = ConcatDataset(dataset_ls)
        mixed_sampler = MixedBatchSampler(
            src_dataset_ls=dataset_ls,
            batch_size=cfg.dataloader.max_train_batch_size,
            drop_last=True,
            prob=cfg_data.train.prob_ls,
            shuffle=True,
            generator=loader_generator,
        )
        train_loader = DataLoader(
            concat_dataset,
            batch_sampler=mixed_sampler,
            num_workers=cfg.dataloader.num_workers,
        )
    else:
        train_loader = DataLoader(
            dataset=train_dataset,
            batch_size=cfg.dataloader.max_train_batch_size,
            num_workers=cfg.dataloader.num_workers,
            shuffle=True,
            generator=loader_generator,
        )
    # Validation dataset
    val_loaders: List[DataLoader] = []
    for _val_dic in cfg_data.val:
        _val_dataset = get_dataset(
            _val_dic,
            base_data_dir=base_data_dir,
            mode=DatasetMode.EVAL,
        )
        _val_loader = DataLoader(
            dataset=_val_dataset,
            batch_size=1,
            shuffle=False,
            num_workers=cfg.dataloader.num_workers,
        )
        val_loaders.append(_val_loader)

    # Visualization dataset
    vis_loaders: List[DataLoader] = []
    for _vis_dic in cfg_data.vis:
        _vis_dataset = get_dataset(
            _vis_dic,
            base_data_dir=base_data_dir,
            mode=DatasetMode.EVAL,
        )
        _vis_loader = DataLoader(
            dataset=_vis_dataset,
            batch_size=1,
            shuffle=False,
            num_workers=cfg.dataloader.num_workers,
        )
        vis_loaders.append(_vis_loader)

    # -------------------- Model --------------------
    _pipeline_kwargs = cfg.pipeline.kwargs if cfg.pipeline.kwargs is not None else {}
    model = MarigoldPipeline.from_pretrained(
        os.path.join(base_ckpt_dir, cfg.model.pretrained_path), **_pipeline_kwargs
    )

    # -------------------- Trainer --------------------
    # Exit time
    if args.exit_after > 0:
        t_end = t_start + timedelta(minutes=args.exit_after)
        logging.info(f"Will exit at {t_end}")
    else:
        t_end = None

    trainer_cls = get_trainer_cls(cfg.trainer.name)
    logging.debug(f"Trainer: {trainer_cls}")
    trainer = trainer_cls(
        cfg=cfg,
        model=model,
        train_dataloader=train_loader,
        device=device,
        base_ckpt_dir=base_ckpt_dir,
        out_dir_ckpt=out_dir_ckpt,
        out_dir_eval=out_dir_eval,
        out_dir_vis=out_dir_vis,
        accumulation_steps=accumulation_steps,
        val_dataloaders=val_loaders,
        vis_dataloaders=vis_loaders,
    )

    # -------------------- Checkpoint --------------------
    if resume_run is not None:
        trainer.load_checkpoint(
            resume_run, load_trainer_state=True, resume_lr_scheduler=True
        )

    # -------------------- Training & Evaluation Loop --------------------
    try:
        trainer.train(t_end=t_end)
    except Exception as e:
        # NOTE(review): the exception is logged with its traceback but not
        # re-raised, so the script still ends normally after a training failure.
        logging.exception(e)