diff --git a/Makefile b/Makefile index f7ea8b6..78d0a0a 100644 --- a/Makefile +++ b/Makefile @@ -73,7 +73,7 @@ setup_dev: requirements: #cd mmcv && CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' MMCV_WITH_OPS=1 $(BIN)/pip install -e . pip install -r requirements.txt - conda install pytorch==1.7.0 torchvision==0.8.0 torchaudio==0.7.0 cudatoolkit=11.0 -c pytorch -y + conda install pytorch==1.7.0 torchvision==0.8.0 torchaudio==0.7.0 cudatoolkit=10.2 -c pytorch -y pip install "mmcv-full>=1.1.4,<=1.3.0" -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html cd SegFormer && pip install -e . clone_segformer: diff --git a/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py b/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py index 1b68633..0f7fca0 100644 --- a/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py +++ b/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py @@ -10,7 +10,7 @@ find_unused_parameters = True model = dict( type='EncoderDecoder', - pretrained='pretrained/mit_b1.pth', + pretrained='../../pretrained/ImageNet-1K/mit_b5.pth', backbone=dict( type='mit_b1', style='pytorch'), diff --git a/SegFormer/local_configs/segformer/MSD/segformer.b1.512x512.MSD.40k.py b/SegFormer/local_configs/segformer/MSD/segformer.b1.512x512.MSD.40k.py new file mode 100644 index 0000000..8757b57 --- /dev/null +++ b/SegFormer/local_configs/segformer/MSD/segformer.b1.512x512.MSD.40k.py @@ -0,0 +1,48 @@ +_base_ = [ + '../../_base_/models/segformer.py', + '../../_base_/datasets/ade20k_repeat.py', + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_40k_adamw.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +find_unused_parameters = True +model = dict( + type='EncoderDecoder', + pretrained='../../pretrained/ImageNet-1K/mit_b5.pth', + backbone=dict( + type='mit_b5', + style='pytorch'), + decode_head=dict( + type='SegFormerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + decoder_params=dict(embed_dim=256), + loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + + +data = dict(samples_per_gpu=2) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/SegFormer/mmseg/models/decode_heads/decode_head.py b/SegFormer/mmseg/models/decode_heads/decode_head.py index 22af5ab..3ff58c1 100755 --- a/SegFormer/mmseg/models/decode_heads/decode_head.py +++ b/SegFormer/mmseg/models/decode_heads/decode_head.py @@ -213,7 +213,7 @@ def cls_seg(self, feat): output = self.conv_seg(feat) return output - @force_fp32(apply_to=('seg_logit', )) + @force_fp32(apply_to=('seg_logit',)) def losses(self, seg_logit, seg_label): """Compute segmentation loss.""" loss = dict() diff --git a/SegFormer/mmseg/models/decode_heads/segformer_head.py b/SegFormer/mmseg/models/decode_heads/segformer_head.py index 8ada6d7..29a3836 100644 --- a/SegFormer/mmseg/models/decode_heads/segformer_head.py +++ b/SegFormer/mmseg/models/decode_heads/segformer_head.py @@ -17,10 +17,12 @@ from IPython import embed + class MLP(nn.Module): """ Linear Embedding """ + def __init__(self, input_dim=2048, embed_dim=768): super().__init__() self.proj = nn.Linear(input_dim, embed_dim) @@ -36,6 +38,7 @@ class SegFormerHead(BaseDecodeHead): """ SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers """ + def __init__(self, feature_strides, **kwargs): super(SegFormerHead, self).__init__(input_transform='multiple_select', **kwargs) assert len(feature_strides) == len(self.in_channels) @@ -53,7 +56,7 @@ def __init__(self, feature_strides, **kwargs): self.linear_c1 = MLP(input_dim=c1_in_channels, embed_dim=embedding_dim) self.linear_fuse = ConvModule( - in_channels=embedding_dim*4, + in_channels=embedding_dim * 4, out_channels=embedding_dim, kernel_size=1, norm_cfg=dict(type='SyncBN', requires_grad=True) @@ -68,19 +71,17 @@ def forward(self, inputs): ############## MLP decoder on C1-C4 ########### n, _, h, w = c4.shape - _c4 = self.linear_c4(c4).permute(0,2,1).reshape(n, -1, c4.shape[2], c4.shape[3]) - _c4 = resize(_c4, size=c1.size()[2:],mode='bilinear',align_corners=False) + _c4 = self.linear_c4(c4).permute(0, 2, 1).reshape(n, -1, c4.shape[2], c4.shape[3]) + _c4 = resize(_c4, size=c1.size()[2:], mode='bilinear', align_corners=False) - _c3 = self.linear_c3(c3).permute(0,2,1).reshape(n, -1, c3.shape[2], c3.shape[3]) - _c3 = resize(_c3, size=c1.size()[2:],mode='bilinear',align_corners=False) + _c3 = self.linear_c3(c3).permute(0, 2, 1).reshape(n, -1, c3.shape[2], c3.shape[3]) + _c3 = resize(_c3, size=c1.size()[2:], mode='bilinear', align_corners=False) - _c2 = self.linear_c2(c2).permute(0,2,1).reshape(n, -1, c2.shape[2], c2.shape[3]) - _c2 = resize(_c2, size=c1.size()[2:],mode='bilinear',align_corners=False) - - _c1 = self.linear_c1(c1).permute(0,2,1).reshape(n, -1, c1.shape[2], c1.shape[3]) + _c2 = self.linear_c2(c2).permute(0, 2, 1).reshape(n, -1, c2.shape[2], c2.shape[3]) + _c2 = resize(_c2, size=c1.size()[2:], mode='bilinear', align_corners=False) + _c1 = self.linear_c1(c1).permute(0, 2, 1).reshape(n, -1, c1.shape[2], c1.shape[3]) _c = self.linear_fuse(torch.cat([_c4, _c3, _c2, _c1], dim=1)) - x = self.dropout(_c) x = self.linear_pred(x) diff --git a/configs/main_conf.yml b/configs/main_conf.yml index bc26281..f424aa3 100644 --- a/configs/main_conf.yml +++ b/configs/main_conf.yml @@ -1,7 +1,7 @@ tag: spleen_segformer_2d network: - type: segformer # Name of the network - config: # Configuration of the network + config: # Configuration of the network loss_function: cross_entropy learning_rate: 0.5 epochs: 100 diff --git a/data/ADEChallengeData2016/annotations/training/ADE_train_00000001.png b/data/ADEChallengeData2016/annotations/training/ADE_train_00000001.png deleted file mode 100644 index 2dcde04..0000000 Binary files a/data/ADEChallengeData2016/annotations/training/ADE_train_00000001.png and /dev/null differ diff --git a/data/ADEChallengeData2016/annotations/validation/ADE_val_00000001.png b/data/ADEChallengeData2016/annotations/validation/ADE_val_00000001.png deleted file mode 100644 index 8a10162..0000000 Binary files a/data/ADEChallengeData2016/annotations/validation/ADE_val_00000001.png and /dev/null differ diff --git a/data/ADEChallengeData2016/images/training/ADE_train_00010072.jpg b/data/ADEChallengeData2016/images/training/ADE_train_00010072.jpg deleted file mode 100644 index 9bb2cec..0000000 Binary files a/data/ADEChallengeData2016/images/training/ADE_train_00010072.jpg and /dev/null differ diff --git a/data/ADEChallengeData2016/images/validation/ADE_val_00001340.jpg b/data/ADEChallengeData2016/images/validation/ADE_val_00001340.jpg deleted file mode 100644 index afca240..0000000 Binary files a/data/ADEChallengeData2016/images/validation/ADE_val_00001340.jpg and /dev/null differ diff --git a/main.py b/main.py deleted file mode 100644 index dab8e2a..0000000 --- a/main.py +++ /dev/null @@ -1,51 +0,0 @@ -import traceback -import argparse -import numpy as np -from src import * -from typing import * - -# Color-logger is used to print colored messages to the console -logger = ColorLogger(logger_name='Main', color='yellow') - - -def get_args() -> argparse.Namespace: - """Set-up the argument parser - - Returns: - argparse.Namespace: - """ - parser = argparse.ArgumentParser( - description='Project 1 for the Deep Learning class (COSC 525). ' - 'Involves the development of a FeedForward Neural Network.', - add_help=False) - # Required Args - required_args = parser.add_argument_group('Required Arguments') - config_file_params = { - 'type': argparse.FileType('r'), - 'required': True, - 'help': "The path to the yaml configuration file." - } - required_args.add_argument('-c', '--config-file', **config_file_params) - # Optional args - optional_args = parser.add_argument_group('Optional Arguments') - optional_args.add_argument('-l', '--log', required=False, default='out.log', - help="Name of the output log file") - optional_args.add_argument("-h", "--help", action="help", help="Show this help message and exit") - - return parser.parse_args() - - -def main(): - """This is the main function of main.py - - Example: - python main.py --dataset xor --network 2x1_net --config confs/main_conf.yml - """ - - -if __name__ == '__main__': - try: - main() - except Exception as e: - logger.error(str(e) + '\n' + str(traceback.format_exc())) - raise e diff --git a/models/20220515_165920.log.json b/models/20220515_165920.log.json new file mode 100644 index 0000000..11e4a72 --- /dev/null +++ b/models/20220515_165920.log.json @@ -0,0 +1 @@ +{"env_info": "sys.platform: linux\nPython: 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:18) [GCC 10.3.0]\nCUDA available: True\nGPU 0: Tesla T4\nCUDA_HOME: /usr/local/cuda\nNVCC: Build cuda_11.7.r11.7/compiler.31294372_0\nGCC: gcc (Debian 8.3.0-6) 8.3.0\nPyTorch: 1.7.0\nPyTorch compiling details: PyTorch built with:\n - GCC 7.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v1.6.0 (Git Hash 5ef631a030a6f73131c77892041042805a06064f)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 10.2\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37\n - CuDNN 7.6.5\n - Magma 2.5.2\n - Build settings: BLAS=MKL, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN_WRAPPER -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n\nTorchVision: 0.8.0\nOpenCV: 4.5.1\nMMCV: 1.3.0\nMMCV Compiler: GCC 7.3\nMMCV CUDA Compiler: 11.0\nMMSegmentation: 0.11.0+", "seed": null, "exp_name": "segformer.b1.512x512.ade.160k.py"} diff --git a/models/20220515_170407.log.json b/models/20220515_170407.log.json new file mode 100644 index 0000000..c271494 --- /dev/null +++ b/models/20220515_170407.log.json @@ -0,0 +1,6 @@ +{"env_info": "sys.platform: linux\nPython: 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:18) [GCC 10.3.0]\nCUDA available: True\nGPU 0: Tesla T4\nCUDA_HOME: usr/local/cuda\nGCC: gcc (Debian 8.3.0-6) 8.3.0\nPyTorch: 1.7.0\nPyTorch compiling details: PyTorch built with:\n - GCC 7.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v1.6.0 (Git Hash 5ef631a030a6f73131c77892041042805a06064f)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 10.2\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37\n - CuDNN 7.6.5\n - Magma 2.5.2\n - Build settings: BLAS=MKL, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN_WRAPPER -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n\nTorchVision: 0.8.0\nOpenCV: 4.5.1\nMMCV: 1.3.0\nMMCV Compiler: GCC 7.3\nMMCV CUDA Compiler: 11.0\nMMSegmentation: 0.11.0+", "seed": null, "exp_name": "segformer.b1.512x512.ade.160k.py"} +{"mode": "train", "epoch": 1, "iter": 50, "lr": 0.0, "memory": 6463, "data_time": 0.00425, "decode.loss_seg": 4.00595, "decode.acc_seg": 0.3562, "loss": 4.00595, "time": 0.27083} +{"mode": "train", "epoch": 1, "iter": 100, "lr": 0.0, "memory": 6463, "data_time": 0.00232, "decode.loss_seg": 3.96242, "decode.acc_seg": 1.89888, "loss": 3.96242, "time": 0.24262} +{"mode": "train", "epoch": 1, "iter": 150, "lr": 1e-05, "memory": 6463, "data_time": 0.00263, "decode.loss_seg": 3.9038, "decode.acc_seg": 9.5145, "loss": 3.9038, "time": 0.24269} +{"mode": "train", "epoch": 1, "iter": 200, "lr": 1e-05, "memory": 6463, "data_time": 0.00254, "decode.loss_seg": 3.54471, "decode.acc_seg": 20.88363, "loss": 3.54471, "time": 0.24335} +{"mode": "train", "epoch": 1, "iter": 250, "lr": 1e-05, "memory": 6463, "data_time": 0.00237, "decode.loss_seg": 3.22963, "decode.acc_seg": 21.13161, "loss": 3.22963, "time": 0.23734} diff --git a/trained_weights/ADE20K/segformer.b0.512x512.ade.160k.pth b/trained_weights/ADE20K/segformer.b0.512x512.ade.160k.pth index 8be1a15..665c129 100644 Binary files a/trained_weights/ADE20K/segformer.b0.512x512.ade.160k.pth and b/trained_weights/ADE20K/segformer.b0.512x512.ade.160k.pth differ