diff --git a/Makefile b/Makefile
index f7ea8b6..78d0a0a 100644
--- a/Makefile
+++ b/Makefile
@@ -73,7 +73,9 @@ setup_dev:
 requirements:
 	#cd mmcv && CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' MMCV_WITH_OPS=1 $(BIN)/pip install -e .
 	pip install -r requirements.txt
-	conda install pytorch==1.7.0 torchvision==0.8.0 torchaudio==0.7.0 cudatoolkit=11.0 -c pytorch -y
-	pip install "mmcv-full>=1.1.4,<=1.3.0" -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html
+	conda install pytorch==1.7.0 torchvision==0.8.0 torchaudio==0.7.0 cudatoolkit=10.2 -c pytorch -y
+# Keep the mmcv-full wheel index (cuXXX/torchY) in step with the cudatoolkit and
+# pytorch versions installed on the line above.
+	pip install "mmcv-full>=1.1.4,<=1.3.0" -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html
 	cd SegFormer && pip install -e .
 clone_segformer:
diff --git a/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py b/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py
index 1b68633..0f7fca0 100644
--- a/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py
+++ b/SegFormer/local_configs/segformer/B1/segformer.b1.512x512.ade.160k.py
@@ -10,7 +10,9 @@
 find_unused_parameters = True
 model = dict(
     type='EncoderDecoder',
-    pretrained='pretrained/mit_b1.pth',
+    # ImageNet-1K checkpoint for the backbone; keep the variant in the file
+    # name identical to backbone.type below (mit_b1).
+    pretrained='../../pretrained/ImageNet-1K/mit_b1.pth',
     backbone=dict(
         type='mit_b1',
         style='pytorch'),
diff --git a/SegFormer/local_configs/segformer/MSD/segformer.b1.512x512.MSD.40k.py b/SegFormer/local_configs/segformer/MSD/segformer.b1.512x512.MSD.40k.py
new file mode 100644
index 0000000..8757b57
--- /dev/null
+++ b/SegFormer/local_configs/segformer/MSD/segformer.b1.512x512.MSD.40k.py
@@ -0,0 +1,62 @@
+# SegFormer config for the MSD experiment on the 40k-iteration AdamW schedule.
+# NOTE(review): the file name says "b1" but the backbone and checkpoint below
+# are mit_b5 -- confirm which variant is intended.
+# NOTE(review): the dataset base is still ade20k_repeat with num_classes=150,
+# presumably placeholders until an MSD dataset config exists -- confirm.
+_base_ = [
+    '../../_base_/models/segformer.py',
+    '../../_base_/datasets/ade20k_repeat.py',
+    '../../_base_/default_runtime.py',
+    '../../_base_/schedules/schedule_40k_adamw.py'
+]
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+find_unused_parameters = True
+model = dict(
+    type='EncoderDecoder',
+    # The checkpoint and backbone.type name the same variant (mit_b5).
+    pretrained='../../pretrained/ImageNet-1K/mit_b5.pth',
+    backbone=dict(
+        type='mit_b5',
+        style='pytorch'),
+    decode_head=dict(
+        type='SegFormerHead',
+        # in_channels, in_index and feature_strides each carry four entries;
+        # SegFormerHead asserts len(feature_strides) == len(in_channels).
+        in_channels=[64, 128, 320, 512],
+        in_index=[0, 1, 2, 3],
+        feature_strides=[4, 8, 16, 32],
+        channels=128,
+        dropout_ratio=0.1,
+        num_classes=150,
+        norm_cfg=norm_cfg,
+        align_corners=False,
+        decoder_params=dict(embed_dim=256),
+        loss_decode=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+
+# optimizer
+# _delete_=True replaces the inherited optimizer dict instead of merging into it.
+# custom_keys: decay_mult=0 for 'pos_block' and 'norm', lr_mult=10 for 'head'.
+optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01,
+                 paramwise_cfg=dict(custom_keys={'pos_block': dict(decay_mult=0.),
+                                                 'norm': dict(decay_mult=0.),
+                                                 'head': dict(lr_mult=10.)
+                                                 }))
+
+# learning-rate schedule: 'poly' policy with a 1500-iteration linear warmup
+lr_config = dict(_delete_=True, policy='poly',
+                 warmup='linear',
+                 warmup_iters=1500,
+                 warmup_ratio=1e-6,
+                 power=1.0, min_lr=0.0, by_epoch=False)
+
+
+data = dict(samples_per_gpu=2)
+# The interval must divide the schedule length for the last iteration to be
+# evaluated; 16000 would stop at 32000 on a 40k run, 4000 ends exactly on 40000.
+# NOTE(review): assumes schedule_40k_adamw.py sets max_iters=40000 -- confirm.
+evaluation = dict(interval=4000, metric='mIoU')
diff --git a/SegFormer/mmseg/models/decode_heads/decode_head.py b/SegFormer/mmseg/models/decode_heads/decode_head.py
index 22af5ab..3ff58c1 100755
--- a/SegFormer/mmseg/models/decode_heads/decode_head.py
+++ b/SegFormer/mmseg/models/decode_heads/decode_head.py
@@ -213,7 +213,7 @@ def cls_seg(self, feat):
         output = self.conv_seg(feat)
         return output
 
-    @force_fp32(apply_to=('seg_logit', ))
+    @force_fp32(apply_to=('seg_logit',))
     def losses(self, seg_logit, seg_label):
         """Compute segmentation loss."""
         loss = dict()
diff --git a/SegFormer/mmseg/models/decode_heads/segformer_head.py b/SegFormer/mmseg/models/decode_heads/segformer_head.py
index 8ada6d7..29a3836 100644
--- a/SegFormer/mmseg/models/decode_heads/segformer_head.py
+++ b/SegFormer/mmseg/models/decode_heads/segformer_head.py
@@ -17,10 +17,12 @@
 
 from IPython import embed
 
+
 class MLP(nn.Module):
     """
     Linear Embedding
     """
+
     def __init__(self, input_dim=2048, embed_dim=768):
         super().__init__()
         self.proj = nn.Linear(input_dim, embed_dim)
@@ -36,6 +38,7 @@ class SegFormerHead(BaseDecodeHead):
     """
     SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers
     """
+
     def __init__(self, feature_strides, **kwargs):
         super(SegFormerHead, self).__init__(input_transform='multiple_select', **kwargs)
         assert len(feature_strides) == len(self.in_channels)
@@ -53,7 +56,7 @@ def __init__(self, feature_strides, **kwargs):
         self.linear_c1 = MLP(input_dim=c1_in_channels, embed_dim=embedding_dim)
 
         self.linear_fuse = ConvModule(
-            in_channels=embedding_dim*4,
+            in_channels=embedding_dim * 4,
             out_channels=embedding_dim,
             kernel_size=1,
             norm_cfg=dict(type='SyncBN', requires_grad=True)
@@ -68,19 +71,17 @@ def forward(self, inputs):
         ############## MLP decoder on C1-C4 ###########
         n, _, h, w = c4.shape
 
-        _c4 = self.linear_c4(c4).permute(0,2,1).reshape(n, -1, c4.shape[2], c4.shape[3])
-        _c4 = resize(_c4, size=c1.size()[2:],mode='bilinear',align_corners=False)
+        _c4 = self.linear_c4(c4).permute(0, 2, 1).reshape(n, -1, c4.shape[2], c4.shape[3])
+        _c4 = resize(_c4, size=c1.size()[2:], mode='bilinear', align_corners=False)
 
-        _c3 = self.linear_c3(c3).permute(0,2,1).reshape(n, -1, c3.shape[2], c3.shape[3])
-        _c3 = resize(_c3, size=c1.size()[2:],mode='bilinear',align_corners=False)
+        _c3 = self.linear_c3(c3).permute(0, 2, 1).reshape(n, -1, c3.shape[2], c3.shape[3])
+        _c3 = resize(_c3, size=c1.size()[2:], mode='bilinear', align_corners=False)
 
-        _c2 = self.linear_c2(c2).permute(0,2,1).reshape(n, -1, c2.shape[2], c2.shape[3])
-        _c2 = resize(_c2, size=c1.size()[2:],mode='bilinear',align_corners=False)
-
-        _c1 = self.linear_c1(c1).permute(0,2,1).reshape(n, -1, c1.shape[2], c1.shape[3])
+        _c2 = self.linear_c2(c2).permute(0, 2, 1).reshape(n, -1, c2.shape[2], c2.shape[3])
+        _c2 = resize(_c2, size=c1.size()[2:], mode='bilinear', align_corners=False)
 
+        _c1 = self.linear_c1(c1).permute(0, 2, 1).reshape(n, -1, c1.shape[2], c1.shape[3])
         _c = self.linear_fuse(torch.cat([_c4, _c3, _c2, _c1], dim=1))
-
         x = self.dropout(_c)
         x = self.linear_pred(x)
 
diff --git a/configs/main_conf.yml b/configs/main_conf.yml
index bc26281..f424aa3 100644
--- a/configs/main_conf.yml
+++ b/configs/main_conf.yml
@@ -1,7 +1,7 @@
 tag: spleen_segformer_2d
 network:
   - type: segformer  # Name of the network
-    config:  # Configuration of the network
+    config: # Configuration of the network
       loss_function: cross_entropy
       learning_rate: 0.5
       epochs: 100
diff --git a/data/ADEChallengeData2016/annotations/training/ADE_train_00000001.png b/data/ADEChallengeData2016/annotations/training/ADE_train_00000001.png
deleted file mode 100644
index 2dcde04..0000000
Binary files a/data/ADEChallengeData2016/annotations/training/ADE_train_00000001.png and /dev/null differ
diff --git a/data/ADEChallengeData2016/annotations/validation/ADE_val_00000001.png b/data/ADEChallengeData2016/annotations/validation/ADE_val_00000001.png
deleted file mode 100644
index 8a10162..0000000
Binary files a/data/ADEChallengeData2016/annotations/validation/ADE_val_00000001.png and /dev/null differ
diff --git a/data/ADEChallengeData2016/images/training/ADE_train_00010072.jpg b/data/ADEChallengeData2016/images/training/ADE_train_00010072.jpg
deleted file mode 100644
index 9bb2cec..0000000
Binary files a/data/ADEChallengeData2016/images/training/ADE_train_00010072.jpg and /dev/null differ
diff --git a/data/ADEChallengeData2016/images/validation/ADE_val_00001340.jpg b/data/ADEChallengeData2016/images/validation/ADE_val_00001340.jpg
deleted file mode 100644
index afca240..0000000
Binary files a/data/ADEChallengeData2016/images/validation/ADE_val_00001340.jpg and /dev/null differ
diff --git a/main.py b/main.py
deleted file mode 100644
index dab8e2a..0000000
--- a/main.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import traceback
-import argparse
-import numpy as np
-from src import *
-from typing import *
-
-# Color-logger is used to print colored messages to the console
-logger = ColorLogger(logger_name='Main', color='yellow')
-
-
-def get_args() -> argparse.Namespace:
-    """Set-up the argument parser
-
-    Returns:
-        argparse.Namespace:
-    """
-    parser = argparse.ArgumentParser(
-        description='Project 1 for the Deep Learning class (COSC 525). '
-                    'Involves the development of a FeedForward Neural Network.',
-        add_help=False)
-    # Required Args
-    required_args = parser.add_argument_group('Required Arguments')
-    config_file_params = {
-        'type': argparse.FileType('r'),
-        'required': True,
-        'help': "The path to the yaml configuration file."
-    }
-    required_args.add_argument('-c', '--config-file', **config_file_params)
-    # Optional args
-    optional_args = parser.add_argument_group('Optional Arguments')
-    optional_args.add_argument('-l', '--log', required=False, default='out.log',
-                               help="Name of the output log file")
-    optional_args.add_argument("-h", "--help", action="help", help="Show this help message and exit")
-
-    return parser.parse_args()
-
-
-def main():
-    """This is the main function of main.py
-
-    Example:
-        python main.py --dataset xor --network 2x1_net --config confs/main_conf.yml
-    """
-
-
-if __name__ == '__main__':
-    try:
-        main()
-    except Exception as e:
-        logger.error(str(e) + '\n' + str(traceback.format_exc()))
-        raise e
diff --git a/models/20220515_165920.log.json b/models/20220515_165920.log.json
new file mode 100644
index 0000000..11e4a72
--- /dev/null
+++ b/models/20220515_165920.log.json
@@ -0,0 +1 @@
+{"env_info": "sys.platform: linux\nPython: 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:18) [GCC 10.3.0]\nCUDA available: True\nGPU 0: Tesla T4\nCUDA_HOME: /usr/local/cuda\nNVCC: Build cuda_11.7.r11.7/compiler.31294372_0\nGCC: gcc (Debian 8.3.0-6) 8.3.0\nPyTorch: 1.7.0\nPyTorch compiling details: PyTorch built with:\n  - GCC 7.3\n  - C++ Version: 201402\n  - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v1.6.0 (Git Hash 5ef631a030a6f73131c77892041042805a06064f)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 10.2\n  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37\n  - CuDNN 7.6.5\n  - Magma 2.5.2\n  - Build settings: BLAS=MKL, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN_WRAPPER -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, 
USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n\nTorchVision: 0.8.0\nOpenCV: 4.5.1\nMMCV: 1.3.0\nMMCV Compiler: GCC 7.3\nMMCV CUDA Compiler: 11.0\nMMSegmentation: 0.11.0+", "seed": null, "exp_name": "segformer.b1.512x512.ade.160k.py"}
diff --git a/models/20220515_170407.log.json b/models/20220515_170407.log.json
new file mode 100644
index 0000000..c271494
--- /dev/null
+++ b/models/20220515_170407.log.json
@@ -0,0 +1,6 @@
+{"env_info": "sys.platform: linux\nPython: 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:18) [GCC 10.3.0]\nCUDA available: True\nGPU 0: Tesla T4\nCUDA_HOME: usr/local/cuda\nGCC: gcc (Debian 8.3.0-6) 8.3.0\nPyTorch: 1.7.0\nPyTorch compiling details: PyTorch built with:\n  - GCC 7.3\n  - C++ Version: 201402\n  - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n  - Intel(R) MKL-DNN v1.6.0 (Git Hash 5ef631a030a6f73131c77892041042805a06064f)\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n  - NNPACK is enabled\n  - CPU capability usage: AVX2\n  - CUDA Runtime 10.2\n  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37\n  - CuDNN 7.6.5\n  - Magma 2.5.2\n  - Build settings: BLAS=MKL, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN_WRAPPER -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, 
USE_NNPACK=ON, USE_OPENMP=ON, \n\nTorchVision: 0.8.0\nOpenCV: 4.5.1\nMMCV: 1.3.0\nMMCV Compiler: GCC 7.3\nMMCV CUDA Compiler: 11.0\nMMSegmentation: 0.11.0+", "seed": null, "exp_name": "segformer.b1.512x512.ade.160k.py"}
+{"mode": "train", "epoch": 1, "iter": 50, "lr": 0.0, "memory": 6463, "data_time": 0.00425, "decode.loss_seg": 4.00595, "decode.acc_seg": 0.3562, "loss": 4.00595, "time": 0.27083}
+{"mode": "train", "epoch": 1, "iter": 100, "lr": 0.0, "memory": 6463, "data_time": 0.00232, "decode.loss_seg": 3.96242, "decode.acc_seg": 1.89888, "loss": 3.96242, "time": 0.24262}
+{"mode": "train", "epoch": 1, "iter": 150, "lr": 1e-05, "memory": 6463, "data_time": 0.00263, "decode.loss_seg": 3.9038, "decode.acc_seg": 9.5145, "loss": 3.9038, "time": 0.24269}
+{"mode": "train", "epoch": 1, "iter": 200, "lr": 1e-05, "memory": 6463, "data_time": 0.00254, "decode.loss_seg": 3.54471, "decode.acc_seg": 20.88363, "loss": 3.54471, "time": 0.24335}
+{"mode": "train", "epoch": 1, "iter": 250, "lr": 1e-05, "memory": 6463, "data_time": 0.00237, "decode.loss_seg": 3.22963, "decode.acc_seg": 21.13161, "loss": 3.22963, "time": 0.23734}
diff --git a/trained_weights/ADE20K/segformer.b0.512x512.ade.160k.pth b/trained_weights/ADE20K/segformer.b0.512x512.ade.160k.pth
index 8be1a15..665c129 100644
Binary files a/trained_weights/ADE20K/segformer.b0.512x512.ade.160k.pth and b/trained_weights/ADE20K/segformer.b0.512x512.ade.160k.pth differ