facebookresearch · schmidt-ai · Oct 11, 2023 · Oct 11, 2023 · Oct 11, 2023 · Oct 11, 2023
diff --git a/.gitignore b/.gitignore
@@ -9,3 +9,6 @@ dist/
 *.swp
 
 .vscode/
+
+.DS_Store
+*venv*
diff --git a/dinov2/data/masking.py b/dinov2/data/masking.py
@@ -31,17 +31,16 @@ def __init__(
         max_aspect = max_aspect or 1 / min_aspect
         self.log_aspect_ratio = (math.log(min_aspect), math.log(max_aspect))
 
-    def __repr__(self):
-        repr_str = "Generator(%d, %d -> [%d ~ %d], max = %d, %.3f ~ %.3f)" % (
-            self.height,
-            self.width,
-            self.min_num_patches,
-            self.max_num_patches,
-            self.num_masking_patches,
-            self.log_aspect_ratio[0],
-            self.log_aspect_ratio[1],
-        )
-        return repr_str
+    def __repr__(self) -> str:
+        """Return a one-line summary of the generator's configuration.
+
+        The integer fields are passed through ``int()`` so the output matches
+        the ``%d`` formatting this replaces even if a field holds a float; a
+        bare ``{}`` placeholder would render such a value as e.g. ``98.0``.
+        """
+        return (
+            f"Generator({int(self.height)}, {int(self.width)} -> "
+            f"[{int(self.min_num_patches)} ~ {int(self.max_num_patches)}], "
+            f"max = {int(self.num_masking_patches)}, "
+            f"{self.log_aspect_ratio[0]:.3f} ~ {self.log_aspect_ratio[1]:.3f})"
+        )
 
     def get_shape(self):
         return self.height, self.width

diff --git a/dinov2/fsdp/__init__.py b/dinov2/fsdp/__init__.py
@@ -152,6 +152,3 @@ def tag_last_checkpoint(self, last_filename_basename: str) -> None:
         save_file = os.path.join(self.save_dir, f"last_checkpoint.{rankstr()}")
         with self.path_manager.open(save_file, "w") as f:
             f.write(last_filename_basename)  # pyre-ignore
-
-
-ShardedGradScaler = ShardedGradScaler
diff --git a/dinov2/loss/dino_clstoken_loss.py b/dinov2/loss/dino_clstoken_loss.py
@@ -26,9 +26,10 @@ def __init__(
         self.async_batch_center = None
 
     @torch.no_grad()
-    def softmax_center_teacher(self, teacher_output, teacher_temp):
+    def softmax_center_teacher(self, teacher_output: torch.Tensor, teacher_temp):
         self.apply_center_update()
         # teacher centering and sharpening
+        self.center = self.center.to(device=teacher_output.device)  # keep center on the input's device for the subtraction below
         return F.softmax((teacher_output - self.center) / teacher_temp, dim=-1)
 
     @torch.no_grad()

diff --git a/dinov2/loss/ibot_patch_loss.py b/dinov2/loss/ibot_patch_loss.py
@@ -43,7 +43,7 @@ def __init__(self, patch_out_dim, student_temp=0.1, center_momentum=0.9):
         self.async_batch_center = None
 
     @torch.no_grad()
-    def softmax_center_teacher(self, teacher_patch_tokens, teacher_temp):
+    def softmax_center_teacher(self, teacher_patch_tokens: torch.Tensor, teacher_temp):
         self.apply_center_update()
         # teacher centering and sharpening
         #
@@ -53,6 +53,7 @@ def softmax_center_teacher(self, teacher_patch_tokens, teacher_temp):
         # teacher_patch_tokens = teacher_patch_tokens.float()
         # return F.softmax((teacher_patch_tokens.sub_(self.center.to(teacher_patch_tokens.dtype))).mul_(1 / teacher_temp), dim=-1)
 
+        self.center = self.center.to(device=teacher_patch_tokens.device)
         return F.softmax((teacher_patch_tokens - self.center) / teacher_temp, dim=-1)
 
         # this is experimental, keep everything in float16 and let's see what happens:

diff --git a/dinov2/models/__init__.py b/dinov2/models/__init__.py
@@ -16,6 +16,7 @@ def build_model(args, only_teacher=False, img_size=224):
     if "vit" in args.arch:
         vit_kwargs = dict(
             img_size=img_size,
+            in_chans=args.in_chans,
             patch_size=args.patch_size,
             init_values=args.layerscale,
             ffn_layer=args.ffn_layer,
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,3 +9,6 @@ dist/ @@
     *.swp
     .vscode/
+    .DS_Store
+    *venv*