threestudio-project · Royalvice · Jan 12, 2024 · Jan 12, 2024
diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md
@@ -143,7 +143,7 @@ Geometry models properties for locations in space, including density, SDF, featu
 | ------------------------------------ | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | radius                               | float             | Half side length of the scene bounding box. Default: 1.0                                                                                                                                                                                                                         |
 | isosurface                           | bool              | Whether to enable surface extraction. Default: True                                                                                                                                                                                                                              |
-| isosusrface_method                   | str               | Method for surface extraction, in ["mc", "mt"]. "mc" uses the marching cubes algorithm, not differentiable; "mt" uses the marching tetrahedra algorithm, differentiable. Default: "mt"                                                                                           |
+| isosurface_method                    | str               | Method for surface extraction, in ["mc", "mt"]. "mc" uses the marching cubes algorithm, not differentiable; "mt" uses the marching tetrahedra algorithm, differentiable. Default: "mt"                                                                                           |
 | isosurface_resolution                | int               | Grid resolution for surface extraction. Default: 128                                                                                                                                                                                                                             |
 | isosurface_threshold                 | Union[float,str]  | The threshold value to determine the surface location of the implicit field, in [float, "auto"]. If "auto", use the mean value of the field as the threshold. Default: 0                                                                                                         |
 | isosurface_chunk                     | int               | Chunk size when computing the field value on grid vertices, used to prevent OOM. If 0, does not use chunking. Default: 0                                                                                                                                                         |

diff --git a/configs/magic123-coarse-sd.yaml b/configs/magic123-coarse-sd.yaml
@@ -111,14 +111,20 @@ system:
   loss:
     lambda_rgb: 1000.
     lambda_mask: 100.
+    lambda_depth: 0.
+    lambda_depth_rel: 0.
     lambda_sd: 0.025
     lambda_3d_sd: 1.
     lambda_sd_img: 0.
     lambda_orient: 0.
+    lambda_normal: 0.
+    lambda_3d_normal_smooth: [ 100, 7.0, 5.0, 150, 10.0, 200 ]
     lambda_normal_smoothness_2d: 1000.
     lambda_sparsity: 0.
     lambda_opaque: 0.
 
+
+
   optimizer:
     name: Adam
     args:

diff --git a/configs/magic123-hifa-coarse-sd.yaml b/configs/magic123-hifa-coarse-sd.yaml
@@ -111,10 +111,14 @@ system:
   loss:
     lambda_rgb: 1000.
     lambda_mask: 100.
+    lambda_depth: 0.
+    lambda_depth_rel: 0.
     lambda_sd: 0.025
     lambda_3d_sd: 1.
     lambda_sd_img: 0.00025
     lambda_orient: 0.
+    lambda_normal: 0.
+    lambda_3d_normal_smooth: [ 100, 7.0, 5.0, 150, 10.0, 200 ]
     lambda_normal_smoothness_2d: 1000.
     lambda_sparsity: 0.
     lambda_opaque: 0.

diff --git a/threestudio/systems/magic123.py b/threestudio/systems/magic123.py
@@ -87,6 +87,42 @@ def training_step(self, batch, batch_idx):
                 )
 
         if not self.cfg.refinement:
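+            # depth loss: MSE between predicted depth and the reference depth after a least-squares scale/shift fit (masked pixels only)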
+            if self.C(self.cfg.loss.lambda_depth) > 0:
+                valid_gt_depth = batch["ref_depth"][batch["mask"].squeeze(-1)].unsqueeze(1)
+                valid_pred_depth = out_input["depth"][batch["mask"]].unsqueeze(1)
+                with torch.no_grad():
+                    A = torch.cat(
+                        [valid_gt_depth, torch.ones_like(valid_gt_depth)], dim=-1
+                    )  # [B, 2]
+                    X = torch.linalg.lstsq(A, valid_pred_depth).solution  # [2, 1]
+                    valid_gt_depth = A @ X  # [B, 1]
+                loss_depth = F.mse_loss(valid_pred_depth, valid_gt_depth)
+                self.log("train/loss_depth", loss_depth)
+                loss += loss_depth * self.C(self.cfg.loss.lambda_depth)
+
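+            # relative depth loss: 1 - self.pearson(pred, ref) over masked pixels (presumably Pearson correlation; confirm against the helper)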
+            if self.C(self.cfg.loss.lambda_depth_rel) > 0:
+                valid_gt_depth = batch["ref_depth"][batch["mask"].squeeze(-1)]  # [B,]
+                valid_pred_depth = out_input["depth"][batch["mask"]]  # [B,]
+                loss_depth_rel = 1 - self.pearson(valid_pred_depth, valid_gt_depth)
+                self.log("train/loss_relative_depth", loss_depth_rel)
+                loss += loss_depth_rel * self.C(self.cfg.loss.lambda_depth_rel)
+
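+            # normal loss: 1 - mean cosine similarity on masked pixels; reference is remapped as 1 - 2x, prediction as 2x - 1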
+            if self.C(self.cfg.loss.lambda_normal) > 0:
+                valid_gt_normal = (
+                    1 - 2 * batch["ref_normal"][batch["mask"].squeeze(-1)]
+                )  # [B, 3]
+                valid_pred_normal = (
+                    2 * out_input["comp_normal"][batch["mask"].squeeze(-1)] - 1
+                )  # [B, 3]
+                loss_normal = 1 - F.cosine_similarity(
+                    valid_pred_normal, valid_gt_normal
+                ).mean()
+                self.log("train/loss_normal", loss_normal)
+                loss += loss_normal * self.C(self.cfg.loss.lambda_normal)
+
             if self.C(self.cfg.loss.lambda_orient) > 0:
                 if "normal" not in out:
                     raise ValueError(