diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md index e0067547..06ad8ce4 100644 --- a/DOCUMENTATION.md +++ b/DOCUMENTATION.md @@ -143,7 +143,7 @@ Geometry models properties for locations in space, including density, SDF, featu | ------------------------------------ | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | radius | float | Half side length of the scene bounding box. Default: 1.0 | | isosurface | bool | Whether to enable surface extraction. Default: True | -| isosusrface_method | str | Method for surface extraction, in ["mc", "mt"]. "mc" uses the marching cubes algorithm, not differentiable; "mt" uses the marching tetrahedra algorithm, differentiable. Default: "mt" | +| isosurface_method | str | Method for surface extraction, in ["mc", "mt"]. "mc" uses the marching cubes algorithm, not differentiable; "mt" uses the marching tetrahedra algorithm, differentiable. Default: "mt" | | isosurface_resolution | int | Grid resolution for surface extraction. Default: 128 | | isosurface_threshold | Union[float,str] | The threshold value to determine the surface location of the implicit field, in [float, "auto"]. If "auto", use the mean value of the field as the threshold. Default: 0 | | isosurface_chunk | int | Chunk size when computing the field value on grid vertices, used to prevent OOM. If 0, does not use chunking. Default: 0 | diff --git a/configs/magic123-coarse-sd.yaml b/configs/magic123-coarse-sd.yaml index 5fd06fa5..15ad08e1 100644 --- a/configs/magic123-coarse-sd.yaml +++ b/configs/magic123-coarse-sd.yaml @@ -111,14 +111,20 @@ system: loss: lambda_rgb: 1000. lambda_mask: 100. + lambda_depth: 0. + lambda_depth_rel: 0. lambda_sd: 0.025 lambda_3d_sd: 1. lambda_sd_img: 0. lambda_orient: 0. + lambda_normal: 0. + lambda_3d_normal_smooth: [ 100, 7.0, 5.0, 150, 10.0, 200 ] lambda_normal_smoothness_2d: 1000. lambda_sparsity: 0. lambda_opaque: 0. + + optimizer: name: Adam args: diff --git a/configs/magic123-hifa-coarse-sd.yaml b/configs/magic123-hifa-coarse-sd.yaml index fb96eabf..766926b8 100644 --- a/configs/magic123-hifa-coarse-sd.yaml +++ b/configs/magic123-hifa-coarse-sd.yaml @@ -111,10 +111,14 @@ system: loss: lambda_rgb: 1000. lambda_mask: 100. + lambda_depth: 0. + lambda_depth_rel: 0. lambda_sd: 0.025 lambda_3d_sd: 1. lambda_sd_img: 0.00025 lambda_orient: 0. + lambda_normal: 0. + lambda_3d_normal_smooth: [ 100, 7.0, 5.0, 150, 10.0, 200 ] lambda_normal_smoothness_2d: 1000. lambda_sparsity: 0. lambda_opaque: 0. diff --git a/threestudio/systems/magic123.py b/threestudio/systems/magic123.py index 12551518..57bb98ab 100644 --- a/threestudio/systems/magic123.py +++ b/threestudio/systems/magic123.py @@ -87,6 +87,42 @@ def training_step(self, batch, batch_idx): ) if not self.cfg.refinement: + # depth loss + if self.C(self.cfg.loss.lambda_depth) > 0: + valid_gt_depth = batch["ref_depth"][batch["mask"].squeeze(-1)].unsqueeze(1) + valid_pred_depth = out_input["depth"][batch["mask"]].unsqueeze(1) + with torch.no_grad(): + A = torch.cat( + [valid_gt_depth, torch.ones_like(valid_gt_depth)], dim=-1 + ) # [B, 2] + X = torch.linalg.lstsq(A, valid_pred_depth).solution # [2, 1] + valid_gt_depth = A @ X # [B, 1] + loss_depth = F.mse_loss(valid_pred_depth, valid_gt_depth) + self.log("train/loss_depth", loss_depth) + loss += loss_depth * self.C(self.cfg.loss.lambda_depth) + + # relative depth loss + if self.C(self.cfg.loss.lambda_depth_rel) > 0: + valid_gt_depth = batch["ref_depth"][batch["mask"].squeeze(-1)] # [B,] + valid_pred_depth = out_input["depth"][batch["mask"]] # [B,] + loss_depth_rel = 1 - self.pearson(valid_pred_depth, valid_gt_depth) + self.log("train/loss_relative_depth", loss_depth_rel) + loss += loss_depth_rel * self.C(self.cfg.loss.lambda_depth_rel) + + # normal loss + if self.C(self.cfg.loss.lambda_normal) > 0: + valid_gt_normal = ( + 1 - 2 * batch["ref_normal"][batch["mask"].squeeze(-1)] + ) # [B, 3] + valid_pred_normal = ( + 2 * out_input["comp_normal"][batch["mask"].squeeze(-1)] - 1 + ) # [B, 3] + loss_normal = 1 - F.cosine_similarity( + valid_pred_normal, valid_gt_normal + ).mean() + self.log("train/loss_normal", loss_normal) + loss += loss_normal * self.C(self.cfg.loss.lambda_normal) + if self.C(self.cfg.loss.lambda_orient) > 0: if "normal" not in out: raise ValueError(