From a96727688d553d02f8893aa8eac4a83c333a0dfb Mon Sep 17 00:00:00 2001
From: Jin-Hwa Kim <j1nhwa.kim@navercorp.com>
Date: Fri, 16 Jun 2023 09:59:26 +0900
Subject: [PATCH] Fixing two issues in RayMarcher

This pull request addresses two issues that were identified in the RayMarcher code:

1. Random Order Aggregation in RayMarcher: Previously, the samples were aggregated in a random order during the forward pass. To ensure that dL_drays_o and dL_drays_d are computed correctly from rays_a, I have introduced a sorting step in the backward pass: rays_a is first sorted by sample index so that the segments passed to segment_csr are in order, and the resulting gradients are then reordered by ray index before being returned. Although this fix could be made faster with a CUDA implementation, it still performs well at a reasonable speed.

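2. Incorrect Gradient from Opacity: The gradient contribution from opacity in composite_train_bw_kernel was incorrect and has now been rectified. It was previously computed as dL_dopacity*(1-O) for every sample; it is now computed as dL_dopacity*(T-(O-o)), where o is the running sum of the sample weights. This aligns the gradient calculation with the forward pass, ensuring accurate results.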
These fixes enhance the functionality and accuracy of the RayMarcher implementation.
---
 models/csrc/setup.py           | 6 +++---
 models/csrc/volumerendering.cu | 5 +++--
 models/custom_functions.py     | 8 ++++++++
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/models/csrc/setup.py b/models/csrc/setup.py
index 0664eff9..f8324e53 100644
--- a/models/csrc/setup.py
+++ b/models/csrc/setup.py
@@ -13,9 +13,9 @@
 
 setup(
     name='vren',
-    version='2.0',
-    author='kwea123',
-    author_email='kwea123@gmail.com',
+    version='2.0.1',
+    author='kwea123, jnhwkim',
+    author_email='kwea123@gmail.com, jnhwkim@gmail.com',
     description='cuda volume rendering library',
     long_description='cuda volume rendering library',
     ext_modules=[
diff --git a/models/csrc/volumerendering.cu b/models/csrc/volumerendering.cu
index cb61bfaa..b89f7ea0 100644
--- a/models/csrc/volumerendering.cu
+++ b/models/csrc/volumerendering.cu
@@ -112,7 +112,7 @@ __global__ void composite_train_bw_kernel(
     int samples = 0;
     scalar_t R = rgb[ray_idx][0], G = rgb[ray_idx][1], B = rgb[ray_idx][2];
     scalar_t O = opacity[ray_idx], D = depth[ray_idx];
-    scalar_t T = 1.0f, r = 0.0f, g = 0.0f, b = 0.0f, d = 0.0f;
+    scalar_t T = 1.0f, r = 0.0f, g = 0.0f, b = 0.0f, d = 0.0f, o = 0.0f;
 
     // compute prefix sum of dL_dws * ws
     // [a0, a1, a2, a3, ...] -> [a0, a0+a1, a0+a1+a2, a0+a1+a2+a3, ...]
@@ -130,6 +130,7 @@ __global__ void composite_train_bw_kernel(
         r += w*rgbs[s][0]; g += w*rgbs[s][1]; b += w*rgbs[s][2];
         d += w*ts[s];
         T *= 1.0f-a;
+        o += w;
 
         // compute gradients by math...
         dL_drgbs[s][0] = dL_drgb[ray_idx][0]*w;
@@ -140,7 +141,7 @@ __global__ void composite_train_bw_kernel(
             dL_drgb[ray_idx][0]*(rgbs[s][0]*T-(R-r)) + 
             dL_drgb[ray_idx][1]*(rgbs[s][1]*T-(G-g)) + 
             dL_drgb[ray_idx][2]*(rgbs[s][2]*T-(B-b)) + // gradients from rgb
-            dL_dopacity[ray_idx]*(1-O) + // gradient from opacity
+            dL_dopacity[ray_idx]*(T-(O-o)) + // gradient from opacity
             dL_ddepth[ray_idx]*(ts[s]*T-(D-d)) + // gradient from depth
             T*dL_dws[s]-(dL_dws_times_ws_sum-dL_dws_times_ws[s]) // gradient from ws
         );
diff --git a/models/custom_functions.py b/models/custom_functions.py
index 222a8d7e..6c99e64a 100644
--- a/models/custom_functions.py
+++ b/models/custom_functions.py
@@ -104,11 +104,19 @@ def forward(ctx, rays_o, rays_d, hits_t,
     def backward(ctx, dL_drays_a, dL_dxyzs, dL_ddirs,
                  dL_ddeltas, dL_dts, dL_dtotal_samples):
         rays_a, ts = ctx.saved_tensors
+
+        _, indices = rays_a[:,1].sort()  # sort by sample idx for segment_csr
+        rays_a = rays_a[indices]
+        _, indices = rays_a[:,0].sort()  # sort by ray idx for input grads
+
         segments = torch.cat([rays_a[:, 1], rays_a[-1:, 1]+rays_a[-1:, 2]])
         dL_drays_o = segment_csr(dL_dxyzs, segments)
         dL_drays_d = \
             segment_csr(dL_dxyzs*rearrange(ts, 'n -> n 1')+dL_ddirs, segments)
 
+        dL_drays_o = dL_drays_o[indices]
+        dL_drays_d = dL_drays_d[indices]
+
         return dL_drays_o, dL_drays_d, None, None, None, None, None, None, None