diff --git a/models/csrc/setup.py b/models/csrc/setup.py index 0664eff9..f8324e53 100644 --- a/models/csrc/setup.py +++ b/models/csrc/setup.py @@ -13,9 +13,9 @@ setup( name='vren', - version='2.0', - author='kwea123', - author_email='kwea123@gmail.com', + version='2.0.1', + author='kwea123, jnhwkim', + author_email='kwea123@gmail.com, jnhwkim@gmail.com', description='cuda volume rendering library', long_description='cuda volume rendering library', ext_modules=[ diff --git a/models/csrc/volumerendering.cu b/models/csrc/volumerendering.cu index cb61bfaa..b89f7ea0 100644 --- a/models/csrc/volumerendering.cu +++ b/models/csrc/volumerendering.cu @@ -112,7 +112,7 @@ __global__ void composite_train_bw_kernel( int samples = 0; scalar_t R = rgb[ray_idx][0], G = rgb[ray_idx][1], B = rgb[ray_idx][2]; scalar_t O = opacity[ray_idx], D = depth[ray_idx]; - scalar_t T = 1.0f, r = 0.0f, g = 0.0f, b = 0.0f, d = 0.0f; + scalar_t T = 1.0f, r = 0.0f, g = 0.0f, b = 0.0f, d = 0.0f, o = 0.0f; // compute prefix sum of dL_dws * ws // [a0, a1, a2, a3, ...] -> [a0, a0+a1, a0+a1+a2, a0+a1+a2+a3, ...] @@ -130,6 +130,7 @@ __global__ void composite_train_bw_kernel( r += w*rgbs[s][0]; g += w*rgbs[s][1]; b += w*rgbs[s][2]; d += w*ts[s]; T *= 1.0f-a; + o += w; // compute gradients by math... dL_drgbs[s][0] = dL_drgb[ray_idx][0]*w; @@ -140,7 +141,7 @@ __global__ void composite_train_bw_kernel( dL_drgb[ray_idx][0]*(rgbs[s][0]*T-(R-r)) + dL_drgb[ray_idx][1]*(rgbs[s][1]*T-(G-g)) + dL_drgb[ray_idx][2]*(rgbs[s][2]*T-(B-b)) + // gradients from rgb - dL_dopacity[ray_idx]*(1-O) + // gradient from opacity + dL_dopacity[ray_idx]*(T-(O-o)) + // gradient from opacity dL_ddepth[ray_idx]*(ts[s]*T-(D-d)) + // gradient from depth T*dL_dws[s]-(dL_dws_times_ws_sum-dL_dws_times_ws[s]) // gradient from ws ); diff --git a/models/custom_functions.py b/models/custom_functions.py index 222a8d7e..6c99e64a 100644 --- a/models/custom_functions.py +++ b/models/custom_functions.py @@ -104,11 +104,19 @@ def forward(ctx, rays_o, rays_d, hits_t, def backward(ctx, dL_drays_a, dL_dxyzs, dL_ddirs, dL_ddeltas, dL_dts, dL_dtotal_samples): rays_a, ts = ctx.saved_tensors + + _, indices = rays_a[:,1].sort() # sort by sample idx for segment_csr + rays_a = rays_a[indices] + _, indices = rays_a[:,0].sort() # sort by ray idx for input grads + segments = torch.cat([rays_a[:, 1], rays_a[-1:, 1]+rays_a[-1:, 2]]) dL_drays_o = segment_csr(dL_dxyzs, segments) dL_drays_d = \ segment_csr(dL_dxyzs*rearrange(ts, 'n -> n 1')+dL_ddirs, segments) + dL_drays_o = dL_drays_o[indices] + dL_drays_d = dL_drays_d[indices] + return dL_drays_o, dL_drays_d, None, None, None, None, None, None, None