From 2d4055bcc869126e065d70e102f277e37b8cd979 Mon Sep 17 00:00:00 2001 From: Piotr Rybicki Date: Tue, 22 Nov 2022 13:56:26 +0100 Subject: [PATCH] Fix error when raytracing on empty scene (#55) * Fix error when raytracing on empty scene * Fix NaNs in non-hit XYZ * Update src/gpu/optixPrograms.cu Co-authored-by: Mateusz Szczygielski <112629916+msz-rai@users.noreply.github.com> Co-authored-by: Mateusz Szczygielski <112629916+msz-rai@users.noreply.github.com> --- src/DeviceBuffer.hpp | 9 +++---- src/gpu/RaytraceRequestContext.hpp | 5 ++-- src/gpu/optixPrograms.cu | 42 ++++++++++++++++-------------- src/graph/RaytraceNode.cpp | 4 +-- src/scene/Scene.cpp | 5 +++- 5 files changed, 35 insertions(+), 30 deletions(-) diff --git a/src/DeviceBuffer.hpp b/src/DeviceBuffer.hpp index a968b1cb..676052fc 100644 --- a/src/DeviceBuffer.hpp +++ b/src/DeviceBuffer.hpp @@ -116,17 +116,16 @@ struct DeviceBuffer private: bool ensureDeviceCanFit(std::size_t newElemCount) { - if (newElemCount == 0) { - auto msg = fmt::format("Attempted to allocate {} bytes of memory", newElemCount); - throw std::logic_error(msg); - } if (elemCapacity >= newElemCount) { return false; } if (data != nullptr) { CHECK_CUDA(cudaFree(data)); + data = nullptr; + } + if (newElemCount > 0) { + CHECK_CUDA(cudaMalloc(reinterpret_cast(&data), newElemCount * sizeof(T))); } - CHECK_CUDA(cudaMalloc(reinterpret_cast(&data), newElemCount * sizeof(T))); elemCapacity = newElemCount; return true; } diff --git a/src/gpu/RaytraceRequestContext.hpp b/src/gpu/RaytraceRequestContext.hpp index a0646675..2a84c5dd 100644 --- a/src/gpu/RaytraceRequestContext.hpp +++ b/src/gpu/RaytraceRequestContext.hpp @@ -36,6 +36,7 @@ struct RaytraceRequestContext Field::type* isHit; Field::type* rayIdx; Field::type* ringIdx; - Field::type* distanceIdx; - Field::type* intensityIdx; + Field::type* distance; + Field::type* intensity; }; +static_assert(std::is_trivially_copyable::value); diff --git a/src/gpu/optixPrograms.cu b/src/gpu/optixPrograms.cu index 69c89991..5bd65898 100644 --- a/src/gpu/optixPrograms.cu +++ b/src/gpu/optixPrograms.cu @@ -23,7 +23,6 @@ #include #include - extern "C" static __constant__ RaytraceRequestContext ctx; struct Vec3fPayload @@ -53,21 +52,14 @@ Vec3f decodePayloadVec3f(const Vec3fPayload& src) }; } - template __forceinline__ __device__ -void saveRayResult(Vec3f* xyz=nullptr) +void saveRayResult(const Vec3f* xyz=nullptr, const Vec3f* origin=nullptr) { const int rayIdx = optixGetLaunchIndex().x; - Vec3f origin = decodePayloadVec3f({ - optixGetPayload_0(), - optixGetPayload_1(), - optixGetPayload_2() - }); - if (ctx.xyz != nullptr) { - // Return actual XYZ of the hit point or infinity vector with signs of the ray. - ctx.xyz[rayIdx] = isFinite ? *xyz : ctx.rays[rayIdx] * Vec3f{CUDART_INF_F, CUDART_INF_F, CUDART_INF_F}; + // Return actual XYZ of the hit point or infinity vector. + ctx.xyz[rayIdx] = isFinite ? *xyz : Vec3f{CUDART_INF_F, CUDART_INF_F, CUDART_INF_F}; } if (ctx.isHit != nullptr) { ctx.isHit[rayIdx] = isFinite; @@ -78,21 +70,26 @@ void saveRayResult(Vec3f* xyz=nullptr) if (ctx.ringIdx != nullptr && ctx.ringIds != nullptr) { ctx.ringIdx[rayIdx] = ctx.ringIds[rayIdx % ctx.ringIdsCount]; } - if (ctx.distanceIdx != nullptr) { - ctx.distanceIdx[rayIdx] = isFinite - ? sqrt(pow( - (*xyz)[0] - origin[0], 2) + - pow((*xyz)[1] - origin[1], 2) + - pow((*xyz)[2] - origin[2], 2)) + if (ctx.distance != nullptr) { + ctx.distance[rayIdx] = isFinite + ? sqrt( + pow((*xyz)[0] - (*origin)[0], 2) + + pow((*xyz)[1] - (*origin)[1], 2) + + pow((*xyz)[2] - (*origin)[2], 2)) : CUDART_INF_F; } - if (ctx.intensityIdx != nullptr) { - ctx.intensityIdx[rayIdx] = 100; + if (ctx.intensity != nullptr) { + ctx.intensity[rayIdx] = 100; } } extern "C" __global__ void __raygen__() { + if (ctx.scene == 0) { + saveRayResult(); + return; + } + Mat3x4f ray = ctx.rays[optixGetLaunchIndex().x]; Vec3f origin = ray * Vec3f{0, 0, 0}; @@ -124,7 +121,12 @@ extern "C" __global__ void __closesthit__() Vec3f hitObject = Vec3f((1 - u - v) * A + u * B + v * C); Vec3f hitWorld = optixTransformPointFromObjectToWorldSpace(hitObject); - saveRayResult(&hitWorld); + Vec3f origin = decodePayloadVec3f({ + optixGetPayload_0(), + optixGetPayload_1(), + optixGetPayload_2() + }); + saveRayResult(&hitWorld, &origin); } extern "C" __global__ void __miss__() diff --git a/src/graph/RaytraceNode.cpp b/src/graph/RaytraceNode.cpp index 08de331c..aae32a14 100644 --- a/src/graph/RaytraceNode.cpp +++ b/src/graph/RaytraceNode.cpp @@ -57,8 +57,8 @@ void RaytraceNode::schedule(cudaStream_t stream) .isHit = getPtrTo(), .rayIdx = getPtrTo(), .ringIdx = getPtrTo(), - .distanceIdx = getPtrTo(), - .intensityIdx = getPtrTo(), + .distance = getPtrTo(), + .intensity = getPtrTo(), }; CUdeviceptr pipelineArgsPtr = requestCtx->getCUdeviceptr(); diff --git a/src/scene/Scene.cpp b/src/scene/Scene.cpp index 231e7814..e61f8d69 100644 --- a/src/scene/Scene.cpp +++ b/src/scene/Scene.cpp @@ -102,7 +102,7 @@ OptixShaderBindingTable Scene::buildSBT() .missRecordBase = dMissRecords.readDeviceRaw(), .missRecordStrideInBytes = sizeof(MissRecord), .missRecordCount = 1U, - .hitgroupRecordBase = dHitgroupRecords.readDeviceRaw(), + .hitgroupRecordBase = getObjectCount() > 0 ? dHitgroupRecords.readDeviceRaw() : static_cast(0), .hitgroupRecordStrideInBytes = sizeof(HitgroupRecord), .hitgroupRecordCount = static_cast(dHitgroupRecords.getElemCount()), }; @@ -110,6 +110,9 @@ OptixShaderBindingTable Scene::buildSBT() OptixTraversableHandle Scene::buildAS() { + if (getObjectCount() == 0) { + return static_cast(0); + } std::vector instances; for (auto&& entity : entities) { // TODO(prybicki): this is somewhat inefficient, because most of the time only transform changes.