From ec24b9cb0940f84b43d2e3807dd8a602f47ed615 Mon Sep 17 00:00:00 2001 From: Jonathan Schnitzler Date: Thu, 28 Nov 2024 10:35:06 +0100 Subject: [PATCH] Extend cudaimgproc::demosaicing for f32 --- modules/cudaimgproc/src/color.cpp | 33 ++++++++++++++----- modules/cudaimgproc/src/cuda/debayer.cu | 44 +++++++++++++++++++++---- 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/modules/cudaimgproc/src/color.cpp b/modules/cudaimgproc/src/color.cpp index cad5dcd394d..ed7595e6258 100644 --- a/modules/cudaimgproc/src/color.cpp +++ b/modules/cudaimgproc/src/color.cpp @@ -72,6 +72,8 @@ namespace cv { namespace cuda { template void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + + void MHCdemosaic_float3(PtrStepSzf src, int2 sourceOffset, PtrStepSzf dst, int2 firstRed, cudaStream_t stream); } }} @@ -2136,7 +2138,7 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, GpuMat src = _src.getGpuMat(); const int depth = _src.depth(); - CV_Assert( depth == CV_8U ); + CV_Assert( depth == CV_8U || (depth == CV_32F && dcn == 3) ); CV_Assert( src.channels() == 1 ); CV_Assert( dcn == 3 || dcn == 4 ); @@ -2145,18 +2147,31 @@ void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, dst.setTo(Scalar::all(0), stream); - Size wholeSize; - Point ofs; - src.locateROI(wholeSize, ofs); - PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); - const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1, code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1); - if (dcn == 3) - cv::cuda::device::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + if (depth == CV_8U) + { + Size wholeSize; + Point ofs; + src.locateROI(wholeSize, ofs); + PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); + + if (dcn == 3) + cv::cuda::device::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + else + cv::cuda::device::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } + // depth == CV_32F && dcn == 3 else - cv::cuda::device::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + { + Size wholeSize; + Point ofs; + src.locateROI(wholeSize, ofs); + PtrStepSzf srcWhole(wholeSize.height, wholeSize.width, (float*)src.datastart, src.step); + + cv::cuda::device::MHCdemosaic_float3(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); + } break; } diff --git a/modules/cudaimgproc/src/cuda/debayer.cu b/modules/cudaimgproc/src/cuda/debayer.cu index bfe4b6f5ea8..bf91654ddf0 100644 --- a/modules/cudaimgproc/src/cuda/debayer.cu +++ b/modules/cudaimgproc/src/cuda/debayer.cu @@ -382,6 +382,14 @@ namespace cv { namespace cuda { namespace device template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); + template __device__ __forceinline__ D toDstColor(const float3& pix){ + return toDst(make_uchar3(saturate_cast(pix.x), saturate_cast(pix.y), saturate_cast(pix.z))); + } + template <> __device__ __forceinline__ float3 toDstColor(const float3& pix) + { + return pix; + } + ////////////////////////////////////////////////////////////// // Bayer Demosaicing (Malvar, He, and Cutler) // @@ -507,16 +515,15 @@ namespace cv { namespace cuda { namespace device alternate.y = (y + firstRed.y) % 2; // in BGR sequence; - uchar3 pixelColor = + float3 pixelColor = (alternate.y == 0) ? ((alternate.x == 0) ? - make_uchar3(saturate_cast(PATTERN.y), saturate_cast(PATTERN.x), saturate_cast(C)) : - make_uchar3(saturate_cast(PATTERN.w), saturate_cast(C), saturate_cast(PATTERN.z))) : + make_float3(PATTERN.y, PATTERN.x, C) : + make_float3(PATTERN.w, C, PATTERN.z)) : ((alternate.x == 0) ? - make_uchar3(saturate_cast(PATTERN.z), saturate_cast(C), saturate_cast(PATTERN.w)) : - make_uchar3(saturate_cast(C), saturate_cast(PATTERN.x), saturate_cast(PATTERN.y))); - - dst(y, x) = toDst(pixelColor); + make_float3(PATTERN.z, C, PATTERN.w) : + make_float3(C, PATTERN.x, PATTERN.y)); + dst(y, x) = toDstColor(pixelColor); } template @@ -545,6 +552,29 @@ namespace cv { namespace cuda { namespace device template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); + + // Implement MHCdemosaic for float and with a result of 3 channels + void MHCdemosaic_float3(PtrStepSzf src, int2 sourceOffset, PtrStepSzf dst, int2 firstRed, cudaStream_t stream) + { + typedef typename TypeVec::vec_type dst_t; + + const dim3 block(32, 8); + const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); + + if (sourceOffset.x || sourceOffset.y) { + cv::cudev::TextureOff texSrc(src, sourceOffset.y, sourceOffset.x); + MHCdemosaic><<>>((PtrStepSz)dst, texSrc, firstRed); + } + else { + cv::cudev::Texture texSrc(src); + MHCdemosaic><<>>((PtrStepSz)dst, texSrc, firstRed); + } + + cudaSafeCall( cudaGetLastError() ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } }}} #endif /* CUDA_DISABLER */