Skip to content

Commit

Permalink
first proposal of cv::remap with relative displacement field (#24621,…
Browse files Browse the repository at this point in the history
… #24603)

CUDA implementation of the feature
  • Loading branch information
chacha21 committed Nov 30, 2023
1 parent daaf645 commit 85f7cd5
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 31 deletions.
88 changes: 60 additions & 28 deletions modules/cudawarping/src/cuda/remap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,23 @@ namespace cv { namespace cuda { namespace device
}
}

template <typename Ptr2D, typename T> __global__ void remap_relative(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;

if (x < dst.cols && y < dst.rows)
{
const float xcoo = x+mapx.ptr(y)[x];
const float ycoo = y+mapy.ptr(y)[x];

dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
}
}

template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
{
static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool, bool isRelative)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

Expand All @@ -81,14 +95,17 @@ namespace cv { namespace cuda { namespace device
BorderReader<PtrStep<T>, B<work_type>> brdSrc(src, brd);
Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src(brdSrc);

remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() );
}
};

template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool)
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool, bool isRelative)
{
CV_UNUSED(srcWhole);
CV_UNUSED(xoff);
Expand All @@ -102,7 +119,10 @@ namespace cv { namespace cuda { namespace device
BorderReader<PtrStep<T>, B<work_type>> brdSrc(src, brd);
Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src(brdSrc);

remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() );

cudaSafeCall( cudaDeviceSynchronize() );
Expand All @@ -112,7 +132,7 @@ namespace cv { namespace cuda { namespace device
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStreamTex
{
static void call(PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
PtrStepSz< T > dst, const float* borderValue, bool cc20)
PtrStepSz< T > dst, const float* borderValue, bool cc20, bool isRelative)
{
typedef typename TypeVec<float, VecTraits< T >::cn>::vec_type work_type;
dim3 block(32, cc20 ? 8 : 4);
Expand All @@ -123,15 +143,21 @@ namespace cv { namespace cuda { namespace device
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader<cudev::TexturePtr<T>, B<work_type>> brdSrc(texSrcWhole, brd);
Filter<BorderReader<cudev::TexturePtr<T>, B<work_type>>> filter_src(brdSrc);
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);

}
else {
cudev::TextureOff<T> texSrcWhole(srcWhole, yoff, xoff);
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader<cudev::TextureOffPtr<T>, B<work_type>> brdSrc(texSrcWhole, brd);
Filter<BorderReader<cudev::TextureOffPtr<T>, B<work_type>>> filter_src(brdSrc);
remap<<<grid, block >>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block >>>(filter_src, mapx, mapy, dst);
}

cudaSafeCall( cudaGetLastError() );
Expand All @@ -142,23 +168,29 @@ namespace cv { namespace cuda { namespace device
template <template <typename> class Filter, typename T> struct RemapDispatcherNonStreamTex<Filter, BrdReplicate, T>
{
static void call(PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
PtrStepSz< T > dst, const float*, bool)
PtrStepSz< T > dst, const float*, bool, bool isRelative)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows)
{
cudev::Texture<T> texSrcWhole(srcWhole);
Filter<cudev::TexturePtr<T>> filter_src(texSrcWhole);
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
}
else
{
cudev::TextureOff<T> texSrcWhole(srcWhole, yoff, xoff);
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>> brdSrc(texSrcWhole, brd);
Filter<BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>>> filter_src(brdSrc);
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
}
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
Expand Down Expand Up @@ -203,20 +235,20 @@ namespace cv { namespace cuda { namespace device
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative)
{
if (stream == 0)
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20);
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20, isRelative);
else
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20);
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20, isRelative);
}
};

template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

static const caller_t callers[3][5] =
{
Expand Down Expand Up @@ -244,24 +276,24 @@ namespace cv { namespace cuda { namespace device
};

callers[interpolation][borderMode](static_cast<PtrStepSz<T>>(src), static_cast<PtrStepSz<T>>(srcWhole), xoff, yoff, xmap, ymap,
static_cast<PtrStepSz<T>>(dst), borderValue, stream, cc20);
static_cast<PtrStepSz<T>>(dst), borderValue, stream, cc20, isRelative);
}

template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
} // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev

Expand Down
9 changes: 6 additions & 3 deletions modules/cudawarping/src/remap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,19 @@ namespace cv { namespace cuda { namespace device
{
template <typename T>
void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst,
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
}
}}}

void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputArray _ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
{
using namespace cv::cuda::device::imgproc;

const bool hasRelativeFlag = ((interpolation & WARP_RELATIVE_MAP) != 0);
interpolation &= ~WARP_RELATIVE_MAP;

typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
static const func_t funcs[6][4] =
{
{remap_gpu<uchar> , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3> , remap_gpu<uchar4> },
Expand Down Expand Up @@ -98,7 +101,7 @@ void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputA
src.locateROI(wholeSize, ofs);

func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20), hasRelativeFlag);
}

#endif // HAVE_CUDA
Loading

0 comments on commit 85f7cd5

Please sign in to comment.