Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

first proposal of cv::remap with relative displacement field (#24621,… #3600

Merged
merged 5 commits into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions modules/cudawarping/include/opencv2/cudawarping.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ namespace cv { namespace cuda {
@param ymap Y values. Only CV_32FC1 type is supported.
@param interpolation Interpolation method (see resize ). INTER_NEAREST , INTER_LINEAR and
INTER_CUBIC are supported for now.
The extra flag WARP_RELATIVE_MAP can be ORed to the interpolation method
(e.g. INTER_LINEAR | WARP_RELATIVE_MAP)
@param borderMode Pixel extrapolation method (see borderInterpolate ). BORDER_REFLECT101 ,
BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now.
@param borderValue Value used in case of a constant border. By default, it is 0.
Expand All @@ -79,6 +81,10 @@ The function transforms the source image using the specified map:

\f[\texttt{dst} (x,y) = \texttt{src} (xmap(x,y), ymap(x,y))\f]

with the WARP_RELATIVE_MAP flag :

\f[\texttt{dst} (x,y) = \texttt{src} (x+map_x(x,y),y+map_y(x,y))\f]

Values of pixels with non-integer coordinates are computed using the bilinear interpolation.

@sa remap
Expand Down
88 changes: 60 additions & 28 deletions modules/cudawarping/src/cuda/remap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,23 @@ namespace cv { namespace cuda { namespace device
}
}

template <typename Ptr2D, typename T> __global__ void remap_relative(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;

if (x < dst.cols && y < dst.rows)
{
const float xcoo = x+mapx.ptr(y)[x];
const float ycoo = y+mapy.ptr(y)[x];

dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
}
}

template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
{
static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool, bool isRelative)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;

Expand All @@ -81,14 +95,17 @@ namespace cv { namespace cuda { namespace device
BorderReader<PtrStep<T>, B<work_type>> brdSrc(src, brd);
Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src(brdSrc);

remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() );
}
};

template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool)
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool, bool isRelative)
{
CV_UNUSED(srcWhole);
CV_UNUSED(xoff);
Expand All @@ -102,7 +119,10 @@ namespace cv { namespace cuda { namespace device
BorderReader<PtrStep<T>, B<work_type>> brdSrc(src, brd);
Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src(brdSrc);

remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() );

cudaSafeCall( cudaDeviceSynchronize() );
Expand All @@ -112,7 +132,7 @@ namespace cv { namespace cuda { namespace device
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStreamTex
{
static void call(PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
PtrStepSz< T > dst, const float* borderValue, bool cc20)
PtrStepSz< T > dst, const float* borderValue, bool cc20, bool isRelative)
{
typedef typename TypeVec<float, VecTraits< T >::cn>::vec_type work_type;
dim3 block(32, cc20 ? 8 : 4);
Expand All @@ -123,15 +143,21 @@ namespace cv { namespace cuda { namespace device
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader<cudev::TexturePtr<T>, B<work_type>> brdSrc(texSrcWhole, brd);
Filter<BorderReader<cudev::TexturePtr<T>, B<work_type>>> filter_src(brdSrc);
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);

}
else {
cudev::TextureOff<T> texSrcWhole(srcWhole, yoff, xoff);
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
BorderReader<cudev::TextureOffPtr<T>, B<work_type>> brdSrc(texSrcWhole, brd);
Filter<BorderReader<cudev::TextureOffPtr<T>, B<work_type>>> filter_src(brdSrc);
remap<<<grid, block >>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block >>>(filter_src, mapx, mapy, dst);
}

cudaSafeCall( cudaGetLastError() );
Expand All @@ -142,23 +168,29 @@ namespace cv { namespace cuda { namespace device
template <template <typename> class Filter, typename T> struct RemapDispatcherNonStreamTex<Filter, BrdReplicate, T>
{
static void call(PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
PtrStepSz< T > dst, const float*, bool)
PtrStepSz< T > dst, const float*, bool, bool isRelative)
{
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows)
{
cudev::Texture<T> texSrcWhole(srcWhole);
Filter<cudev::TexturePtr<T>> filter_src(texSrcWhole);
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
}
else
{
cudev::TextureOff<T> texSrcWhole(srcWhole, yoff, xoff);
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>> brdSrc(texSrcWhole, brd);
Filter<BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>>> filter_src(brdSrc);
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
if (isRelative)
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
else
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
}
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
Expand Down Expand Up @@ -203,20 +235,20 @@ namespace cv { namespace cuda { namespace device
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative)
{
if (stream == 0)
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20);
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20, isRelative);
else
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20);
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20, isRelative);
}
};

template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

static const caller_t callers[3][5] =
{
Expand Down Expand Up @@ -244,24 +276,24 @@ namespace cv { namespace cuda { namespace device
};

callers[interpolation][borderMode](static_cast<PtrStepSz<T>>(src), static_cast<PtrStepSz<T>>(srcWhole), xoff, yoff, xmap, ymap,
static_cast<PtrStepSz<T>>(dst), borderValue, stream, cc20);
static_cast<PtrStepSz<T>>(dst), borderValue, stream, cc20, isRelative);
}

template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);

template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
} // namespace imgproc
}}} // namespace cv { namespace cuda { namespace cudev

Expand Down
9 changes: 6 additions & 3 deletions modules/cudawarping/src/remap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,19 @@ namespace cv { namespace cuda { namespace device
{
template <typename T>
void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst,
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
}
}}}

void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputArray _ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
{
using namespace cv::cuda::device::imgproc;

const bool hasRelativeFlag = ((interpolation & WARP_RELATIVE_MAP) != 0);
interpolation &= ~WARP_RELATIVE_MAP;

typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
static const func_t funcs[6][4] =
{
{remap_gpu<uchar> , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3> , remap_gpu<uchar4> },
Expand Down Expand Up @@ -98,7 +101,7 @@ void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputA
src.locateROI(wholeSize, ofs);

func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20), hasRelativeFlag);
}

#endif // HAVE_CUDA
Loading
Loading