-
Notifications
You must be signed in to change notification settings - Fork 5.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add interleaved versions of phase/cartToPolar/polarToCart #3607
Merged
Merged
Changes from 6 commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
f19a582
add interleaved versions of phase/cartToPolar/polarToCart
chacha21 b330b6c
fixed compilation
chacha21 7e1435b
simplifications as suggested
chacha21 997927f
Merge branch 'cuda_phase_interleaved' of https://github.com/chacha21/…
chacha21 0552aed
more simplifications as suggested
chacha21 094d517
fixed bug
chacha21 19c772f
modifications as suggested
chacha21 9b4b9dd
Merge branch '4.x' into cuda_phase_interleaved
chacha21 63b1f29
Merge remote-tracking branch 'upstream/4.x' into cuda_phase_interleaved
chacha21 d4e341e
Merge remote-tracking branch 'upstream/4.x' into cuda_phase_interleaved
chacha21 65f75dc
disambiguification of tuple
chacha21 0b65a8b
new attempt to fix compiler error under Ubuntu
chacha21 cf284c8
new attempt to please both msvc and gcc
chacha21 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,11 +66,7 @@ void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& | |
|
||
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); | ||
|
||
GpuMat_<float> xc(x.reshape(1)); | ||
GpuMat_<float> yc(y.reshape(1)); | ||
GpuMat_<float> magc(dst.reshape(1)); | ||
|
||
gridTransformBinary(xc, yc, magc, magnitude_func<float>(), stream); | ||
gridTransformBinary(globPtr<float>(x), globPtr<float>(y), globPtr<float>(dst), magnitude_func<float>(), stream); | ||
|
||
syncOutput(dst, _dst, stream); | ||
} | ||
|
@@ -85,11 +81,7 @@ void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stre | |
|
||
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); | ||
|
||
GpuMat_<float> xc(x.reshape(1)); | ||
GpuMat_<float> yc(y.reshape(1)); | ||
GpuMat_<float> magc(dst.reshape(1)); | ||
|
||
gridTransformBinary(xc, yc, magc, magnitude_sqr_func<float>(), stream); | ||
gridTransformBinary(globPtr<float>(x), globPtr<float>(y), globPtr<float>(dst), magnitude_sqr_func<float>(), stream); | ||
|
||
syncOutput(dst, _dst, stream); | ||
} | ||
|
@@ -104,14 +96,26 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI | |
|
||
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); | ||
|
||
GpuMat_<float> xc(x.reshape(1)); | ||
GpuMat_<float> yc(y.reshape(1)); | ||
GpuMat_<float> anglec(dst.reshape(1)); | ||
if (angleInDegrees) | ||
gridTransformBinary(globPtr<float>(x), globPtr<float>(y), globPtr<float>(dst), direction_func<float, true>(), stream); | ||
else | ||
gridTransformBinary(globPtr<float>(x), globPtr<float>(y), globPtr<float>(dst), direction_func<float, false>(), stream); | ||
|
||
syncOutput(dst, _dst, stream); | ||
} | ||
|
||
void cv::cuda::phase(InputArray _xy, OutputArray _dst, bool angleInDegrees, Stream& stream) | ||
{ | ||
GpuMat xy = getInputMat(_xy, stream); | ||
|
||
CV_Assert( xy.type() == CV_32FC2 ); | ||
|
||
GpuMat dst = getOutputMat(_dst, xy.size(), CV_32FC1, stream); | ||
|
||
if (angleInDegrees) | ||
gridTransformBinary(xc, yc, anglec, direction_func<float, true>(), stream); | ||
gridTransformUnary(globPtr<float2>(xy), globPtr<float>(dst), direction_interleaved_func<float2, true>(), stream); | ||
else | ||
gridTransformBinary(xc, yc, anglec, direction_func<float, false>(), stream); | ||
gridTransformUnary(globPtr<float2>(xy), globPtr<float>(dst), direction_interleaved_func<float2, false>(), stream); | ||
|
||
syncOutput(dst, _dst, stream); | ||
} | ||
|
@@ -155,6 +159,67 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu | |
syncOutput(angle, _angle, stream); | ||
} | ||
|
||
// Interleaved-input overload: takes one two-channel matrix and produces two | ||
// separate single-channel outputs, one for magnitude and one for angle. | ||
// _xy            - input, must be CV_32FC2 (asserted below); presumably (x, y) pairs - TODO confirm channel order. | ||
// _mag, _angle   - outputs, requested as CV_32FC1 with the same size as _xy. | ||
// angleInDegrees - selects direction_interleaved_func<float2, true> instead of <float2, false>. | ||
// stream         - forwarded to getInputMat/getOutputMat, gridTransformTuple and syncOutput. | ||
void cv::cuda::cartToPolar(InputArray _xy, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream) | ||
{ | ||
GpuMat xy = getInputMat(_xy, stream); | ||
|
||
CV_Assert( xy.type() == CV_32FC2 ); | ||
|
||
GpuMat mag = getOutputMat(_mag, xy.size(), CV_32FC1, stream); | ||
GpuMat angle = getOutputMat(_angle, xy.size(), CV_32FC1, stream); | ||
|
||
// NOTE(review): both outputs were requested as CV_32FC1 above, so reshape(1) looks like a no-op here - confirm. | ||
GpuMat_<float> magc(mag.reshape(1)); | ||
GpuMat_<float> anglec(angle.reshape(1)); | ||
|
||
// The two branches differ only in the bool template argument of the direction | ||
// functor; it is a template parameter, so the runtime flag has to be branched on here. | ||
if (angleInDegrees) | ||
{ | ||
gridTransformTuple(globPtr<float2>(xy), | ||
tie(magc, anglec), | ||
make_tuple( | ||
magnitude_interleaved_func<float2>(), | ||
direction_interleaved_func<float2, true>()), | ||
stream); | ||
} | ||
else | ||
{ | ||
gridTransformTuple(globPtr<float2>(xy), | ||
tie(magc, anglec), | ||
make_tuple( | ||
magnitude_interleaved_func<float2>(), | ||
direction_interleaved_func<float2, false>()), | ||
stream); | ||
} | ||
|
||
syncOutput(mag, _mag, stream); | ||
syncOutput(angle, _angle, stream); | ||
} | ||
|
||
// Fully interleaved overload: two-channel input, two-channel output. | ||
// _xy            - input, must be CV_32FC2 (asserted below). | ||
// _magAngle      - output, requested as CV_32FC2 with the same size as _xy; | ||
//                  presumably magnitude in channel 0 and angle in channel 1 - TODO confirm against the functor. | ||
// angleInDegrees - selects magnitude_direction_interleaved_func<float2, true> instead of <float2, false>. | ||
// stream         - forwarded to getInputMat/getOutputMat, gridTransformUnary and syncOutput. | ||
void cv::cuda::cartToPolar(InputArray _xy, OutputArray _magAngle, bool angleInDegrees, Stream& stream) | ||
{ | ||
GpuMat xy = getInputMat(_xy, stream); | ||
|
||
CV_Assert( xy.type() == CV_32FC2 ); | ||
|
||
GpuMat magAngle = getOutputMat(_magAngle, xy.size(), CV_32FC2, stream); | ||
|
||
// The flag is a template argument of the functor, so each value needs its own call. | ||
if (angleInDegrees) | ||
{ | ||
gridTransformUnary(globPtr<float2>(xy), | ||
globPtr<float2>(magAngle), | ||
magnitude_direction_interleaved_func<float2, true>(), | ||
stream); | ||
} | ||
else | ||
{ | ||
gridTransformUnary(globPtr<float2>(xy), | ||
globPtr<float2>(magAngle), | ||
magnitude_direction_interleaved_func<float2, false>(), | ||
stream); | ||
} | ||
|
||
syncOutput(magAngle, _magAngle, stream); | ||
} | ||
|
||
namespace | ||
{ | ||
template <typename T> struct sincos_op | ||
|
@@ -173,12 +238,12 @@ namespace | |
}; | ||
|
||
template <typename T, bool useMag> | ||
__global__ void polarToCartImpl_(const GlobPtr<T> mag, const GlobPtr<T> angle, GlobPtr<T> xmat, GlobPtr<T> ymat, const T scale, const int rows, const int cols) | ||
__global__ void polarToCartImpl_(const PtrStepSz<T> mag, const PtrStepSz<T> angle, PtrStepSz<T> xmat, PtrStepSz<T> ymat, const T scale) | ||
{ | ||
const int x = blockDim.x * blockIdx.x + threadIdx.x; | ||
const int y = blockDim.y * blockIdx.y + threadIdx.y; | ||
|
||
if (x >= cols || y >= rows) | ||
if (x >= angle.cols || y >= angle.rows) | ||
return; | ||
|
||
const T mag_val = useMag ? mag(y, x) : static_cast<T>(1.0); | ||
|
@@ -192,23 +257,90 @@ namespace | |
ymat(y, x) = mag_val * sin_a; | ||
} | ||
|
||
// Kernel: reads separate mag/angle planes and writes one two-component element | ||
// {mag * cos, mag * sin} per (y, x) position into xymat. | ||
// useMag - when false, mag is never read and the magnitude is taken as 1. | ||
// scale  - multiplied into the angle before it is handed to sincos_op. | ||
template <typename T, bool useMag> | ||
__global__ void polarToCartDstInterleavedImpl_(const PtrStepSz<T> mag, const PtrStepSz<T> angle, PtrStepSz<typename MakeVec<T, 2>::type > xymat, const T scale) | ||
{ | ||
typedef typename MakeVec<T, 2>::type T2; | ||
// Each thread handles the single position derived from its block and thread indices. | ||
const int x = blockDim.x * blockIdx.x + threadIdx.x; | ||
const int y = blockDim.y * blockIdx.y + threadIdx.y; | ||
|
||
// NOTE(review): the guard uses xymat's dimensions, yet mag and angle are indexed with the | ||
// same (y, x) below - assumes all three share one size; confirm against the caller. | ||
if (x >= xymat.cols || y >= xymat.rows) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Try to keep the out of range check consistent. I realize Then you only need to use |
||
return; | ||
|
||
const T mag_val = useMag ? mag(y, x) : static_cast<T>(1.0); | ||
const T angle_val = angle(y, x); | ||
|
||
// sincos_op presumably stores the sine and cosine of its first argument through the two pointers - TODO confirm. | ||
T sin_a, cos_a; | ||
sincos_op<T> op; | ||
op(scale * angle_val, &sin_a, &cos_a); | ||
|
||
const T2 xy = {mag_val * cos_a, mag_val * sin_a}; | ||
xymat(y, x) = xy; | ||
} | ||
|
||
// Kernel: reads one two-component element per (y, x) from magAngle, treating .x as the | ||
// magnitude and .y as the angle, and writes {mag * cos, mag * sin} to xymat. | ||
// scale - multiplied into the angle before it is handed to sincos_op. | ||
template <typename T> | ||
__global__ void polarToCartInterleavedImpl_(const PtrStepSz<typename MakeVec<T, 2>::type > magAngle, PtrStepSz<typename MakeVec<T, 2>::type > xymat, const T scale) | ||
{ | ||
typedef typename MakeVec<T, 2>::type T2; | ||
// Each thread handles the single position derived from its block and thread indices. | ||
const int x = blockDim.x * blockIdx.x + threadIdx.x; | ||
const int y = blockDim.y * blockIdx.y + threadIdx.y; | ||
|
||
// Guard on the input's dimensions; xymat is written at the same (y, x), so it is | ||
// assumed to be at least as large - TODO confirm against the caller. | ||
if (x >= magAngle.cols || y >= magAngle.rows) | ||
return; | ||
|
||
const T2 magAngle_val = magAngle(y, x); | ||
const T mag_val = magAngle_val.x; | ||
const T angle_val = magAngle_val.y; | ||
|
||
// sincos_op presumably stores the sine and cosine of its first argument through the two pointers - TODO confirm. | ||
T sin_a, cos_a; | ||
sincos_op<T> op; | ||
op(scale * angle_val, &sin_a, &cos_a); | ||
|
||
const T2 xy = {mag_val * cos_a, mag_val * sin_a}; | ||
xymat(y, x) = xy; | ||
} | ||
|
||
template <typename T> | ||
void polarToCartImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t& stream) | ||
{ | ||
GpuMat_<T> xc(x.reshape(1)); | ||
GpuMat_<T> yc(y.reshape(1)); | ||
GpuMat_<T> magc(mag.reshape(1)); | ||
GpuMat_<T> anglec(angle.reshape(1)); | ||
const dim3 block(32, 8); | ||
const dim3 grid(divUp(angle.cols, block.x), divUp(angle.rows, block.y)); | ||
|
||
const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0); | ||
|
||
if (mag.empty()) | ||
polarToCartImpl_<T, false> << <grid, block, 0, stream >> >(mag, angle, x, y, scale); | ||
else | ||
polarToCartImpl_<T, true> << <grid, block, 0, stream >> >(mag, angle, x, y, scale); | ||
} | ||
|
||
template <typename T> | ||
void polarToCartDstInterleavedImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream) | ||
{ | ||
typedef typename MakeVec<T, 2>::type T2; | ||
|
||
const dim3 block(32, 8); | ||
const dim3 grid(divUp(anglec.cols, block.x), divUp(anglec.rows, block.y)); | ||
const dim3 grid(divUp(angle.cols, block.x), divUp(angle.rows, block.y)); | ||
|
||
const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0); | ||
|
||
if (magc.empty()) | ||
polarToCartImpl_<T, false> << <grid, block, 0, stream >> >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols); | ||
if (mag.empty()) | ||
polarToCartDstInterleavedImpl_<T, false> << <grid, block, 0, stream >> >(mag, angle, xy, scale); | ||
else | ||
polarToCartImpl_<T, true> << <grid, block, 0, stream >> >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols); | ||
polarToCartDstInterleavedImpl_<T, true> << <grid, block, 0, stream >> >(mag, angle, xy, scale); | ||
} | ||
|
||
// Host-side launcher for polarToCartInterleavedImpl_<T>. | ||
// magAngle       - input matrix; its cols/rows determine the launch grid. | ||
// xy             - output matrix passed straight to the kernel. | ||
// angleInDegrees - when true the kernel receives scale = CV_PI / 180, otherwise 1. | ||
// stream         - CUDA stream the kernel is launched on. | ||
template <typename T> | ||
void polarToCartInterleavedImpl(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream) | ||
{ | ||
// NOTE(review): T2 is not referenced anywhere else in this function body. | ||
typedef typename MakeVec<T, 2>::type T2; | ||
|
||
// 32x8 threads per block; divUp presumably rounds up so the grid covers every column and row - TODO confirm. | ||
const dim3 block(32, 8); | ||
const dim3 grid(divUp(magAngle.cols, block.x), divUp(magAngle.rows, block.y)); | ||
|
||
const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0); | ||
|
||
polarToCartInterleavedImpl_<T> << <grid, block, 0, stream >> >(magAngle, xy, scale); | ||
} | ||
} | ||
|
||
|
@@ -237,4 +369,48 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O | |
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); | ||
} | ||
|
||
// Overload with separate magnitude/angle inputs and one two-channel output. | ||
// _mag           - may be empty; otherwise must match _angle in type and size (asserted below). | ||
// _angle         - depth must be CV_32F or CV_64F (asserted below). | ||
// _xy            - output, requested with _angle's size and depth and 2 channels. | ||
// angleInDegrees - forwarded to the implementation selected from the dispatch table. | ||
// _stream        - used for the input/output helpers and converted to a raw cudaStream_t for the launch. | ||
void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _xy, bool angleInDegrees, Stream& _stream) | ||
{ | ||
typedef void(*func_t)(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream); | ||
// Indexed by angle.depth() below; only slots 5 and 6 are populated (float and double | ||
// instantiations), presumably matching the numeric values of CV_32F and CV_64F - TODO confirm. | ||
static const func_t funcs[7] = { 0, 0, 0, 0, 0, polarToCartDstInterleavedImpl<float>, polarToCartDstInterleavedImpl<double> }; | ||
|
||
GpuMat mag = getInputMat(_mag, _stream); | ||
GpuMat angle = getInputMat(_angle, _stream); | ||
|
||
// NOTE(review): only the depth of angle is checked here, not its channel count - | ||
// confirm whether a multi-channel angle should be rejected. | ||
CV_Assert(angle.depth() == CV_32F || angle.depth() == CV_64F); | ||
CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) ); | ||
|
||
GpuMat xy = getOutputMat(_xy, angle.size(), CV_MAKETYPE(angle.depth(), 2), _stream); | ||
|
||
cudaStream_t stream = StreamAccessor::getStream(_stream); | ||
funcs[angle.depth()](mag, angle, xy, angleInDegrees, stream); | ||
// Checks the CUDA error state right after the implementation call returns. | ||
CV_CUDEV_SAFE_CALL( cudaGetLastError() ); | ||
|
||
syncOutput(xy, _xy, _stream); | ||
|
||
// A raw handle of 0 is presumably the default stream; in that case wait for the device before returning. | ||
if (stream == 0) | ||
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); | ||
} | ||
|
||
// Fully interleaved overload: two-channel magnitude/angle input, two-channel output. | ||
// _magAngle      - input, must be CV_32FC2 or CV_64FC2 (asserted below). | ||
// _xy            - output, requested with the same size and type as _magAngle. | ||
// angleInDegrees - forwarded to the implementation selected from the dispatch table. | ||
// _stream        - used for the input/output helpers and converted to a raw cudaStream_t for the launch. | ||
void cv::cuda::polarToCart(InputArray _magAngle, OutputArray _xy, bool angleInDegrees, Stream& _stream) | ||
{ | ||
typedef void(*func_t)(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream); | ||
// Indexed by magAngle.depth() below; only slots 5 and 6 are populated (float and double | ||
// instantiations), presumably matching the numeric values of CV_32F and CV_64F - TODO confirm. | ||
static const func_t funcs[7] = { 0, 0, 0, 0, 0, polarToCartInterleavedImpl<float>, polarToCartInterleavedImpl<double> }; | ||
|
||
GpuMat magAngle = getInputMat(_magAngle, _stream); | ||
|
||
CV_Assert(magAngle.type() == CV_32FC2 || magAngle.type() == CV_64FC2); | ||
|
||
GpuMat xy = getOutputMat(_xy, magAngle.size(), magAngle.type(), _stream); | ||
|
||
cudaStream_t stream = StreamAccessor::getStream(_stream); | ||
funcs[magAngle.depth()](magAngle, xy, angleInDegrees, stream); | ||
// Checks the CUDA error state right after the implementation call returns. | ||
CV_CUDEV_SAFE_CALL( cudaGetLastError() ); | ||
|
||
syncOutput(xy, _xy, _stream); | ||
|
||
// A raw handle of 0 is presumably the default stream; in that case wait for the device before returning. | ||
if (stream == 0) | ||
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); | ||
} | ||
|
||
#endif |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we remove the reshape completely? Looking at it again, it doesn't do anything, i.e.
GpuMat_<float> magc(mag);