From 5bf7569f4918fd7f3863fad15b2976712a1f3d6d Mon Sep 17 00:00:00 2001
From: cudawarped <12133430+cudawarped@users.noreply.github.com>
Date: Fri, 17 Nov 2023 12:53:41 +0200
Subject: [PATCH] cudaoptflow: fix FarnebackOpticalFlow internal stream
 synchronization when used with an external CUDA stream

When an external stream is supplied it only replaces streams[0]; frame1 is
still converted on the internal streams[1], which has no ordering with
respect to work the caller queued on the external stream (for example the
upload of frame1 itself). Record an event on the caller's stream and make
streams[1] wait on that event before it reads frame1.

The event is recorded on the supplied stream explicitly rather than on the
default stream, so the ordering does not rely on implicit default-stream
synchronization.

See https://github.com/opencv/opencv/issues/24540
---
 modules/cudaoptflow/src/farneback.cpp     |  6 +++
 modules/cudaoptflow/test/test_optflow.cpp | 60 +++++++++++++++++++++++
 2 files changed, 66 insertions(+)

diff --git a/modules/cudaoptflow/src/farneback.cpp b/modules/cudaoptflow/src/farneback.cpp
index 7cc8373f72b..eb82d0c34e4 100644
--- a/modules/cudaoptflow/src/farneback.cpp
+++ b/modules/cudaoptflow/src/farneback.cpp
@@ -140,6 +140,7 @@ namespace
         int polyN_;
         double polySigma_;
         int flags_;
+        Event sourceStreamComplete; // recorded on the caller's stream; internal streams wait on it
 
     private:
         void prepareGaussian(
@@ -317,7 +318,10 @@ namespace
 
         Stream streams[5];
         if (stream)
+        {
             streams[0] = stream;
+            sourceStreamComplete.record(stream); // must be the caller's stream, not the default one
+        }
 
         Size size = frame0.size();
         GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;
@@ -336,6 +340,8 @@ namespace
         }
 
         frame0.convertTo(frames_[0], CV_32F, streams[0]);
+        if (stream)
+            streams[1].waitEvent(sourceStreamComplete); // frame1 may still be pending on the caller's stream
         frame1.convertTo(frames_[1], CV_32F, streams[1]);
 
         if (fastPyramids_)
diff --git a/modules/cudaoptflow/test/test_optflow.cpp b/modules/cudaoptflow/test/test_optflow.cpp
index 214e6e48ffe..985143165df 100644
--- a/modules/cudaoptflow/test/test_optflow.cpp
+++ b/modules/cudaoptflow/test/test_optflow.cpp
@@ -355,6 +355,66 @@ INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, FarnebackOpticalFlow, testing::Combine(
     testing::Values(FarnebackOptFlowFlags(0), FarnebackOptFlowFlags(cv::OPTFLOW_FARNEBACK_GAUSSIAN)),
     testing::Values(UseInitFlow(false), UseInitFlow(true))));
 
+
+PARAM_TEST_CASE(FarnebackOpticalFlowAsync, cv::cuda::DeviceInfo, PyrScale, PolyN, FarnebackOptFlowFlags)
+{
+    cv::cuda::DeviceInfo devInfo;
+    double pyrScale;
+    int polyN;
+    int flags;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);
+        pyrScale = GET_PARAM(1);
+        polyN = GET_PARAM(2);
+        flags = GET_PARAM(3);
+
+        cv::cuda::setDevice(devInfo.deviceID());
+    }
+};
+
+CUDA_TEST_P(FarnebackOpticalFlowAsync, Accuracy)
+{
+    cv::Mat frame0Mat = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame0Mat.empty());
+
+    cv::Mat frame1Mat = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(frame1Mat.empty());
+
+    cv::Ptr<cv::cuda::FarnebackOpticalFlow> farn = cv::cuda::FarnebackOpticalFlow::create();
+    farn->setPyrScale(pyrScale);
+    farn->setPolyN(polyN);
+    farn->setPolySigma(1.1);
+    farn->setFlags(flags);
+
+    Stream sourceStream;
+    HostMem dummyHost(4000, 4000, CV_8UC3), frame0(frame0Mat), frame1(frame1Mat);
+    GpuMat d_flow, dummyDevice(dummyHost.size(), dummyHost.type()), frame0Device(frame0.size(), frame0.type()), frame1Device(frame1.size(), frame1.type());
+
+    // initialize and warm up CUDA kernels to ensure this doesn't occur during the test
+    farn->calc(loadMat(frame0Mat), loadMat(frame1Mat), d_flow);
+    d_flow.setTo(0);
+
+    frame0Device.upload(frame0, sourceStream);
+    // place extra work in sourceStream to test internal stream synchronization by delaying the upload of frame1 in that stream, see https://github.com/opencv/opencv/issues/24540
+    dummyDevice.upload(dummyHost, sourceStream);
+    frame1Device.upload(frame1, sourceStream);
+    farn->calc(frame0Device, frame1Device, d_flow, sourceStream);
+
+    Mat flow;
+    cv::calcOpticalFlowFarneback(
+        frame0, frame1, flow, farn->getPyrScale(), farn->getNumLevels(), farn->getWinSize(),
+        farn->getNumIters(), farn->getPolyN(), farn->getPolySigma(), farn->getFlags());
+    EXPECT_MAT_SIMILAR(flow, d_flow, 1e-4);
+}
+
+INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, FarnebackOpticalFlowAsync, testing::Combine(
+    ALL_DEVICES,
+    testing::Values(PyrScale(0.3)),
+    testing::Values(PolyN(5)),
+    testing::Values(FarnebackOptFlowFlags(0))));
+
 //////////////////////////////////////////////////////
 // OpticalFlowDual_TVL1
 