diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..f459e72 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,50 @@ +cmake_minimum_required (VERSION 2.8) +project (fusibile) + +find_package(OpenCV REQUIRED ) +#find_package(Armadillo REQUIRED) +find_package(CUDA 6.0 REQUIRED ) # For Cuda Managed Memory and c++11 + +include_directories(${OpenCV_INCLUDE_DIRS}) +include_directories(.) + +#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-O3 --use_fast_math --ptxas-options=-v -std=c++11 --compiler-options -Wall -gencode arch=compute_30,code=sm_30 -gencode arch=compute_52,code=sm_52) +set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-O3 --use_fast_math --ptxas-options=-v -std=c++11 --compiler-options -Wall -gencode arch=compute_52,code=sm_52) + +if(CMAKE_COMPILER_IS_GNUCXX) + add_definitions(-std=c++11) + #add_definitions(-march=native) +endif() + +find_package(OpenMP) +if (OPENMP_FOUND) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif() + +# for YouCompleteMe +set( CMAKE_EXPORT_COMPILE_COMMANDS 1 ) + +# For compilation ... +# Specify target & source files to compile it from +cuda_add_executable( + fusibile + cameraGeometryUtils.h + vector_operations.h + camera.h + globalstate.h + algorithmparameters.h + cameraparameters.h + linestate.h + groundTruthUtils.h + displayUtils.h + mathUtils.h + fileIoUtils.h + fusibile.cu + main.cpp + ) + +# For linking ... +# Specify target & libraries to link it with +target_link_libraries(fusibile + ${OpenCV_LIBS} + ) diff --git a/algorithmparameters.h b/algorithmparameters.h new file mode 100644 index 0000000..073f8d2 --- /dev/null +++ b/algorithmparameters.h @@ -0,0 +1,93 @@ +#pragma once +#include "managed.h" + +//cost function +enum { PM_COST = 0, CENSUS_TRANSFORM = 1, ADAPTIVE_CENSUS = 2, CENSUS_SELFSIMILARITY = 3, PM_SELFSIMILARITY = 4, ADCENSUS = 5, ADCENSUS_SELFSIMILARITY = 6, SPARSE_CENSUS = 7 }; + +//cost combination +enum { COMB_ALL = 0, COMB_BEST_N = 1, COMB_ANGLE = 2, COMB_GOOD = 3}; + + +class AlgorithmParameters : public Managed{ +public: + int algorithm; // algorithm cost type + float max_disparity; // maximal disparity value CUDA + float min_disparity; // minimum disparity value (default 0) CUDA + int box_hsize; // filter kernel width CUDA + int box_vsize; // filter kernel height CUDA + float tau_color; // PM_COST max. threshold for color CUDA + float tau_gradient; // PM_COST max. threshold for gradient CUDA + float alpha; // PM_COST weighting between color and gradient CUDA + float gamma; // parameter for weight function (used e.g. in PM_COST) CUDA + int border_value; // what value should pixel at extended border get (constant or replicate -1) + int iterations; // number of iterations + bool color_processing; // use color processing or not (otherwise just grayscale processing) + float dispTol; //PM Stereo: 1, PM Huber: 0.5 + float normTol; // 0.1 ... about 5.7 degrees + float census_epsilon; //for census transform + int self_similarity_n; // number of pixels considered for self similarity + float cam_scale; //used to rescale K in case of rescaled image size + int num_img_processed; //number of images that are processed as reference images + float costThresh; // threshold to decide whether disparity/depth is valid or not + float good_factor; // for cost aggregation/combination good: factor for truncation CUDA + int n_best; // CUDA + int cost_comb; // CUDA + bool viewSelection; + float depthMin; // CUDA + float depthMax; // CUDA + // hack XXX + int cols; + int rows; + // fuse + bool storePlyFiles; + bool gt_compare; + bool gt_normal_compare; + + //threshold for consistency check + float depthThresh; + float normalThresh ; + + //how many views need to be consistent? (for update: numConsistentThresh+1) + int numConsistentThresh; + bool saveTexture; + + AlgorithmParameters(){ + algorithm = 0; // algorithm cost type + max_disparity = 256.0f; // maximal disparity value CUDA + min_disparity = 0.0f; // minimum disparity value (default 0) CUDA + box_hsize = 15; // filter kernel width CUDA + box_vsize = 15; // filter kernel height CUDA + tau_color = 10; // PM_COST max. threshold for color CUDA + tau_gradient = 2.0f; // PM_COST max. threshold for gradient CUDA + alpha = 0.9f; // PM_COST weighting between color and gradient CUDA + gamma = 10.0f; // parameter for weight function (used e.g. in PM_COST) CUDA + border_value = -1; // what value should pixel at extended border get (constant or replicate -1) + iterations = 8; // number of iterations + color_processing = false; // use color processing or not (otherwise just grayscale processing) + dispTol = 1.0; //PM Stereo: 1, PM Huber: 0.5 + normTol = 0.1f; // 0.1 ... about 5.7 degrees + census_epsilon = 2.5f; //for census transform + self_similarity_n = 50; // number of pixels considered for self similarity + cam_scale = 1.0f; //used to rescale K in case of rescaled image size + num_img_processed = 1; //number of images that are processed as reference images + costThresh = 40.0f; // threshold to decide whether disparity/depth is valid or not + good_factor = 1.5f; // for cost aggregation/combination good: factor for truncation CUDA + n_best = 2; // CUDA + cost_comb = 1; // CUDA + viewSelection = false; + depthMin = 2.0f; // CUDA + depthMax = 20.0f; // CUDA + + storePlyFiles = true; + gt_compare = false; + gt_normal_compare = false; + + //threshold for consistency check + depthThresh = 0.5f; + normalThresh = 0.52f; + + //how many views need to be consistent? (for update: numConsistentThresh+1) + numConsistentThresh = 2; + saveTexture = true; + } +}; diff --git a/camera.h b/camera.h new file mode 100644 index 0000000..7760a43 --- /dev/null +++ b/camera.h @@ -0,0 +1,52 @@ +#pragma once +#include "managed.h" +#include + +class Camera_cu : public Managed { +public: + float* P; + /*float* P_col3;*/ + float4 P_col34; + float* P_inv; + float* M_inv; + float* R; + /*float* t;*/ + float4 t4; + /*float* C;*/ + float4 C4; + float fx; + float fy; + float f; + float alpha; + float baseline; + bool reference; + float depthMin; //this could be figured out from the bounding volume (not done right now, but that's why this parameter is here as well and not only in AlgorithmParameters) + float depthMax; //this could be figured out from the bounding volume (not done right now, but that's why this parameter is here as well and not only in AlgorithmParameters) + //int id; //corresponds to the image name id (eg. 0-10), independent of order in argument list, just dependent on name + char* id; + float* K; + float* K_inv; + Camera_cu() + { + baseline = 0.54f; + reference = false; + depthMin = 2.0f; //this could be figured out from the bounding volume (not done right now, but that's why this parameter is here as well and not only in AlgorithmParameters) + depthMax = 20.0f; + cudaMallocManaged (&P, sizeof(float) * 4 * 4); + cudaMallocManaged (&P_inv, sizeof(float) * 4 * 4); + cudaMallocManaged (&M_inv, sizeof(float) * 4 * 4); + cudaMallocManaged (&K, sizeof(float) * 4 * 4); + cudaMallocManaged (&K_inv, sizeof(float) * 4 * 4); + cudaMallocManaged (&R, sizeof(float) * 4 * 4); + } + ~Camera_cu() + { + cudaFree (P); + cudaFree (P_inv); + cudaFree (M_inv); + cudaFree (K); + cudaFree (K_inv); + cudaFree (R); + } + +}; diff --git a/cameraGeometryUtils.h b/cameraGeometryUtils.h new file mode 100644 index 0000000..ef61bc4 --- /dev/null +++ b/cameraGeometryUtils.h @@ -0,0 +1,495 @@ +/* + * utility functions for camera geometry related stuff + * most of them from: "Multiple View Geometry in computer vision" by Hartley and Zisserman + */ + +#pragma once +#include "mathUtils.h" +#include +#include + +Mat_ getColSubMat ( Mat_ M, int* indices, int numCols ) { + Mat_ subMat = Mat::zeros ( M.rows,numCols,CV_32F ); + for ( int i = 0; i < numCols; i++ ) { + M.col ( indices[i] ).copyTo ( subMat.col ( i ) ); + } + return subMat; +} + +// Multi View Geometry, page 163 +Mat_ getCameraCenter ( Mat_ &P ) { + Mat_ C = Mat::zeros ( 4,1,CV_32F ); + + Mat_ M = Mat::zeros ( 3,3,CV_32F ); + + int xIndices[] = { 1, 2, 3 }; + int yIndices[] = { 0, 2, 3 }; + int zIndices[] = { 0, 1, 3 }; + int tIndices[] = { 0, 1, 2 }; + + // x coordinate + M = getColSubMat ( P,xIndices,sizeof ( xIndices )/sizeof ( xIndices[0] ) ); + C ( 0,0 ) = ( float )determinant ( M ); + + // y coordinate + M = getColSubMat ( P,yIndices,sizeof ( yIndices )/sizeof ( yIndices[0] ) ); + C ( 1,0 ) = - ( float )determinant ( M ); + + // z coordinate + M = getColSubMat ( P,zIndices,sizeof ( zIndices )/sizeof ( zIndices[0] ) ); + C ( 2,0 ) = ( float )determinant ( M ); + + // t coordinate + M = getColSubMat ( P,tIndices,sizeof ( tIndices )/sizeof ( tIndices[0] ) ); + C ( 3,0 ) = - ( float )determinant ( M ); + + return C; +} + +inline Vec3f get3Dpoint ( Camera &cam, float x, float y, float depth ) { + // in case camera matrix is not normalized: see page 162, then depth might not be the real depth but w and depth needs to be computed from that first + + Mat_ pt = Mat::ones ( 3,1,CV_32F ); + pt ( 0,0 ) = x; + pt ( 1,0 ) = y; + + //formula taken from page 162 (alternative expression) + Mat_ ptX = cam.M_inv * ( depth*pt - cam.P.col ( 3 ) ); + return Vec3f ( ptX ( 0 ),ptX ( 1 ),ptX ( 2 ) ); +} + +inline Vec3f get3Dpoint ( Camera &cam, int x, int y, float depth ){ + return get3Dpoint(cam,(float)x,(float)y,depth); +} + +// get the viewing ray for a pixel position of the camera +static inline Vec3f getViewVector ( Camera &cam, int x, int y, bool rectified ) { + + //get some point on the line (the other point on the line is the camera center) + Vec3f ptX = get3Dpoint ( cam,x,y,1.0f ); + + //get vector between camera center and other point on the line + Vec3f v = ptX - cam.C; + return normalize ( v ); +} + +//get d parameter of plane pi = [nT, d]T, which is the distance of the plane to the camera center +float inline getPlaneDistance ( Vec3f &normal, Vec3f &X ) { + /*return -normal ( 0 )*X ( 0 )-normal ( 1 )*X ( 1 )-normal ( 2 )*X ( 2 );*/ + return -(normal.dot(X)); +} + +static float getD ( Vec3f &normal, int x0, int y0, float depth, bool rectified, Camera &cam ) { + Vec3f pt; + { + pt = get3Dpoint ( cam, (float)x0, (float)y0, depth ); + } + /* XXX WTF ? + float d = getPlaneDistance ( normal,pt ); + if ( d != d ) { + d = FLT_MAX; + } + return d; + */ + return getPlaneDistance ( normal,pt ); +} + +Mat_ getTransformationMatrix ( Mat_ R, Mat_ t ) { + Mat_ transMat = Mat::eye ( 4,4, CV_32F ); + //Mat_ Rt = - R * t; + R.copyTo ( transMat ( Range ( 0,3 ),Range ( 0,3 ) ) ); + t.copyTo ( transMat ( Range ( 0,3 ),Range ( 3,4 ) ) ); + + return transMat; +} + +/* compute depth value from disparity or disparity value from depth + * Input: f - focal length in pixel + * baseline - baseline between cameras (in meters) + * d - either disparity or depth value + * Output: either depth or disparity value + */ +float disparityDepthConversion ( float f, float baseline, float d ) { + /*if ( d == 0 )*/ + /*return FLT_MAX;*/ + return f * baseline / d; +} + +Mat_ getTransformationReferenceToOrigin ( Mat_ R,Mat_ t ) { + // create rotation translation matrix + Mat_ transMat_original = getTransformationMatrix ( R,t ); + + // get transformation matrix for [R1|t1] = [I|0] + return transMat_original.inv (); +} + +void transformCamera ( Mat_ R,Mat_ t, Mat_ transform, Camera &cam, Mat_ K ) { + // create rotation translation matrix + Mat_ transMat_original = getTransformationMatrix ( R,t ); + + //transform + Mat_ transMat_t = transMat_original * transform; + + // compute translated P (only consider upper 3x4 matrix) + cam.P = K * transMat_t ( Range ( 0,3 ),Range ( 0,4 ) ); + // set R and t + cam.R = transMat_t ( Range ( 0,3 ),Range ( 0,3 ) ); + cam.t = transMat_t ( Range ( 0,3 ),Range ( 3,4 ) ); + // set camera center C + Mat_ C = getCameraCenter ( cam.P ); + + C = C / C ( 3,0 ); + cam.C = Vec3f ( C ( 0,0 ),C ( 1,0 ),C ( 2,0 ) ); +} + +Mat_ scaleK ( Mat_ K, float scaleFactor ) { + //compute focal length in mm (for APS-C sensor) + //float imgwidth_original = 3072.f; + //float imgheight_original = 2048.f; + //float ccdWidth = 22.7f; + //float ccdHeight = 15.1f; + //float f_mm = alpha_x * ccdWidth / imgwidth_original; + //float f_mm2 = alpha_y * ccdHeight / imgheight_original; + //alpha_x = f_mm / (ccdWidth / (imgwidth_original/scaleFactor)); + //alpha_y = f_mm2 / (ccdHeight / (imgheight_original/scaleFactor)); + //cout << "focal length: " << f_mm << "/" << f_mm2 << " , " << alpha_x << "/" << alpha_y << endl; + + Mat_ K_scaled = K.clone(); + //scale focal length + K_scaled ( 0,0 ) = K ( 0,0 ) / scaleFactor; + K_scaled ( 1,1 ) = K ( 1,1 ) / scaleFactor; + //scale center point + K_scaled ( 0,2 ) = K ( 0,2 ) / scaleFactor; + K_scaled ( 1,2 ) = K ( 1,2 ) / scaleFactor; + + return K_scaled; +} +void copyOpencvVecToFloat4 ( Vec3f &v, float4 *a) +{ + a->x = v(0); + a->y = v(1); + a->z = v(2); +} +void copyOpencvVecToFloatArray ( Vec3f &v, float *a) +{ + a[0] = v(0); + a[1] = v(1); + a[2] = v(2); +} +void copyOpencvMatToFloatArray ( Mat_ &m, float **a) +{ + + for (int pj=0; pj Rt_110 = Mat::eye ( 4, 4, CV_32F ); + //110 + // 0.9999 0.0043 0.0118 0.0008 + //-0.0044 1.0000 0.0027 0.0008 + //-0.0118 -0.0028 0.9999 -0.6181 + Rt_110(0,0) = 0.9999f; + Rt_110(0,1) = 0.0043f; + Rt_110(0,2) = 0.0118f; + Rt_110(0,3) = 0.0008f; + Rt_110(1,0) = -0.0044f; + Rt_110(1,1) = 1.0000f; + Rt_110(1,2) = 0.0027f; + Rt_110(1,3) = 0.0008f; + Rt_110(2,0) = -0.0118f; + Rt_110(2,1) = -0.0028f; + Rt_110(2,2) = 0.9999f; + Rt_110(2,3) =-0.6181f; + + params.cameras[2].P = params.cameras[0].P.clone(); + params.cameras[3].P = params.cameras[1].P.clone(); + + Mat_ K,R,T,C,t; + //left camera + decomposeProjectionMatrix ( params.cameras[2].P,K,R,T); + // get 3-dimensional translation vectors and camera center (divide by augmented component) + C = T ( Range ( 0,3 ),Range ( 0,1 ) ) / T ( 3,0 ); + t = -R * C; + transformCamera ( R,t,Rt_110,params.cameras[2],K ); + + //right camera + decomposeProjectionMatrix ( params.cameras[3].P,K,R,T); + // get 3-dimensional translation vectors and camera center (divide by augmented component) + C = T ( Range ( 0,3 ),Range ( 0,1 ) ) / T ( 3,0 ); + t = -R * C; + transformCamera ( R,t,Rt_110,params.cameras[3],K ); + + + cout << params.cameras[0].P < KMaros = Mat::eye ( 3, 3, CV_32F ); + KMaros(0,0) = 8066.0; + KMaros(1,1) = 8066.0; + KMaros(0,2) = 2807.5; + KMaros(1,2) = 1871.5; + + // Load pmvs files + if ( !inputFiles.pmvs_folder.empty () ) { + numCameras = inputFiles.img_filenames.size (); + params.cameras.resize ( numCameras ); + for ( size_t i = 0; i < numCameras; i++ ) { + int lastindex = inputFiles.img_filenames[i].find_last_of("."); + string filename_without_extension = inputFiles.img_filenames[i].substr(0, lastindex); + readPFileStrechaPmvs ( inputFiles.p_folder + filename_without_extension + ".txt",params.cameras[i].P ); + unsigned found = inputFiles.img_filenames[i].find_last_of ( "." ); + //params.cameras[i].id = atoi ( inputFiles.img_filenames[i].substr ( 0,found ).c_str () ); + params.cameras[i].id = inputFiles.img_filenames[i].substr ( 0,found ).c_str (); + // params.cameras[i].P = KMaros * params.cameras[i].P; + //cout << params.cameras[i].P << endl; + + } + } + //load projection matrix from file (e.g. for Strecha) + cout << "P folder is " << inputFiles.p_folder << endl; + if ( !inputFiles.p_folder.empty () ) { + numCameras = inputFiles.img_filenames.size (); + params.cameras.resize ( numCameras ); + for ( size_t i = 0; i < numCameras; i++ ) { + readPFileStrechaPmvs ( inputFiles.p_folder + inputFiles.img_filenames[i] + ".P",params.cameras[i].P ); + unsigned found = inputFiles.img_filenames[i].find_last_of ( "." ); + //params.cameras[i].id = atoi ( inputFiles.img_filenames[i].substr ( 0,found ).c_str () ); + params.cameras[i].id = inputFiles.img_filenames[i].substr ( 0,found ).c_str (); + /*params.cameras[i].P = KMaros * params.cameras[i].P;*/ + } + + } + //load projection matrix from file (e.g. for Middlebury) + if ( !inputFiles.krt_file.empty () ) { + numCameras = inputFiles.img_filenames.size (); + params.cameras.resize ( numCameras ); + /*cout << "Num Cameras " << numCameras << endl;*/ + size_t numImages = inputFiles.img_filenames.size (); + readKRtFileMiddlebury ( inputFiles.krt_file, params.cameras, inputFiles); + for ( size_t i = 0; i < numCameras; i++ ) { + unsigned found = inputFiles.img_filenames[i].find_last_of ( "." ); + params.cameras[i].id = inputFiles.img_filenames[i].substr ( 0,found ).c_str (); + } + } + + /*cout << "KMaros is" << endl;*/ + /*cout << KMaros << endl;*/ + cout << "numCameras is " << numCameras << endl; + + + // decompose projection matrices into K, R and t + vector > K ( numCameras ); + vector > R ( numCameras ); + vector > T ( numCameras ); + + vector > C ( numCameras ); + vector > t ( numCameras ); + + for ( int i = 0; i < numCameras; i++ ) { + decomposeProjectionMatrix ( params.cameras[i].P,K[i],R[i],T[i] ); + + /*cout << "K: " << K[i] << endl;*/ + /*cout << "R: " << R[i] << endl;*/ + /*cout << "T: " << T[i] << endl;*/ + + // get 3-dimensional translation vectors and camera center (divide by augmented component) + C[i] = T[i] ( Range ( 0,3 ),Range ( 0,1 ) ) / T[i] ( 3,0 ); + t[i] = -R[i] * C[i]; + + /*cout << "C: " << C[i] << endl;*/ + /*cout << "t: " << t[i] << endl;*/ + } + + // transform projection matrices (R and t part) so that P1 = K [I | 0] + //computeTranslatedProjectionMatrices(R1, R2, t1, t2, params); + Mat_ transform = Mat::eye ( 4,4,CV_32F ); + + if ( transformP ) + transform = getTransformationReferenceToOrigin ( R[0],t[0] ); + /*cout << "transform is " << transform << endl;*/ + params.cameras[0].reference = true; + params.idRef = 0; + //cout << "K before scale is" << endl; + //cout << K[0] << endl; + /*K[0](0,1)=0;*/ + /*K[0](2,2)=1;*/ + + //assuming K is the same for all cameras + params.K = scaleK ( K[0],scaleFactor ); + params.K_inv = params.K.inv (); + // get focal length from calibration matrix + params.f = params.K ( 0,0 ); + + for ( int i = 0; i < numCameras; i++ ) { + params.cameras[i].K = scaleK(K[i],scaleFactor); + params.cameras[i].K_inv = params.cameras[i].K.inv ( ); + //params.cameras[i].f = params.cameras[i].K(0,0); + + + params.cameras[i].depthMin = depthMin; + params.cameras[i].depthMax = depthMax; + + if ( !inputFiles.bounding_folder.empty () ) { + Vec3f ptBL, ptTR; + readBoundingVolume ( inputFiles.bounding_folder + inputFiles.img_filenames[i] + ".bounding",ptBL,ptTR ); + + //cout << "d1: " << getDepth ( ptBL,params.cameras[i].P ) < blMat = Mat::zeros(4,1,CV_32F); + blMat(0,0) = ptBL(0); + blMat(1,0) = ptBL(1); + blMat(2,0) = ptBL(2); + cout << "transformed BL: " << transform * blMat << endl; + + Mat_ trMat = Mat::zeros(4,1,CV_32F); + trMat(0,0) = ptTR(0); + trMat(1,0) = ptTR(1); + trMat(2,0) = ptTR(2); + cout << "transformed TR: " << transform * trMat << endl; + */ + } + + transformCamera ( R[i],t[i], transform, params.cameras[i],params.K ); + + params.cameras[i].P_inv = params.cameras[i].P.inv ( DECOMP_SVD ); + params.cameras[i].M_inv = params.cameras[i].P.colRange ( 0,3 ).inv (); + + // set camera baseline (if unknown we need to guess something) + //float b = (float)norm(t1,t2,NORM_L2); + params.cameras[i].baseline = 0.54f; //0.54 = Kitti baseline + + // K + Mat_ tmpK = params.K.t (); + //copyOpencvMatToFloatArray ( params.K, &cpc.K); + //copyOpencvMatToFloatArray ( params.K_inv, &cpc.K_inv); + copyOpencvMatToFloatArray ( params.cameras[i].K, &cpc.cameras[i].K); + copyOpencvMatToFloatArray ( params.cameras[i].K_inv, &cpc.cameras[i].K_inv); + cpc.cameras[i].fy = params.K(1,1); + cpc.f = params.K(0,0); + cpc.cameras[i].f = params.K(0,0); + cpc.cameras[i].fx = params.K(0,0); + cpc.cameras[i].fy = params.K(1,1); + cpc.cameras[i].depthMin = params.cameras[i].depthMin; + cpc.cameras[i].depthMax = params.cameras[i].depthMax; + cpc.cameras[i].baseline = params.cameras[i].baseline; + cpc.cameras[i].reference = params.cameras[i].reference; + + /*printf("MATRIXXX\n");*/ + /*for (int pj=0; pj<3 ; pj++) {*/ + /*for (int pi=0; pi<3 ; pi++)*/ + /*cout << cpc.K[pi+pj*3] << " ";*/ + /*cout << endl;*/ + /*}*/ + + /*params.cameras[i].alpha = params.K ( 0,0 )/params.K(1,1);*/ + cpc.cameras[i].alpha = params.K ( 0,0 )/params.K(1,1); + // Copy data to cuda structure + copyOpencvMatToFloatArray ( params.cameras[i].P, &cpc.cameras[i].P); + copyOpencvMatToFloatArray ( params.cameras[i].P_inv, &cpc.cameras[i].P_inv); + copyOpencvMatToFloatArray ( params.cameras[i].M_inv, &cpc.cameras[i].M_inv); + //copyOpencvMatToFloatArray ( params.K, &cpc.cameras[i].K); + //copyOpencvMatToFloatArray ( params.K_inv, &cpc.cameras[i].K_inv); + copyOpencvMatToFloatArray ( params.cameras[i].K, &cpc.cameras[i].K); + copyOpencvMatToFloatArray ( params.cameras[i].K_inv, &cpc.cameras[i].K_inv); + copyOpencvMatToFloatArray ( params.cameras[i].R, &cpc.cameras[i].R); + /*copyOpencvMatToFloatArray ( params.cameras[i].t, &cpc.cameras[i].t);*/ + /*copyOpencvVecToFloatArray ( params.cameras[i].C, cpc.cameras[i].C);*/ + copyOpencvVecToFloat4 ( params.cameras[i].C, &cpc.cameras[i].C4); + cpc.cameras[i].t4.x = params.cameras[i].t(0); + cpc.cameras[i].t4.y = params.cameras[i].t(1); + cpc.cameras[i].t4.z = params.cameras[i].t(2); + Mat_ tmp = params.cameras[i].P.col(3); + /*cpc.cameras[i].P_col3[0] = tmp(0,0);*/ + /*cpc.cameras[i].P_col3[1] = tmp(1,0);*/ + /*cpc.cameras[i].P_col3[2] = tmp(2,0);*/ + cpc.cameras[i].P_col34.x = tmp(0,0); + cpc.cameras[i].P_col34.y = tmp(1,0); + cpc.cameras[i].P_col34.z = tmp(2,0); + + /*cout << "K for " << i << " is " << endl;*/ + /*for (int pj=0; pj<3 ; pj++) {*/ + /*for (int pi=0; pi<3 ; pi++)*/ + /*cout << cpc.K[pi+pj*3] << " ";*/ + /*cout << endl;*/ + /*}*/ + /*cout << "P for " << i << " is " << endl;*/ + /*for (int pj=0; pj<3 ; pj++) {*/ + /*for (int pi=0; pi<4 ; pi++)*/ + /*cout << cpc.cameras[i].P[pi+pj*4] << " ";*/ + /*cout << endl;*/ + /*}*/ + /*cout << "R for " << i << " is " << endl;*/ + /*for (int pj=0; pj<3 ; pj++) {*/ + /*for (int pi=0; pi<3 ; pi++)*/ + /*cout << cpc.cameras[i].R[pi+pj*3] << " ";*/ + /*cout << endl;*/ + /*}*/ + /*cout << "t for " << i << " is " << endl;*/ + /*printf("%f %f %f\n", */ + /*cpc.cameras[i].t4.x,*/ + /*cpc.cameras[i].t4.y,*/ + /*cpc.cameras[i].t4.z);*/ + /*cout << endl;*/ + + + Mat_ tmpKinv = params.K_inv.t (); + + /*cout << "Camera " << i << endl;*/ + /*cout << "P " << endl;*/ + /*cout << params.cameras[i].P_A << endl;*/ + /*cout << params.cameras[i].P << endl;*/ + + /*cout << "K " << endl;*/ + /*cout << params.K_A << endl;*/ + /*cout << params.K << endl;*/ + + /*cout << "R " << endl;*/ + /*cout << params.cameras[i].R_A << endl;*/ + /*cout << params.cameras[i].R << endl;*/ + + /*cout << "t " << endl;*/ + /*cout << params.cameras[i].t_A << endl;*/ + /*cout << params.cameras[i].t << endl;*/ + + /*cout << "C " << endl;*/ + /*cout << params.cameras[i].C_A << endl;*/ + /*cout << params.cameras[i].C << endl;*/ + /*cout << endl;*/ + } + //exit(1); + + return params; +} diff --git a/cameraparameters.h b/cameraparameters.h new file mode 100644 index 0000000..a5b785a --- /dev/null +++ b/cameraparameters.h @@ -0,0 +1,27 @@ +#pragma once + +#include "camera.h" +#include "managed.h" +#include "config.h" + +class __align__(128) CameraParameters_cu : public Managed { +public: + float f; // used only for dummy depth -> disparity conversion + bool rectified; + Camera_cu cameras[MAX_IMAGES]; + int idRef; + int cols; + int rows; + int* viewSelectionSubset; + int viewSelectionSubsetNumber; + CameraParameters_cu() + { + rectified = true; + idRef = 0; + cudaMallocManaged (&viewSelectionSubset, sizeof(int) * MAX_IMAGES); + } + ~CameraParameters_cu() + { + cudaFree (viewSelectionSubset); + } +}; diff --git a/config.h b/config.h new file mode 100644 index 0000000..845f3e6 --- /dev/null +++ b/config.h @@ -0,0 +1,280 @@ +#pragma once +#define MAX_IMAGES 64 + +#define BLOCKSIZE_W 11 + 1 // +1 for the gradient computation +#define BLOCKSIZE_H 11 + 1 // +1 for the gradient computation +#define WIN_RADIUS_W (BLOCKSIZE_W) / (2) +#define WIN_RADIUS_H (BLOCKSIZE_H) / (2) + +#define BLOCK_W 32 +#define BLOCK_H (BLOCK_W/2) +#define TILE_W BLOCK_W +#define TILE_H BLOCK_H * 2 +#define TILE_SIZE TILE_W * TILE_H +#define SHARED_SIZE_W (TILE_W + WIN_RADIUS_W * 2) +#define SHARED_SIZE_H (TILE_H + WIN_RADIUS_H * 2) +#define SHARED_SIZE (SHARED_SIZE_W * SHARED_SIZE_H) + +#define REFERENCE 0 +#define MAXCOST 1000.0f + +#define FORCEINLINE __forceinline__ +//#define FORCEINLINE + +#define pow2(x) ((x)*(x)) +#define pow3(x) ((x)*(x)*(x)) +#define pow4(x) ((x)*(x)*(x)*(x)) +#define get_pow2_norm(x,y) (pow2(x)+pow2(y)) +#define get_normf(x,y) (sqrtf((pow2(x)+pow2(y)))) +#define get_normf3(v) (sqrtf((pow2(v.x)+pow2(v.y) + pow2(v.z)))) +#define get_pow2_normf3(v) ((pow2(v.x)+pow2(v.y) + pow2(v.z))) + +// Vector operations +#define dot4(v0,v1) v0.x * v1.x + \ + v0.y * v1.y + \ + v0.z * v1.z +#define mul4(v,k) \ +v->x = v->x * k; \ +v->y = v->y * k; \ +v->z = v->z * k; + +#define vecdiv4(v,k) \ +v->x = v->x / k; \ +v->y = v->y / k; \ +v->z = v->z / k; + +#define vecdiv42(v,k) \ +v.x = v.x / k; \ +v.y = v.y / k; \ +v.z = v.z / k; + + +#define vecdiv(v,k) \ +v[0] = v[0] / k; \ +v[1] = v[1] / k; \ +v[2] = v[2] / k; + +#define get_pow2_normv3(x,y) (pow2(x)+pow2(y)) + +#define negate(v) mul(v,-1.0f) +#define negate4(v) v->x = -v->x; \ + v->y = -v->y; \ + v->z = -v->z; + +#define sub(v0,v1) v0.x = v0.x - v1.x; \ + v0.y = v0.y - v1.y; \ + v0.z = v0.z - v1.z; +#define subout(v0,v1,vout) vout.x = v0.x - v1.x; \ + vout.y = v0.y - v1.y; \ + vout.z = v0.z - v1.z; + +#define outer_product(v0,v1, out) \ +out[0] = v0[0] * v1[0]; \ +out[1] = v0[0] * v1[1]; \ +out[2] = v0[0] * v1[2]; \ +out[3] = v0[1] * v1[0]; \ +out[4] = v0[1] * v1[1]; \ +out[5] = v0[1] * v1[2]; \ +out[6] = v0[2] * v1[0]; \ +out[7] = v0[2] * v1[1]; \ +out[8] = v0[2] * v1[2]; + +#define outer_product4(v0,v1, out) \ +out[0] = v0.x * v1.x; \ +out[1] = v0.x * v1.y; \ +out[2] = v0.x * v1.z; \ +out[3] = v0.y * v1.x; \ +out[4] = v0.y * v1.y; \ +out[5] = v0.y * v1.z; \ +out[6] = v0.z * v1.x; \ +out[7] = v0.z * v1.y; \ +out[8] = v0.z * v1.z; +// Matrix operations +#define matadd(m,k) m[0] = m[0] + k; \ + m[1] = m[1] + k; \ + m[2] = m[2] + k; \ + m[3] = m[3] + k; \ + m[4] = m[4] + k; \ + m[5] = m[5] + k; \ + m[6] = m[6] + k; \ + m[7] = m[7] + k; \ + m[8] = m[8] + k; + +#define matsub(m,k) m[0] = m[0] - k; \ + m[1] = m[1] - k; \ + m[2] = m[2] - k; \ + m[3] = m[3] - k; \ + m[4] = m[4] - k; \ + m[5] = m[5] - k; \ + m[6] = m[6] - k; \ + m[7] = m[7] - k; \ + m[8] = m[8] - k; + +#define matmatsub(m0, m1) \ +m0[0] = m0[0] - m1[0]; \ +m0[1] = m0[1] - m1[1]; \ +m0[2] = m0[2] - m1[2]; \ +m0[3] = m0[3] - m1[3]; \ +m0[4] = m0[4] - m1[4]; \ +m0[5] = m0[5] - m1[5]; \ +m0[6] = m0[6] - m1[6]; \ +m0[7] = m0[7] - m1[7]; \ +m0[8] = m0[8] - m1[8]; + +#define matmatsub2(m0, m1) \ +m1[0] = m0[0] - m1[0]; \ +m1[1] = m0[1] - m1[1]; \ +m1[2] = m0[2] - m1[2]; \ +m1[3] = m0[3] - m1[3]; \ +m1[4] = m0[4] - m1[4]; \ +m1[5] = m0[5] - m1[5]; \ +m1[6] = m0[6] - m1[6]; \ +m1[7] = m0[7] - m1[7]; \ +m1[8] = m0[8] - m1[8]; + + +#define matdivide(m,k) \ +m[0] = m[0] / k; \ +m[1] = m[1] / k; \ +m[2] = m[2] / k; \ +m[3] = m[3] / k; \ +m[4] = m[4] / k; \ +m[5] = m[5] / k; \ +m[6] = m[6] / k; \ +m[7] = m[7] / k; \ +m[8] = m[8] / k; + +#define matvecmul4noz(m, v, out) \ +out->x = \ +m [0] * v.x + \ +m [1] * v.y + \ +m [2];\ +out->y = \ +m [3] * v.x + \ +m [4] * v.y + \ +m [5]; \ +out->z = \ +m [6] * v.x + \ +m [7] * v.y + \ +m [8]; +#define matvecmul4P(m, v, out) \ +out->x = \ +m [0] * v.x + \ +m [1] * v.y + \ +m [2] * v.z + \ +m [3]; \ +out->y = \ +m [4] * v.x + \ +m [5] * v.y + \ +m [6] * v.z + \ +m [7]; \ +out->z = \ +m [8] * v.x + \ +m [9] * v.y + \ +m [10] * v.z + \ +m [11]; + +#define matvecmul4(m, v, out) \ +out->x = \ +m [0] * v.x + \ +m [1] * v.y + \ +m [2] * v.z; \ +out->y = \ +m [3] * v.x + \ +m [4] * v.y + \ +m [5] * v.z; \ +out->z = \ +m [6] * v.x + \ +m [7] * v.y + \ +m [8] * v.z; +#define matvecmul4div(m, v, out,d) \ +out->x = \ +(m [0] * v.x + \ +m [1] * v.y + \ +m [2] * v.z)/d; \ +out->y = \ +(m [3] * v.x + \ +m [4] * v.y + \ +m [5] * v.z)/d; \ +out->z = \ +(m [6] * v.x + \ +m [7] * v.y + \ +m [8] * v.z)/d; + +#define matvecmul(m, v, out) \ +out[0] = \ +m [0] * v[0] + \ +m [1] * v[1] + \ +m [2] * v[2]; \ +out[1] = \ +m [3] * v[0] + \ +m [4] * v[1] + \ +m [5] * v[2]; \ +out[2] = \ +m [6] * v[0] + \ +m [7] * v[1] + \ +m [8] * v[2]; + +#define matmul_cu(m0, m1, out) \ +out[0] = \ +m0 [0] * m1[0] + \ +m0 [1] * m1[0+3] + \ +m0 [2] * m1[0+6]; \ +out[1] = \ +m0 [0] * m1[1] + \ +m0 [1] * m1[1+3] + \ +m0 [2] * m1[1+6]; \ +out[2] = \ +m0 [0] * m1[2] + \ +m0 [1] * m1[2+3] + \ +m0 [2] * m1[2+6]; \ +out[3] = \ +m0 [3] * m1[0] + \ +m0 [4] * m1[0+3] + \ +m0 [5] * m1[0+6]; \ +out[4] = \ +m0 [3] * m1[1] + \ +m0 [4] * m1[1+3] + \ +m0 [5] * m1[1+6]; \ +out[5] = \ +m0 [3] * m1[2] + \ +m0 [4] * m1[2+3] + \ +m0 [5] * m1[2+6]; \ +out[6] = \ +m0 [6] * m1[0] + \ +m0 [7] * m1[0+3] + \ +m0 [8] * m1[0+6]; \ +out[7] = \ +m0 [6] * m1[1] + \ +m0 [7] * m1[1+3] + \ +m0 [8] * m1[1+6]; \ +out[8] = \ +m0 [6] * m1[2] + \ +m0 [7] * m1[2+3] + \ +m0 [8] * m1[2+6]; + +// texture utils + +#define texat(tex,x, y) \ + tex2D(tex, x+0.5f, y+0.5f) +#define texatf4(tex,x, y) \ + tex2D(tex, x+0.5f, y+0.5f) + +#define texatpt(tex,pt) \ +texat(tex, pt[0], pt[1]) +#define texatpt4(tex,pt) \ +texat(tex, pt.x, pt.y) +#define texatptf4(tex,pt) \ +texatf4(tex, pt.x, pt.y) + +#define colorDifferenceL1_macro(x,y) \ +fabsf(x-y) + +#define print_vector(v,string) \ +printf("%s is \tnp.array([%e, %e ,%e])\n", string, v[0],v[1],v[2]); + +#define print_vector4(v,string) \ +printf("%s is \tnp.array([%f, %f ,%f])\n", string, v.x,v.y,v.z); + +#define print_matrix(m,string) \ +printf("%s is [[%.4e, %.4e, %.4e], \n\t [%.4e, %.4e, %.4e], \n\t [%.4e, %.4e, %.4e]]\n", string, m[0],m[1],m[2],m[3],m[4],m[5],m[6],m[7],m[8]); diff --git a/displayUtils.h b/displayUtils.h new file mode 100644 index 0000000..e0d00e2 --- /dev/null +++ b/displayUtils.h @@ -0,0 +1,441 @@ +/* + * utility functions for visualization of results (disparity in color, warped output, ...) + */ + +#pragma once +#include +#include + +#include "point_cloud.h" +#include "point_cloud_list.h" + +/* compute gamma correction (just for display purposes to see more details in farther away areas of disparity image) + * Input: img - image + * gamma - gamma value + * Output: gamma corrected image + */ +Mat correctGamma( Mat& img, double gamma ) { + double inverse_gamma = 1.0 / gamma; + + Mat lut_matrix(1, 256, CV_8UC1 ); + uchar * ptr = lut_matrix.ptr(); + for( int i = 0; i < 256; i++ ) + ptr[i] = (int)( pow( (double) i / 255.0, inverse_gamma ) * 255.0 ); + + Mat result; + LUT( img, lut_matrix, result ); + + return result; +}; + + +static void getDisparityForDisplay(const Mat_ &disp, Mat &dispGray, Mat &dispColor, float numDisparities, float minDisp = 0.0f){ + float gamma = 2.0f; // to get higher contrast for lower disparity range (just for color visualization) + disp.convertTo(dispGray,CV_16U,65535.f/(numDisparities-minDisp),-minDisp*65535.f/(numDisparities-minDisp)); + Mat disp8; + disp.convertTo(disp8,CV_8U,255.f/(numDisparities-minDisp),-minDisp*255.f/(numDisparities-minDisp)); + if(minDisp == 0.0f) + disp8 = correctGamma(disp8,gamma); + applyColorMap(disp8, dispColor, COLORMAP_JET); + for(int y = 0; y < dispColor.rows; y++){ + for(int x = 0; x < dispColor.cols; x++){ + if(disp(y,x) <= 0.0f) + dispColor.at(y,x) = Vec3b(0,0,0); + } + } +}; + +static void convertDisparityDepthImage(const Mat_ &dispL, Mat_ &d, float f, float baseline){ + d = Mat::zeros(dispL.rows, dispL.cols, CV_32F); + for(int y = 0; y < dispL.rows; y++){ + for(int x = 0; x < dispL.cols; x++){ + d(y,x) = disparityDepthConversion(f,baseline,dispL(y,x)); + } + } +}; + +static string getColorString(uint8_t color){ + stringstream ss; + ss << (int)color << " " << (int)color << " " << (int)color; + return ss.str(); +} + + +static string getColorString(Vec3b color){ + stringstream ss; + ss << (int)color(2) << " " << (int)color(1) << " " << (int)color(0); + return ss.str(); +} + +static string getColorString(Vec3i color){ + stringstream ss; + ss << (int)((float)color(2)/256.f) << " " << (int)((float)color(1)/256.f) << " " << (int)((float)color(0)/256.f); + return ss.str(); +} +static void storePlyFileBinaryPointCloud (char* plyFilePath, PointCloudList &pc, Camera cam, Mat_ &distImg) { + cout << "store 3D points to ply file" << endl; + + FILE *outputPly; + outputPly=fopen(plyFilePath,"wb"); + + /*write header*/ + fprintf(outputPly, "ply\n"); + fprintf(outputPly, "format binary_little_endian 1.0\n"); + fprintf(outputPly, "element vertex %d\n",pc.size); + fprintf(outputPly, "property float x\n"); + fprintf(outputPly, "property float y\n"); + fprintf(outputPly, "property float z\n"); + fprintf(outputPly, "property float nx\n"); + fprintf(outputPly, "property float ny\n"); + fprintf(outputPly, "property float nz\n"); + fprintf(outputPly, "property uchar red\n"); + fprintf(outputPly, "property uchar green\n"); + fprintf(outputPly, "property uchar blue\n"); + fprintf(outputPly, "end_header\n"); + + distImg = Mat::zeros(pc.rows,pc.cols,CV_32F); + + //write data +#pragma omp parallel for + for(int i = 0; i < pc.size; i++) { + const Point_li &p = pc.points[i]; + const float4 normal = p.normal; + float4 X = p.coord; + const char color = (int)p.texture; + /*const int color = 127.0f;*/ + /*printf("Writing point %f %f %f\n", X.x, X.y, X.z);*/ + + if(!(X.x < FLT_MAX && X.x > -FLT_MAX) || !(X.y < FLT_MAX && X.y > -FLT_MAX) || !(X.z < FLT_MAX && X.z >= -FLT_MAX)){ + X.x = 0.0f; + X.y = 0.0f; + X.z = 0.0f; + } +#pragma omp critical + { + /*myfile << X.x << " " << X.y << " " << X.z << " " << normal.x << " " << normal.y << " " << normal.z << " " << color << " " << color << " " << color << endl;*/ + fwrite(&X.x, sizeof(X.x), 1, outputPly); + fwrite(&X.y, sizeof(X.y), 1, outputPly); + fwrite(&X.z, sizeof(X.z), 1, outputPly); + fwrite(&normal.x, sizeof(normal.x), 1, outputPly); + fwrite(&normal.y, sizeof(normal.y), 1, outputPly); + fwrite(&normal.z, sizeof(normal.z), 1, outputPly); + fwrite(&color, sizeof(char), 1, outputPly); + fwrite(&color, sizeof(char), 1, outputPly); + fwrite(&color, sizeof(char), 1, outputPly); + } + + } + fclose(outputPly); +} +static void storeXYZPointCloud (char* plyFilePath, PointCloudList &pc, Camera cam, Mat_ &distImg) { + cout << "store 3D points to ply file" << endl; + + ofstream myfile; + myfile.open (plyFilePath, ios::out); + + //write data +#pragma omp parallel for + for(int i = 0; i < pc.size; i++) { + const Point_li &p = pc.points[i]; + const float4 normal = p.normal; + float4 X = p.coord; + + if(!(X.x < FLT_MAX && X.x > -FLT_MAX) || !(X.y < FLT_MAX && X.y > -FLT_MAX) || !(X.z < FLT_MAX && X.z >= -FLT_MAX)){ + X.x = 0.0f; + X.y = 0.0f; + X.z = 0.0f; + } +#pragma omp critical + { + myfile << X.x << " " << X.y << " " << X.z << " " << normal.x << " " << normal.y << " " << normal.z << endl; + } + + } + myfile.close(); +} +static void storePlyFileAsciiPointCloud (char* plyFilePath, PointCloudList &pc, Camera cam, Mat_ &distImg) { + cout << "store 3D points to ply file" << endl; + + /*FILE *outputPly;*/ + /*outputPly=fopen(plyFilePath,"wb");*/ + + /*write header*/ + /*fprintf(outputPly, "ply\n");*/ + /*fprintf(outputPly, "format binary_little_endian 1.0\n");*/ + /*fprintf(outputPly, "element vertex %d\n",count);*/ + /*fprintf(outputPly, "property float x\n");*/ + /*fprintf(outputPly, "property float y\n");*/ + /*fprintf(outputPly, "property float z\n");*/ + /*fprintf(outputPly, "property float nx\n");*/ + /*fprintf(outputPly, "property float ny\n");*/ + /*fprintf(outputPly, "property float nz\n");*/ + /*fprintf(outputPly, "property uchar red\n");*/ + /*fprintf(outputPly, "property uchar green\n");*/ + /*fprintf(outputPly, "property uchar blue\n");*/ + /*fprintf(outputPly, "end_header\n");*/ + + ofstream myfile; + myfile.open (plyFilePath, ios::out); + + //write header + myfile << "ply" << endl; + myfile << "format ascii 1.0" << endl; + myfile << "element vertex " << pc.size << endl; + myfile << "property float x" << endl; + myfile << "property float y" << endl; + myfile << "property float z" << endl; + myfile << "property float nx" << endl; + myfile << "property float ny" << endl; + myfile << "property float nz" << endl; + myfile << "property uchar red" << endl; + myfile << "property uchar green" << endl; + myfile << "property uchar blue" << endl; + myfile << "end_header" << endl; + + + distImg = Mat::zeros(pc.rows,pc.cols,CV_32F); + + //write data + #pragma omp parallel for + for(int i = 0; i < pc.size; i++) { + const Point_li &p = pc.points[i]; + const float4 normal = p.normal; + float4 X = p.coord; + const int color = (int)p.texture; + /*const int color = 127.0f;*/ + /*printf("Writing point %f %f %f\n", X.x, X.y, X.z);*/ + + if(!(X.x < FLT_MAX && X.x > -FLT_MAX) || !(X.y < FLT_MAX && X.y > -FLT_MAX) || !(X.z < FLT_MAX && X.z >= -FLT_MAX)){ + X.x = 0.0f; + X.y = 0.0f; + X.z = 0.0f; + } +#pragma omp critical + { + myfile << X.x << " " << X.y << " " << X.z << " " << normal.x << " " << normal.y << " " << normal.z << " " << color << " " << color << " " << color << endl; + /*fwrite(&X.x, sizeof(float), 1, outputPly);*/ + /*fwrite(&X.y, sizeof(float), 1, outputPly);*/ + /*fwrite(&X.z, sizeof(float), 1, outputPly);*/ + /*fwrite(&normal.x, sizeof(float), 1, outputPly);*/ + /*fwrite(&normal.y, sizeof(float), 1, outputPly);*/ + /*fwrite(&normal.z, sizeof(float), 1, outputPly);*/ + /*fwrite(&color, sizeof(float), 1, outputPly);*/ + /*fwrite(&color, sizeof(float), 1, outputPly);*/ + /*fwrite(&color, sizeof(float), 1, outputPly);*/ + } + + } + myfile.close(); +/*fclose(outputPly);*/ +} +//template +static void storePlyFileBinaryPointCloud(char* plyFilePath, PointCloud &pc, Camera cam, Mat_ &distImg) { + cout << "store 3D points to ply file" << endl; + + FILE *outputPly; + outputPly=fopen(plyFilePath,"wb"); + + /*write header*/ + fprintf(outputPly, "ply\n"); + fprintf(outputPly, "format binary_little_endian 1.0\n"); + fprintf(outputPly, "element vertex %d\n",pc.size); + fprintf(outputPly, "property float x\n"); + fprintf(outputPly, "property float y\n"); + fprintf(outputPly, "property float z\n"); + /*fprintf(outputPly, "property float nx\n");*/ + /*fprintf(outputPly, "property float ny\n");*/ + /*fprintf(outputPly, "property float nz\n");*/ + /*fprintf(outputPly, "property uchar red\n");*/ + /*fprintf(outputPly, "property uchar green\n");*/ + /*fprintf(outputPly, "property uchar blue\n");*/ + fprintf(outputPly, "end_header\n"); + + + distImg = Mat::zeros(pc.rows,pc.cols,CV_32F); + + //write data +/*#pragma omp parallel for*/ + for(int i = 0; i < pc.size; i++){ + const Point_cu &p = pc.points[i]; + const float4 normal = p.normal; + float4 X = p.coord; + /*printf("Writing point %f %f %f\n", X.x, X.y, X.z);*/ + /*float color = p.texture;*/ + const char color = 127.0f; + + if(!(X.x < FLT_MAX && X.x > -FLT_MAX) || !(X.y < FLT_MAX && X.y > -FLT_MAX) || !(X.z < FLT_MAX && X.z >= -FLT_MAX)){ + X.x = 0.0f; + X.y = 0.0f; + X.z = 0.0f; + } +/*#pragma omp critical*/ + { + /*myfile << X.x << " " << X.y << " " << X.z << " " << normal.x << " " << normal.y << " " << normal.z << " " << color << " " << color << " " << color << endl;*/ + fwrite(&(X.x), sizeof(float), 1, outputPly); + fwrite(&(X.y), sizeof(float), 1, outputPly); + fwrite(&(X.z), sizeof(float), 1, outputPly); + /*fwrite(&(normal.x), sizeof(float), 1, outputPly);*/ + /*fwrite(&(normal.y), sizeof(float), 1, outputPly);*/ + /*fwrite(&(normal.z), sizeof(float), 1, outputPly);*/ + /*fwrite(&color, sizeof(char), 1, outputPly);*/ + /*fwrite(&color, sizeof(char), 1, outputPly);*/ + /*fwrite(&color, sizeof(char), 1, outputPly);*/ + } + + /*distImg(y,x) = sqrt(pow(X.x-cam.C(0),2)+pow(X.y-cam.C(1),2)+pow(X.z-cam.C(2),2));*/ + } + + /*myfile.close();*/ + fclose(outputPly); +} +template +static void storePlyFileBinary(char* plyFilePath, const Mat_ &depthImg, const Mat_ &normals, const Mat_ img, Camera cam, Mat_ &distImg){ + cout << "store 3D points to ply file" << endl; + + FILE *outputPly; + outputPly=fopen(plyFilePath,"wb"); + + //write header + fprintf(outputPly, "ply\n"); + fprintf(outputPly, "format binary_little_endian 1.0\n"); + fprintf(outputPly, "element vertex %d\n",depthImg.rows * depthImg.cols); + fprintf(outputPly, "property float x\n"); + fprintf(outputPly, "property float y\n"); + fprintf(outputPly, "property float z\n"); + fprintf(outputPly, "property float nx\n"); + fprintf(outputPly, "property float ny\n"); + fprintf(outputPly, "property float nz\n"); + fprintf(outputPly, "property uchar red\n"); + fprintf(outputPly, "property uchar green\n"); + fprintf(outputPly, "property uchar blue\n"); + fprintf(outputPly, "end_header\n"); + + distImg = Mat::zeros(depthImg.rows,depthImg.cols,CV_32F); + + //write data + #pragma omp parallel for + for(int x = 0; x < depthImg.cols; x++){ + for(int y = 0; y < depthImg.rows; y++){ + Vec3f n = normals(y,x); + ImgType color = img(y,x); + + Vec3f ptX = get3Dpoint(cam,x,y,depthImg(y,x)); + + if(!(ptX(0) < FLT_MAX && ptX(0) > -FLT_MAX) || !(ptX(1) < FLT_MAX && ptX(12) > -FLT_MAX) || !(ptX(2) < FLT_MAX && ptX(2) >= -FLT_MAX)){ + ptX(0) = 0.0f; + ptX(1) = 0.0f; + ptX(2) = 0.0f; + } + #pragma omp critical + { + //myfile << ptX(0) << " " << ptX(1) << " " << ptX(2) << " " << n(0) << " " << n(1) << " " << n(2) << " " << getColorString(color) << endl; + fwrite(&(ptX(0)), sizeof(float), 3, outputPly); + fwrite(&(n(0)) , sizeof(float), 3, outputPly); + fwrite(&color, sizeof(color) , 1, outputPly); + fwrite(&color, sizeof(color) , 1, outputPly); + fwrite(&color, sizeof(color) , 1, outputPly); + } + + distImg(y,x) = sqrt(pow(ptX(0)-cam.C(0),2)+pow(ptX(1)-cam.C(1),2)+pow(ptX(2)-cam.C(2),2)); + + //}else{ + // cout << ptX(0) << " " << ptX(1) << " " << ptX(2) << endl; + // cout << depthImg(y,x) << endl; + //} + + + //P * + //cout << xValue << " " << yValue << " " << zValue << " / " << + } + } + +fclose(outputPly); +} + +template +static void storePlyFile(char* plyFilePath, const Mat_ &depthImg, const Mat_ &normals, const Mat_ img, Camera cam, Mat_ &distImg){ + cout << "store 3D points to ply file" << endl; + ofstream myfile; + myfile.open (plyFilePath, ios::out); + + //write header + myfile << "ply" << endl; + myfile << "format ascii 1.0" << endl; + myfile << "element vertex " << depthImg.rows * depthImg.cols << endl; + myfile << "property float x" << endl; + myfile << "property float y" << endl; + myfile << "property float z" << endl; + myfile << "property float nx" << endl; + myfile << "property float ny" << endl; + myfile << "property float nz" << endl; + myfile << "property uchar red" << endl; + myfile << "property uchar green" << endl; + myfile << "property uchar blue" << endl; + myfile << "end_header" << endl; + + distImg = Mat::zeros(depthImg.rows,depthImg.cols,CV_32F); + + //write data + //#pragma omp parallel for + for(int x = 0; x < depthImg.cols; x++){ + for(int y = 0; y < depthImg.rows; y++){ + /* + float zValue = depthImg(x,y); + float xValue = ((float)x-cx)*zValue/camParams.f; + float yValue = ((float)y-cy)*zValue/camParams.f; + myfile << xValue << " " << yValue << " " << zValue << endl; + */ + + //Mat_ pt = Mat::ones(3,1,CV_32F); + //pt(0,0) = (float)x; + //pt(1,0) = (float)y; + + Vec3f n = normals(y,x); + ImgType color = img(y,x); + + //Mat_ ptX = P1_inv * depthImg(y,x)*pt; + + //if(depthImg(y,x) <= 0.0001f) + // continue; + + + Vec3f ptX = get3Dpoint(cam,x,y,depthImg(y,x)); + + //Vec3f ptX_v1 = get3dPointFromPlane(cam.P_inv,cam.C,n,planes.d(y,x),x,y); + //cout << ptX_v1 << " / " << ptX << endl; + + if(!(ptX(0) < FLT_MAX && ptX(0) > -FLT_MAX) || !(ptX(1) < FLT_MAX && ptX(12) > -FLT_MAX) || !(ptX(2) < FLT_MAX && ptX(2) >= -FLT_MAX)){ + ptX(0) = 0.0f; + ptX(1) = 0.0f; + ptX(2) = 0.0f; + } + //#pragma omp critical + { + myfile << ptX(0) << " " << ptX(1) << " " << ptX(2) << " " << n(0) << " " << n(1) << " " << n(2) << " " << getColorString(color) << endl; + } + + distImg(y,x) = sqrt(pow(ptX(0)-cam.C(0),2)+pow(ptX(1)-cam.C(1),2)+pow(ptX(2)-cam.C(2),2)); + + //}else{ + // cout << ptX(0) << " " << ptX(1) << " " << ptX(2) << endl; + // cout << depthImg(y,x) << endl; + //} + + + //P * + //cout << xValue << " " << yValue << " " << zValue << " / " << + } + } + + myfile.close(); +} + + + +static void getNormalsForDisplay(const Mat &normals, Mat &normals_display, int rtype = CV_16U){ + if(rtype == CV_8U) + normals.convertTo(normals_display,CV_8U,128,128); + else + normals.convertTo(normals_display,CV_16U,32767,32767); + cvtColor(normals_display,normals_display,COLOR_RGB2BGR); +}; diff --git a/exception.h b/exception.h new file mode 100644 index 0000000..adda4bc --- /dev/null +++ b/exception.h @@ -0,0 +1,151 @@ +/* +* Copyright 1993-2013 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +/* CUda UTility Library */ +#ifndef _EXCEPTION_H_ +#define _EXCEPTION_H_ + +// includes, system +#include +#include +#include +#include + +//! Exception wrapper. +//! @param Std_Exception Exception out of namespace std for easy typing. +template +class Exception : public Std_Exception +{ + public: + + //! @brief Static construction interface + //! @return Alwayss throws ( Located_Exception) + //! @param file file in which the Exception occurs + //! @param line line in which the Exception occurs + //! @param detailed details on the code fragment causing the Exception + static void throw_it(const char *file, + const int line, + const char *detailed = "-"); + + //! Static construction interface + //! @return Alwayss throws ( Located_Exception) + //! @param file file in which the Exception occurs + //! @param line line in which the Exception occurs + //! @param detailed details on the code fragment causing the Exception + static void throw_it(const char *file, + const int line, + const std::string &detailed); + + //! Destructor + virtual ~Exception() throw(); + + private: + + //! Constructor, default (private) + Exception(); + + //! Constructor, standard + //! @param str string returned by what() + Exception(const std::string &str); + +}; + +//////////////////////////////////////////////////////////////////////////////// +//! Exception handler function for arbitrary exceptions +//! @param ex exception to handle +//////////////////////////////////////////////////////////////////////////////// +template +inline void +handleException(const Exception_Typ &ex) +{ + std::cerr << ex.what() << std::endl; + + exit(EXIT_FAILURE); +} + +//! Convenience macros + +//! Exception caused by dynamic program behavior, e.g. file does not exist +#define RUNTIME_EXCEPTION( msg) \ + Exception::throw_it( __FILE__, __LINE__, msg) + +//! Logic exception in program, e.g. an assert failed +#define LOGIC_EXCEPTION( msg) \ + Exception::throw_it( __FILE__, __LINE__, msg) + +//! Out of range exception +#define RANGE_EXCEPTION( msg) \ + Exception::throw_it( __FILE__, __LINE__, msg) + +//////////////////////////////////////////////////////////////////////////////// +//! Implementation + +// includes, system +#include + +//////////////////////////////////////////////////////////////////////////////// +//! Static construction interface. +//! @param Exception causing code fragment (file and line) and detailed infos. +//////////////////////////////////////////////////////////////////////////////// +/*static*/ template +void +Exception:: +throw_it(const char *file, const int line, const char *detailed) +{ + std::stringstream s; + + // Quiet heavy-weight but exceptions are not for + // performance / release versions + s << "Exception in file '" << file << "' in line " << line << "\n" + << "Detailed description: " << detailed << "\n"; + + throw Exception(s.str()); +} + +//////////////////////////////////////////////////////////////////////////////// +//! Static construction interface. +//! @param Exception causing code fragment (file and line) and detailed infos. +//////////////////////////////////////////////////////////////////////////////// +/*static*/ template +void +Exception:: +throw_it(const char *file, const int line, const std::string &msg) +{ + throw_it(file, line, msg.c_str()); +} + +//////////////////////////////////////////////////////////////////////////////// +//! Constructor, default (private). +//////////////////////////////////////////////////////////////////////////////// +template +Exception::Exception() : + Std_Exception("Unknown Exception.\n") +{ } + +//////////////////////////////////////////////////////////////////////////////// +//! Constructor, standard (private). +//! String returned by what(). +//////////////////////////////////////////////////////////////////////////////// +template +Exception::Exception(const std::string &s) : + Std_Exception(s) +{ } + +//////////////////////////////////////////////////////////////////////////////// +//! Destructor +//////////////////////////////////////////////////////////////////////////////// +template +Exception::~Exception() throw() { } + +// functions, exported + +#endif // #ifndef _EXCEPTION_H_ + diff --git a/fileIoUtils.h b/fileIoUtils.h new file mode 100644 index 0000000..4ca68c6 --- /dev/null +++ b/fileIoUtils.h @@ -0,0 +1,441 @@ +/* + * utility functions for reading and writing files + */ + +#pragma once + +#include +#include + +static void getProjectionMatrix(char* line, Mat_ &P){ + const char* p; + int idx = 0; + for (p = strtok( line, " " ); p; p = strtok( NULL, " " )) + { + if(p[0]=='P' || p[0]=='p') + continue; + + //float val = stof(p); + float val = (float)atof(p); + /*cout << val << endl;*/ + P(idx/4,idx%4)=val; + + idx++; + } +}; + +static int read3Dpoint(char* line, Vec3f &pt){ + const char* p; + int idx = 0; + for (p = strtok( line, " " ); p; p = strtok( NULL, " " )) + { + if(idx > 2) + return -1; + float val = (float)atof(p); + pt[idx] = val; + + idx++; + } + if(idx<2) + return -1; + return 0; +} + +static void readCalibFileKitti(const string calib_filename, Mat_ &P1, Mat_ &P2){ + ifstream myfile; + myfile.open(calib_filename.c_str(),ifstream::in); + //get first line (containing P0) + char line[512]; + myfile.getline(line,512); + getProjectionMatrix(line,P1); + myfile.getline(line,512); + getProjectionMatrix(line,P2); + myfile.close(); +}; + +static void readBoundingVolume(const string filename, Vec3f &ptBL, Vec3f & ptTR){ + ifstream myfile; + myfile.open(filename.c_str(),ifstream::in); + char line[512]; + //bottom left point + myfile.getline(line,512); + read3Dpoint(line,ptBL); + //top right point + myfile.getline(line,512); + read3Dpoint(line,ptTR); + + myfile.close(); +}; + + +static void readCameraFileStrecha(const string camera_filename, float &focalLength){ + // only interested in focal length, but possible to get also other internal and external camera parameters + // focal length is stored in pixel format as alpha_x and alphy_y, only using alpha_x (which is the very first parameter of the internal camera matrix) + ifstream myfile; + myfile.open(camera_filename.c_str(),ifstream::in); + char line[512]; + myfile.getline(line,512); + const char* p = strtok( line, " " ); + focalLength = (float)atof(p); + myfile.close(); +} + +static void readPFileStrechaPmvs(const string p_filename, Mat_ &P){ + ifstream myfile; + myfile.open(p_filename.c_str(),ifstream::in); + + //cout <<"Opening file " << p_filename << endl; + for( int i = 0; i < 4; i++){ + if (myfile.eof()) + break; + char line[512]; + myfile.getline(line,512); + if (strstr(line,"CONTOUR")!= NULL) { + //printf("Skipping CONTOUR\n"); + i--; + continue; + } + + //cout << "Line is "<< line << endl; + const char* p; + int j = 0; + for (p = strtok( line, " " ); p; p = strtok( NULL, " " )) + { + float val = (float)atof(p); + P(i,j)=val; + j++; + } + } + myfile.close(); +} +static void readKRtFileMiddlebury(const string filename, vector cameras, InputFiles inputFiles) +{ + ifstream myfile; + myfile.open( filename, ifstream::in ); + string line; + + getline (myfile, line); // throw away first line + + int i=0; + int truei=-1; + while( getline( myfile,line) ) + { + /*cout << "Line is "<< line << endl;*/ + Mat Rt; + Mat_ K = Mat::zeros( 3, 3, CV_32F ); + Mat_ R = Mat::zeros( 3, 3, CV_32F ); + Vec3f vt; + stringstream ss(line); + string tmp; + ss >> tmp + >> K(0,0) >> K(0,1) >> K(0,2) >> K(1,0) >> K(1,1) >> K(1,2) >> K(2,0) >> K(2,1) >> K(2,2) // + >> R(0,0) >> R(0,1) >> R(0,2) >> R(1,0) >> R(1,1) >> R(1,2) >> R(2,0) >> R(2,1) >> R(2,2) // + >> vt(0) >> vt(1) >> vt(2); + /*cout << "K is " << K << endl;*/ + /*cout << "R is " << R << endl;*/ + /*cout << "t is " << vt << endl;*/ + //cout << "Filename is " << tmp << endl; + //cout << "image Filename is " << inputFiles.img_filenames[i] << endl; + for( int j = 0; j < inputFiles.img_filenames.size(); j++) { + if( tmp == inputFiles.img_filenames[j]) { + truei=j; + break; + } + } + Mat t(vt, false); + /*Mat t(vt);*/ + hconcat(R, t, Rt); + cameras[truei].P = K*Rt; + /*cout << "Rt is " << Rt<< endl;*/ + /*cout << "P is " << P << endl;*/ + /*cout << "P is " << cameras[i].P << endl;*/ + i++; + } + + + /*while (os >> temp) //the stringstream makes temp a token*/ + /*std::cout < &P){ + ifstream myfile; + myfile.open(calib_filename.c_str(),ifstream::in); + + char line[512]; + while (myfile.getline(line, 512)) { + if(line[0] == 'p') + getProjectionMatrix(line,P); + } + + + myfile.close(); +} + +static void writeImageToFile(const char* outputFolder,const char* name,const Mat &img){ + char outputPath[256]; + sprintf(outputPath, "%s/%s.png", outputFolder,name); + imwrite(outputPath,img); +}; + +static void writeParametersToFile(char* resultsFile, InputFiles inputFiles, AlgorithmParameters &algParameters, GTcheckParameters >Parameters, uint32_t numPixels){ + + ofstream myfile; + myfile.open (resultsFile, ios::out); + myfile << "Number of images: " << inputFiles.img_filenames.size() << endl; + myfile << "Image folder: " << inputFiles.images_folder << endl; + myfile << "Images: "; + for(int i=0; i < inputFiles.img_filenames.size(); i++) + myfile << inputFiles.img_filenames[i] << ", " ; + myfile << endl; + if(numPixels != 0) + myfile << "Num. pixels: " << numPixels << endl; + myfile << "\nParameters:" << endl; + myfile << " Max. disparity: " << algParameters.max_disparity << endl; + myfile << " Depth min: " << algParameters.depthMin << endl; + myfile << " Depth max: " << algParameters.depthMax << endl; + myfile << " color processing: "; + if(algParameters.color_processing) + myfile << "yes" << endl; + else + myfile << "no" << endl; + myfile << " view selection: "; + if(algParameters.viewSelection) + myfile << "yes" << endl; + else + myfile << "no" << endl; + myfile.close(); +}; +// read ground truth depth map file (dmb) (provided by Tola et al. "DAISY: A Fast Local Descriptor for Dense Matching" http://cvlab.epfl.ch/software/daisy) +static int readDmbNormal (const char *filename, Mat_ &img) +{ + FILE *inimage; + inimage = fopen(filename, "rb"); + if (!inimage){ + printf("Error opening file %s",filename); + return -1; + } + + int32_t type, h, w, nb; + + type = -1; + + fread(&type,sizeof(int32_t),1,inimage); + fread(&h,sizeof(int32_t),1,inimage); + fread(&w,sizeof(int32_t),1,inimage); + fread(&nb,sizeof(int32_t),1,inimage); + + //only support float + if(type != 1){ + fclose(inimage); + return -1; + } + + int32_t dataSize = h*w*nb; + + float* data; + data = (float*) malloc (sizeof(float)*dataSize); + fread(data,sizeof(float),dataSize,inimage); + + img = Mat(h,w,CV_32FC3,data); + + fclose(inimage); + return 0; + +} +// read ground truth depth map file (dmb) (provided by Tola et al. "DAISY: A Fast Local Descriptor for Dense Matching" http://cvlab.epfl.ch/software/daisy) +static int readDmb(const char *filename, Mat_ &img) +{ + FILE *inimage; + inimage = fopen(filename, "rb"); + if (!inimage){ + printf("Error opening file %s",filename); + return -1; + } + + int32_t type, h, w, nb; + + type = -1; + + fread(&type,sizeof(int32_t),1,inimage); + fread(&h,sizeof(int32_t),1,inimage); + fread(&w,sizeof(int32_t),1,inimage); + fread(&nb,sizeof(int32_t),1,inimage); + + //only support float + if(type != 1){ + fclose(inimage); + return -1; + } + + int32_t dataSize = h*w*nb; + + float* data; + data = (float*) malloc (sizeof(float)*dataSize); + fread(data,sizeof(float),dataSize,inimage); + + img = Mat(h,w,CV_32F,data); + + fclose(inimage); + return 0; + +} +static int writeDmbNormal(const char *filename, Mat_ &img){ + FILE *outimage; + outimage = fopen(filename, "wb"); + if (!outimage) + printf("Error opening file %s",filename); + + int32_t type = 1; //float + int32_t h = img.rows; + int32_t w = img.cols; + int32_t nb = 3; + + fwrite(&type,sizeof(int32_t),1,outimage); + fwrite(&h,sizeof(int32_t),1,outimage); + fwrite(&w,sizeof(int32_t),1,outimage); + fwrite(&nb,sizeof(int32_t),1,outimage); + + float* data = (float*)img.data; + + int32_t datasize = w*h*nb; + fwrite(data,sizeof(float),datasize,outimage); + + fclose(outimage); + return 0; +} + +static int writeDmb(const char *filename, Mat_ &img){ + FILE *outimage; + outimage = fopen(filename, "wb"); + if (!outimage) + printf("Error opening file %s",filename); + + int32_t type = 1; //float + int32_t h = img.rows; + int32_t w = img.cols; + int32_t nb = 1; + + fwrite(&type,sizeof(int32_t),1,outimage); + fwrite(&h,sizeof(int32_t),1,outimage); + fwrite(&w,sizeof(int32_t),1,outimage); + fwrite(&nb,sizeof(int32_t),1,outimage); + + float* data = (float*)img.data; + + int32_t datasize = w*h*nb; + fwrite(data,sizeof(float),datasize,outimage); + + fclose(outimage); + return 0; +} + +static int readPfm( const char *filename, + //double ***u, // double matrix image + Mat_ &img, + long *nx, /*image size in x direction */ + long *ny) /*image size in y direction */ +{ + FILE *inimage; /* input image FILE pointer */ + long i, j; /* loop variable */ + char row[4096]; /* for reading data */ + + /* open input pgm file and read header */ + inimage = fopen(filename, "rb"); + if (!inimage) + printf("Error opening file %s",filename); + + /* calling it two times because of the P6 header */ + fgets (row, 4096, inimage); + if (row[0]!='P') + abort(); + printf("Row is %s\n", row); + char format = row[1]; + switch (format) /* which format to deal with */ + { + case '5': /* P6 format - classic pgm */ + printf("Opening %s P5 file\n", filename ); + fgets (row, 4096, inimage); + + while (row[0]=='#'||row[0]=='\n') fgets(row, 4096, inimage); + sscanf (row, "%ld %ld", nx, ny); + fgets (row, 4096, inimage); + // fgets (row, 4096, inimage); + + /* allocate storage */ + //alloc_matrix_d (u, *nx, *ny); + img = Mat::zeros((int)*ny,(int)*nx,CV_32F); + + /* read image data */ + for (j=0; j<*ny; j++) + for (i=0; i<*nx; i++) + img(j,i) = (float) getc (inimage); + //(*u)[i][j] = (double) getc (inimage); + break; + /* PF format - pbm HDR format file */ + case 'F': + case 'f': + printf("Opening %s PF file\n", filename ); + fgets (row, 4096, inimage); + + while (row[0]=='#'||row[0]=='\n') + fgets(row, 4096, inimage); + + sscanf (row, "%ld %ld", nx, ny); + +// fgets (row, 4096, inimage); + double scale; + fscanf(inimage, "%lf\n", &scale); +// printf("Scale is %f\n", scale); + // fgets (row, 4096, inimage); + + /* allocate storage */ + //alloc_matrix_d (u, (*nx)*4*3, (*ny)*4*3); + img = Mat::zeros((int)*ny,(int)*nx,CV_32F); + + float tmpfloat; +// printf("Float is %lu bytes big\n", sizeof(float)); + /* read image data */ + for (j=*ny-1; j>=0; j--) { + for (i=0; i<*nx; i++) { + // (*u)[i][j] = (double) getc (inimage); + fread((void *)(&tmpfloat), sizeof(float), 1, inimage); + /* overwrite other 3 channels when they are available */ + if (format=='F') { + fread((void *)(&tmpfloat), sizeof(float), 1, inimage); + fread((void *)(&tmpfloat), sizeof(float), 1, inimage); + } + img(j,i) = tmpfloat; + //(*u)[i][j] = (double) tmpfloat; + + /* if positive convert to big endian */ + if (scale > 0) + { + char array[4]; + char tmpbyte; + memcpy(array, &tmpfloat, sizeof (float)); + /* swap 0 3 */ + tmpbyte = array[0]; + array[0] = array[3]; + array[3] = tmpbyte; + /* swap 1 2 */ + tmpbyte = array[1]; + array[1]=array[2]; + array[2] = tmpbyte; + memcpy(&tmpfloat, array, sizeof (float)); + + img(j,i) = tmpfloat; + //(*u)[i][j] = (double) tmpfloat; + } + } + } + break; + } + + fclose(inimage); + return 0; + +}; diff --git a/fusibile.cu b/fusibile.cu new file mode 100644 index 0000000..8d2476c --- /dev/null +++ b/fusibile.cu @@ -0,0 +1,439 @@ +/* vim: ft=cpp + * */ + +//#include +#ifdef _WIN32 +#include +#endif +#include +#include "globalstate.h" +#include "algorithmparameters.h" +#include "cameraparameters.h" +#include "linestate.h" +#include "imageinfo.h" +#include "config.h" + +#include // float4 +#include +#include +#include +#include "vector_operations.h" +#include "point_cloud_list.h" + +#define SAVE_TEXTURE +//#define SMOOTHNESS + +#define FORCEINLINE __forceinline__ +//#define FORCEINLINE + + +__device__ float K[16]; +__device__ float K_inv[16]; + +/*__device__ FORCEINLINE __constant__ float4 camerasK[32];*/ + +/* compute depth value from disparity or disparity value from depth + * Input: f - focal length in pixel + * baseline - baseline between cameras (in meters) + * d - either disparity or depth value + * Output: either depth or disparity value + */ +__device__ FORCEINLINE float disparityDepthConversion_cu ( const float &f, const float &baseline, const float &d ) { + return f * baseline / d; +} + +/* compute depth value from disparity or disparity value from depth + * Input: f - focal length in pixel + * baseline - baseline between cameras (in meters) + * d - either disparity or depth value + * Output: either depth or disparity value + */ +__device__ FORCEINLINE float disparityDepthConversion_cu2 ( const float &f, const Camera_cu &cam_ref, const Camera_cu &cam, const float &d ) { + float baseline = l2_float4(cam_ref.C4 - cam.C4); + return f * baseline / d; +} + +__device__ FORCEINLINE void get3Dpoint_cu ( float4 * __restrict__ ptX, const Camera_cu &cam, const int2 &p, const float &depth ) { + // in case camera matrix is not normalized: see page 162, then depth might not be the real depth but w and depth needs to be computed from that first + const float4 pt = make_float4 ( + depth * (float)p.x - cam.P_col34.x, + depth * (float)p.y - cam.P_col34.y, + depth - cam.P_col34.z, + 0); + + matvecmul4 (cam.M_inv, pt, ptX); +} +__device__ FORCEINLINE void get3Dpoint_cu1 ( float4 * __restrict__ ptX, const Camera_cu &cam, const int2 &p) { + // in case camera matrix is not normalized: see page 162, then depth might not be the real depth but w and depth needs to be computed from that first + float4 pt; + pt.x = (float)p.x - cam.P_col34.x; + pt.y = (float)p.y - cam.P_col34.y; + pt.z = 1.0f - cam.P_col34.z; + + matvecmul4 (cam.M_inv, pt, ptX); +} +//get d parameter of plane pi = [nT, d]T, which is the distance of the plane to the camera center +__device__ FORCEINLINE float getPlaneDistance_cu ( const float4 &normal, const float4 &X ) { + return -(dot4(normal,X)); +} + +__device__ FORCEINLINE void normalize_cu (float4 * __restrict__ v) +{ + const float normSquared = pow2(v->x) + pow2(v->y) + pow2(v->z); + const float inverse_sqrt = rsqrtf (normSquared); + v->x *= inverse_sqrt; + v->y *= inverse_sqrt; + v->z *= inverse_sqrt; +} +__device__ FORCEINLINE void getViewVector_cu (float4 * __restrict__ v, const Camera_cu &camera, const int2 &p) +{ + get3Dpoint_cu1 (v, camera, p); + sub((*v), camera.C4); + normalize_cu(v); + //v->x=0; + //v->y=0; + //v->z=1; +} + +__device__ FORCEINLINE float l1_norm(float f) { + return fabsf(f); +} +__device__ FORCEINLINE float l1_norm(float4 f) { + return ( fabsf (f.x) + + fabsf (f.y) + + fabsf (f.z))*0.3333333f; + +} +__device__ FORCEINLINE float l1_norm2(float4 f) { + return ( fabsf (f.x) + + fabsf (f.y) + + fabsf (f.z)); + +} + +/* get angle between two vectors in 3D + * Input: v1,v2 - vectors + * Output: angle in radian + */ +__device__ FORCEINLINE float getAngle_cu ( const float4 &v1, const float4 &v2 ) { + float angle = acosf ( dot4(v1, v2)); + //if angle is not a number the dot product was 1 and thus the two vectors should be identical --> return 0 + if ( angle != angle ) + return 0.0f; + //if ( acosf ( v1.dot ( v2 ) ) != acosf ( v1.dot ( v2 ) ) ) + //cout << acosf ( v1.dot ( v2 ) ) << " / " << v1.dot ( v2 )<< " / " << v1<< " / " << v2 << endl; + return angle; +} +__device__ FORCEINLINE void project_on_camera (const float4 &X, const Camera_cu &cam, float2 *pt, float *depth) { + float4 tmp = make_float4 (0, 0, 0, 0); + matvecmul4P (cam.P, X, (&tmp)); + pt->x = tmp.x / tmp.z; + pt->y = tmp.y / tmp.z; + *depth = tmp.z; +} + +/* + * Simple and fast depth math fusion based on depth map and normal consensus + */ +__global__ void fusibile (GlobalState &gs, int ref_camera) +{ + int2 p = make_int2 ( blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y ); + //printf("p is %d %d\n", p.x, p.y); + + const int cols = gs.cameras->cols; + const int rows = gs.cameras->rows; + + if (p.x>=cols) + return; + if (p.y>=rows) + return; + + const int center = p.y*cols+p.x; + + const CameraParameters_cu &camParams = *(gs.cameras); + + if (gs.lines[ref_camera].used_pixels[center]==1) + return; + + //printf("ref_camera is %d\n", ref_camera); + const float4 normal = tex2D (gs.normals_depths[ref_camera], p.x+0.5f, p.y+0.5f); + //printf("Normal is %f %f %f\nDepth is %f\n", normal.x, normal.y, normal.z, normal.w); + /* + * For each point of the reference camera compute the 3d position corresponding to the corresponding depth. + * Create a point only if the following conditions are fulfilled: + * - Projected depths of other cameras does not differ more than gs.params.depthThresh + * - Angle of normal does not differ more than gs.params.normalThresh + */ + float depth = normal.w; + + float4 X; + get3Dpoint_cu (&X, camParams.cameras[ref_camera], p, depth); + //if (p.x<100 && p.y ==100) + //printf("3d Point is %f %f %f\n", X.x, X.y, X.z); + float4 consistent_X = X; + float4 consistent_normal = normal; + float consistent_texture = tex2D (gs.imgs[ref_camera], p.x+0.5f, p.y+0.5f); + int number_consistent = 0; + //int2 used_list[camParams.viewSelectionSubsetNumber]; + int2 used_list[MAX_IMAGES]; + for ( int i = 0; i < camParams.viewSelectionSubsetNumber; i++ ) { + + int idxCurr = camParams.viewSelectionSubset[i]; + used_list[idxCurr].x=-1; + used_list[idxCurr].y=-1; + if (idxCurr == ref_camera) + continue; + + // Project 3d point X on camera idxCurr + float2 tmp_pt; + project_on_camera (X, camParams.cameras[idxCurr], &tmp_pt, &depth); + //printf("P for camera %d is \n", i); + //print_matrix (camParams.cameras[idxCurr].P, "camera "); + //printf("2d point for camera %d is %f %f\n", idxCurr, tmp_pt.x, tmp_pt.y); + + // Boundary check + if (tmp_pt.x >=0 && + tmp_pt.x < cols && + tmp_pt.y >=0 && + tmp_pt.y < rows) { + //printf("Boundary check passed\n"); + + // Compute interpolated depth and normal for tmp_pt w.r.t. camera ref_camera + float4 tmp_normal_and_depth; // first 3 components normal, fourth depth + tmp_normal_and_depth = tex2D (gs.normals_depths[idxCurr], tmp_pt.x+0.5f, tmp_pt.y+0.5f); + //printf("New depth is %f vs %f\n", tmp_normal_and_depth.w, depth); + + const float depth_disp = disparityDepthConversion_cu2 ( camParams.cameras[ref_camera].f, camParams.cameras[ref_camera], camParams.cameras[idxCurr], depth ); + const float tmp_normal_and_depth_disp = disparityDepthConversion_cu2 ( camParams.cameras[ref_camera].f, camParams.cameras[ref_camera], camParams.cameras[idxCurr], tmp_normal_and_depth.w ); + // First consistency check on depth + if (fabsf(depth_disp - tmp_normal_and_depth_disp) < gs.params->depthThresh) { + //printf("\tFirst consistency test passed!\n"); + float angle = getAngle_cu (tmp_normal_and_depth, normal); // extract normal + if (angle < gs.params->normalThresh) + { + //printf("\tSecond consistency test passed!\n"); + /// All conditions met: + // - average 3d points and normals + // - save resulting point and normal + // - (optional) average texture (not done yet) + float4 tmp_X; // 3d point of consistent point on other view + int2 tmp_p = make_int2 ((int) tmp_pt.x, (int) tmp_pt.y); + + get3Dpoint_cu (&tmp_X, camParams.cameras[idxCurr], tmp_p, tmp_normal_and_depth.w); + consistent_X = consistent_X + tmp_X; + //consistent_X = tmp_X; + consistent_normal = consistent_normal + tmp_normal_and_depth; + if (gs.params->saveTexture) + consistent_texture = consistent_texture + tex2D (gs.imgs[idxCurr], tmp_pt.x+0.5f, tmp_pt.y+0.5f); + + + // Save the point for later check + //printf ("Saved point on camera %d is %d %d\n", idxCurr, (int)tmp_pt.x, (int)tmp_pt.y); + used_list[idxCurr].x=(int)tmp_pt.x; + used_list[idxCurr].y=(int)tmp_pt.y; + + number_consistent++; + } + } + } + else + continue; + } + + // Average normals and points + consistent_X = consistent_X / ((float) number_consistent + 1.0f); + consistent_normal = consistent_normal / ((float) number_consistent + 1.0f); + consistent_texture = consistent_texture / ((float) number_consistent + 1.0f); + + // If at least numConsistentThresh point agree: + // Create point + // Save normal + // (optional) save texture + if (number_consistent >= gs.params->numConsistentThresh) { + //printf("\tEnough consistent points!\nSaving point %f %f %f", consistent_X.x, consistent_X.y, consistent_X.z); + gs.pc->points[center].coord = consistent_X; + gs.pc->points[center].normal = consistent_normal; + +#ifdef SAVE_TEXTURE + if (gs.params->saveTexture) + gs.pc->points[center].texture = consistent_texture; +#endif + + //// Mark corresponding point on other views as "used" + for ( int i = 0; i < camParams.viewSelectionSubsetNumber; i++ ) { + int idxCurr = camParams.viewSelectionSubset[i]; + if (used_list[idxCurr].x==-1) + continue; + //printf("Used list point on camera %d is %d %d\n", idxCurr, used_list[idxCurr].x, used_list[idxCurr].y); + gs.lines[idxCurr].used_pixels [used_list[idxCurr].x + used_list[idxCurr].y*cols] = 1; + } + } + + return; +} +/* Copy point cloud to global memory */ +//template< typename T > +void copy_point_cloud_to_host(GlobalState &gs, int cam, PointCloudList &pc_list) +{ + printf("Processing camera %d\n", cam); + unsigned int count = pc_list.size; + for (int y=0; yrows; y++) { + for (int x=0; xcols; x++) { + Point_cu &p = gs.pc->points[x+y*gs.pc->cols]; + const float4 X = p.coord; + const float4 normal = p.normal; + float texture = 127.0f; +#ifdef SAVE_TEXTURE + if (gs.params->saveTexture) + texture = p.texture; +#endif + if (count==pc_list.maximum) { + printf("Not enough space to save points :'(\n... allocating more! :)"); + pc_list.increase_size(pc_list.maximum*2); + + } + if (X.x != 0 && X.y != 0 && X.z != 0) { + pc_list.points[count].coord = X; + pc_list.points[count].normal = normal; +#ifdef SAVE_TEXTURE + pc_list.points[count].texture = texture; +#endif + count++; + } + p.coord = make_float4(0,0,0,0); + } + } + printf("Found %.2f million points\n", count/1000000.0f); + pc_list.size = count; +} + +template< typename T > +void fusibile_cu(GlobalState &gs, PointCloudList &pc_list, int num_views) +{ +#ifdef SHARED + cudaDeviceSetCacheConfig(cudaFuncCachePreferShared); +#endif + + int rows = gs.cameras->rows; + int cols = gs.cameras->cols; + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + printf("Run gipuma\n"); + /*curandState* devStates;*/ + //cudaMalloc ( &gs.cs, rows*cols*sizeof( curandState ) ); + + int count = 0; + int i = 0; + + cudaGetDeviceCount(&count); + if(count == 0) { + fprintf(stderr, "There is no device.\n"); + return ; + } + + for(i = 0; i < count; i++) { + cudaDeviceProp prop; + if(cudaGetDeviceProperties(&prop, i) == cudaSuccess) { + if(prop.major >= 1) { + break; + } + } + } + if(i == count) { + fprintf(stderr, "There is no device supporting CUDA.\n"); + return ; + } + //float mind = gs.params.min_disparity; + //float maxd = gs.params.max_disparity; + //srand(0); + //for(int x = 0; x < gs.cameras.cols; x++) { + //for(int y = 0; y < gs.cameras.rows; y++) { + //gs.lines.disp[y*gs.cameras.cols+x] = (float)rand()/(float)RAND_MAX * (maxd-mind) + mind; + //[>printf("%f\n", gs.lines.disp[y*256+x]);<] + //} + //} + /*printf("MAX DISP is %f\n", gs.params.max_disparity);*/ + /*printf("MIN DISP is %f\n", gs.params.min_disparity);*/ + cudaSetDevice(i); + cudaDeviceSetLimit(cudaLimitPrintfFifoSize, 1024*128); + dim3 grid_size; + grid_size.x=(cols+BLOCK_W-1)/BLOCK_W; + grid_size.y=((rows/2)+BLOCK_H-1)/BLOCK_H; + dim3 block_size; + block_size.x=BLOCK_W; + block_size.y=BLOCK_H; + + dim3 grid_size_initrand; + grid_size_initrand.x=(cols+32-1)/32; + grid_size_initrand.y=(rows+32-1)/32; + dim3 block_size_initrand; + block_size_initrand.x=32; + block_size_initrand.y=32; + +/* printf("Launching kernel with grid of size %d %d and block of size %d %d and shared size %d %d\nBlock %d %d and radius %d %d and tile %d %d\n", + grid_size.x, + grid_size.y, + block_size.x, + block_size.y, + SHARED_SIZE_W, + SHARED_SIZE_H, + BLOCK_W, + BLOCK_H, + WIN_RADIUS_W, + WIN_RADIUS_H, + TILE_W, + TILE_H + ); + */ printf("Grid size initrand is grid: %d-%d block: %d-%d\n", grid_size_initrand.x, grid_size_initrand.y, block_size_initrand.x, block_size_initrand.y); + + size_t avail; + size_t total; + cudaMemGetInfo( &avail, &total ); + size_t used = total - avail; + printf("Device memory used: %fMB\n", used/1000000.0f); + printf("Number of iterations is %d\n", gs.params->iterations); + printf("Blocksize is %dx%d\n", gs.params->box_hsize,gs.params->box_vsize); + printf("Disparity threshold is \t%f\n", gs.params->depthThresh); + printf("Normal threshold is \t%f\n", gs.params->normalThresh); + printf("Number of consistent points is \t%d\n", gs.params->numConsistentThresh); + printf("Cam scale is \t%f\n", gs.params->cam_scale); + + //int shared_memory_size = sizeof(float) * SHARED_SIZE ; + printf("Fusing points\n"); + cudaEventRecord(start); + + //printf("Computing final disparity\n"); + //for (int cam=0; cam<10; cam++) { + for (int cam=0; cam>>(gs, cam); + cudaDeviceSynchronize(); + copy_point_cloud_to_host(gs, cam, pc_list); // slower but saves memory + cudaDeviceSynchronize(); + } + + cudaEventRecord(stop); + + cudaEventSynchronize(stop); + + float milliseconds = 0; + cudaEventElapsedTime(&milliseconds, start, stop); + printf("\t\tELAPSED %f seconds\n", milliseconds/1000.f); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + printf("Error: %s\n", cudaGetErrorString(err)); + + // print results to file +} + +int runcuda(GlobalState &gs, PointCloudList &pc_list, int num_views) +{ + printf("Run cuda\n"); + /*GlobalState *gs = new GlobalState;*/ + if(gs.params->color_processing) + fusibile_cu(gs, pc_list, num_views); + else + fusibile_cu(gs, pc_list, num_views); + return 0; +} diff --git a/fusibile.h b/fusibile.h new file mode 100644 index 0000000..63a3b32 --- /dev/null +++ b/fusibile.h @@ -0,0 +1,3 @@ +#include "globalstate.h" +#include "point_cloud_list.h" +int runcuda(GlobalState &gs, PointCloudList &pc_list, int num_views); diff --git a/globalstate.h b/globalstate.h new file mode 100644 index 0000000..aa7023f --- /dev/null +++ b/globalstate.h @@ -0,0 +1,50 @@ +#pragma once + +#include "algorithmparameters.h" +#include "cameraparameters.h" +#include "linestate.h" +#include "imageinfo.h" +#include "managed.h" +#include "point_cloud.h" + +// Includes CUDA +#include +#include +#include +#include + +// includes, cuda +#include +#include +#include +#include +#include +#include + +class GlobalState : public Managed { +public: + CameraParameters_cu *cameras; + //ImageInfo iminfo; + LineState *lines; + curandState *cs; + AlgorithmParameters *params; + PointCloud *pc; + // PointCloud *pc_list; + + cudaTextureObject_t imgs [MAX_IMAGES]; + cudaTextureObject_t normals_depths [MAX_IMAGES]; // first 3 values normal, fourth depth + /*cudaTextureObject_t depths [MAX_IMAGES];*/ + //cudaTextureObject_t gradx [MAX_IMAGES]; + //cudaTextureObject_t grady [MAX_IMAGES]; + void resize(int n) + { + printf("Resizing globalstate to %d\n", n); + cudaMallocManaged (&lines, sizeof(LineState) * n); + } + ~GlobalState() + { + /*cudaFree (c);*/ + cudaFree (lines); + } + +}; diff --git a/helper_cuda.h b/helper_cuda.h new file mode 100644 index 0000000..89fd782 --- /dev/null +++ b/helper_cuda.h @@ -0,0 +1,1182 @@ +/** + * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. + * + */ + +//////////////////////////////////////////////////////////////////////////////// +// These are CUDA Helper functions for initialization and error checking + +#ifndef HELPER_CUDA_H +#define HELPER_CUDA_H + +#pragma once + +#include +#include +#include + +#include "helper_string.h" + +#ifndef EXIT_WAIVED +#define EXIT_WAIVED 2 +#endif + +// Note, it is required that your SDK sample to include the proper header files, please +// refer the CUDA examples for examples of the needed CUDA headers, which may change depending +// on which CUDA functions are used. + +// CUDA Runtime error messages +#ifdef __DRIVER_TYPES_H__ +static const char *_cudaGetErrorEnum(cudaError_t error) +{ + switch (error) + { + case cudaSuccess: + return "cudaSuccess"; + + case cudaErrorMissingConfiguration: + return "cudaErrorMissingConfiguration"; + + case cudaErrorMemoryAllocation: + return "cudaErrorMemoryAllocation"; + + case cudaErrorInitializationError: + return "cudaErrorInitializationError"; + + case cudaErrorLaunchFailure: + return "cudaErrorLaunchFailure"; + + case cudaErrorPriorLaunchFailure: + return "cudaErrorPriorLaunchFailure"; + + case cudaErrorLaunchTimeout: + return "cudaErrorLaunchTimeout"; + + case cudaErrorLaunchOutOfResources: + return "cudaErrorLaunchOutOfResources"; + + case cudaErrorInvalidDeviceFunction: + return "cudaErrorInvalidDeviceFunction"; + + case cudaErrorInvalidConfiguration: + return "cudaErrorInvalidConfiguration"; + + case cudaErrorInvalidDevice: + return "cudaErrorInvalidDevice"; + + case cudaErrorInvalidValue: + return "cudaErrorInvalidValue"; + + case cudaErrorInvalidPitchValue: + return "cudaErrorInvalidPitchValue"; + + case cudaErrorInvalidSymbol: + return "cudaErrorInvalidSymbol"; + + case cudaErrorMapBufferObjectFailed: + return "cudaErrorMapBufferObjectFailed"; + + case cudaErrorUnmapBufferObjectFailed: + return "cudaErrorUnmapBufferObjectFailed"; + + case cudaErrorInvalidHostPointer: + return "cudaErrorInvalidHostPointer"; + + case cudaErrorInvalidDevicePointer: + return "cudaErrorInvalidDevicePointer"; + + case cudaErrorInvalidTexture: + return "cudaErrorInvalidTexture"; + + case cudaErrorInvalidTextureBinding: + return "cudaErrorInvalidTextureBinding"; + + case cudaErrorInvalidChannelDescriptor: + return "cudaErrorInvalidChannelDescriptor"; + + case cudaErrorInvalidMemcpyDirection: + return "cudaErrorInvalidMemcpyDirection"; + + case cudaErrorAddressOfConstant: + return "cudaErrorAddressOfConstant"; + + case cudaErrorTextureFetchFailed: + return "cudaErrorTextureFetchFailed"; + + case cudaErrorTextureNotBound: + return "cudaErrorTextureNotBound"; + + case cudaErrorSynchronizationError: + return "cudaErrorSynchronizationError"; + + case cudaErrorInvalidFilterSetting: + return "cudaErrorInvalidFilterSetting"; + + case cudaErrorInvalidNormSetting: + return "cudaErrorInvalidNormSetting"; + + case cudaErrorMixedDeviceExecution: + return "cudaErrorMixedDeviceExecution"; + + case cudaErrorCudartUnloading: + return "cudaErrorCudartUnloading"; + + case cudaErrorUnknown: + return "cudaErrorUnknown"; + + case cudaErrorNotYetImplemented: + return "cudaErrorNotYetImplemented"; + + case cudaErrorMemoryValueTooLarge: + return "cudaErrorMemoryValueTooLarge"; + + case cudaErrorInvalidResourceHandle: + return "cudaErrorInvalidResourceHandle"; + + case cudaErrorNotReady: + return "cudaErrorNotReady"; + + case cudaErrorInsufficientDriver: + return "cudaErrorInsufficientDriver"; + + case cudaErrorSetOnActiveProcess: + return "cudaErrorSetOnActiveProcess"; + + case cudaErrorInvalidSurface: + return "cudaErrorInvalidSurface"; + + case cudaErrorNoDevice: + return "cudaErrorNoDevice"; + + case cudaErrorECCUncorrectable: + return "cudaErrorECCUncorrectable"; + + case cudaErrorSharedObjectSymbolNotFound: + return "cudaErrorSharedObjectSymbolNotFound"; + + case cudaErrorSharedObjectInitFailed: + return "cudaErrorSharedObjectInitFailed"; + + case cudaErrorUnsupportedLimit: + return "cudaErrorUnsupportedLimit"; + + case cudaErrorDuplicateVariableName: + return "cudaErrorDuplicateVariableName"; + + case cudaErrorDuplicateTextureName: + return "cudaErrorDuplicateTextureName"; + + case cudaErrorDuplicateSurfaceName: + return "cudaErrorDuplicateSurfaceName"; + + case cudaErrorDevicesUnavailable: + return "cudaErrorDevicesUnavailable"; + + case cudaErrorInvalidKernelImage: + return "cudaErrorInvalidKernelImage"; + + case cudaErrorNoKernelImageForDevice: + return "cudaErrorNoKernelImageForDevice"; + + case cudaErrorIncompatibleDriverContext: + return "cudaErrorIncompatibleDriverContext"; + + case cudaErrorPeerAccessAlreadyEnabled: + return "cudaErrorPeerAccessAlreadyEnabled"; + + case cudaErrorPeerAccessNotEnabled: + return "cudaErrorPeerAccessNotEnabled"; + + case cudaErrorDeviceAlreadyInUse: + return "cudaErrorDeviceAlreadyInUse"; + + case cudaErrorProfilerDisabled: + return "cudaErrorProfilerDisabled"; + + case cudaErrorProfilerNotInitialized: + return "cudaErrorProfilerNotInitialized"; + + case cudaErrorProfilerAlreadyStarted: + return "cudaErrorProfilerAlreadyStarted"; + + case cudaErrorProfilerAlreadyStopped: + return "cudaErrorProfilerAlreadyStopped"; + + /* Since CUDA 4.0*/ + case cudaErrorAssert: + return "cudaErrorAssert"; + + case cudaErrorTooManyPeers: + return "cudaErrorTooManyPeers"; + + case cudaErrorHostMemoryAlreadyRegistered: + return "cudaErrorHostMemoryAlreadyRegistered"; + + case cudaErrorHostMemoryNotRegistered: + return "cudaErrorHostMemoryNotRegistered"; + + /* Since CUDA 5.0 */ + case cudaErrorOperatingSystem: + return "cudaErrorOperatingSystem"; + + case cudaErrorPeerAccessUnsupported: + return "cudaErrorPeerAccessUnsupported"; + + case cudaErrorLaunchMaxDepthExceeded: + return "cudaErrorLaunchMaxDepthExceeded"; + + case cudaErrorLaunchFileScopedTex: + return "cudaErrorLaunchFileScopedTex"; + + case cudaErrorLaunchFileScopedSurf: + return "cudaErrorLaunchFileScopedSurf"; + + case cudaErrorSyncDepthExceeded: + return "cudaErrorSyncDepthExceeded"; + + case cudaErrorLaunchPendingCountExceeded: + return "cudaErrorLaunchPendingCountExceeded"; + + case cudaErrorNotPermitted: + return "cudaErrorNotPermitted"; + + case cudaErrorNotSupported: + return "cudaErrorNotSupported"; + + /* Since CUDA 6.0 */ + case cudaErrorHardwareStackError: + return "cudaErrorHardwareStackError"; + + case cudaErrorIllegalInstruction: + return "cudaErrorIllegalInstruction"; + + case cudaErrorMisalignedAddress: + return "cudaErrorMisalignedAddress"; + + case cudaErrorInvalidAddressSpace: + return "cudaErrorInvalidAddressSpace"; + + case cudaErrorInvalidPc: + return "cudaErrorInvalidPc"; + + case cudaErrorIllegalAddress: + return "cudaErrorIllegalAddress"; + + /* Since CUDA 6.5*/ + case cudaErrorInvalidPtx: + return "cudaErrorInvalidPtx"; + + case cudaErrorInvalidGraphicsContext: + return "cudaErrorInvalidGraphicsContext"; + + case cudaErrorStartupFailure: + return "cudaErrorStartupFailure"; + + case cudaErrorApiFailureBase: + return "cudaErrorApiFailureBase"; + } + + return ""; +} +#endif + +#ifdef __cuda_cuda_h__ +// CUDA Driver API errors +static const char *_cudaGetErrorEnum(CUresult error) +{ + switch (error) + { + case CUDA_SUCCESS: + return "CUDA_SUCCESS"; + + case CUDA_ERROR_INVALID_VALUE: + return "CUDA_ERROR_INVALID_VALUE"; + + case CUDA_ERROR_OUT_OF_MEMORY: + return "CUDA_ERROR_OUT_OF_MEMORY"; + + case CUDA_ERROR_NOT_INITIALIZED: + return "CUDA_ERROR_NOT_INITIALIZED"; + + case CUDA_ERROR_DEINITIALIZED: + return "CUDA_ERROR_DEINITIALIZED"; + + case CUDA_ERROR_PROFILER_DISABLED: + return "CUDA_ERROR_PROFILER_DISABLED"; + + case CUDA_ERROR_PROFILER_NOT_INITIALIZED: + return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; + + case CUDA_ERROR_PROFILER_ALREADY_STARTED: + return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; + + case CUDA_ERROR_PROFILER_ALREADY_STOPPED: + return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; + + case CUDA_ERROR_NO_DEVICE: + return "CUDA_ERROR_NO_DEVICE"; + + case CUDA_ERROR_INVALID_DEVICE: + return "CUDA_ERROR_INVALID_DEVICE"; + + case CUDA_ERROR_INVALID_IMAGE: + return "CUDA_ERROR_INVALID_IMAGE"; + + case CUDA_ERROR_INVALID_CONTEXT: + return "CUDA_ERROR_INVALID_CONTEXT"; + + case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: + return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; + + case CUDA_ERROR_MAP_FAILED: + return "CUDA_ERROR_MAP_FAILED"; + + case CUDA_ERROR_UNMAP_FAILED: + return "CUDA_ERROR_UNMAP_FAILED"; + + case CUDA_ERROR_ARRAY_IS_MAPPED: + return "CUDA_ERROR_ARRAY_IS_MAPPED"; + + case CUDA_ERROR_ALREADY_MAPPED: + return "CUDA_ERROR_ALREADY_MAPPED"; + + case CUDA_ERROR_NO_BINARY_FOR_GPU: + return "CUDA_ERROR_NO_BINARY_FOR_GPU"; + + case CUDA_ERROR_ALREADY_ACQUIRED: + return "CUDA_ERROR_ALREADY_ACQUIRED"; + + case CUDA_ERROR_NOT_MAPPED: + return "CUDA_ERROR_NOT_MAPPED"; + + case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: + return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; + + case CUDA_ERROR_NOT_MAPPED_AS_POINTER: + return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; + + case CUDA_ERROR_ECC_UNCORRECTABLE: + return "CUDA_ERROR_ECC_UNCORRECTABLE"; + + case CUDA_ERROR_UNSUPPORTED_LIMIT: + return "CUDA_ERROR_UNSUPPORTED_LIMIT"; + + case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: + return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; + + case CUDA_ERROR_INVALID_SOURCE: + return "CUDA_ERROR_INVALID_SOURCE"; + + case CUDA_ERROR_FILE_NOT_FOUND: + return "CUDA_ERROR_FILE_NOT_FOUND"; + + case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: + return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; + + case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: + return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; + + case CUDA_ERROR_OPERATING_SYSTEM: + return "CUDA_ERROR_OPERATING_SYSTEM"; + + case CUDA_ERROR_INVALID_HANDLE: + return "CUDA_ERROR_INVALID_HANDLE"; + + case CUDA_ERROR_NOT_FOUND: + return "CUDA_ERROR_NOT_FOUND"; + + case CUDA_ERROR_NOT_READY: + return "CUDA_ERROR_NOT_READY"; + + case CUDA_ERROR_LAUNCH_FAILED: + return "CUDA_ERROR_LAUNCH_FAILED"; + + case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: + return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; + + case CUDA_ERROR_LAUNCH_TIMEOUT: + return "CUDA_ERROR_LAUNCH_TIMEOUT"; + + case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: + return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING"; + + case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: + return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; + + case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: + return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; + + case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: + return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; + + case CUDA_ERROR_CONTEXT_IS_DESTROYED: + return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; + + case CUDA_ERROR_ASSERT: + return "CUDA_ERROR_ASSERT"; + + case CUDA_ERROR_TOO_MANY_PEERS: + return "CUDA_ERROR_TOO_MANY_PEERS"; + + case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: + return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; + + case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: + return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; + + case CUDA_ERROR_UNKNOWN: + return "CUDA_ERROR_UNKNOWN"; + } + + return ""; +} +#endif + +#ifdef CUBLAS_API_H_ +// cuBLAS API errors +static const char *_cudaGetErrorEnum(cublasStatus_t error) +{ + switch (error) + { + case CUBLAS_STATUS_SUCCESS: + return "CUBLAS_STATUS_SUCCESS"; + + case CUBLAS_STATUS_NOT_INITIALIZED: + return "CUBLAS_STATUS_NOT_INITIALIZED"; + + case CUBLAS_STATUS_ALLOC_FAILED: + return "CUBLAS_STATUS_ALLOC_FAILED"; + + case CUBLAS_STATUS_INVALID_VALUE: + return "CUBLAS_STATUS_INVALID_VALUE"; + + case CUBLAS_STATUS_ARCH_MISMATCH: + return "CUBLAS_STATUS_ARCH_MISMATCH"; + + case CUBLAS_STATUS_MAPPING_ERROR: + return "CUBLAS_STATUS_MAPPING_ERROR"; + + case CUBLAS_STATUS_EXECUTION_FAILED: + return "CUBLAS_STATUS_EXECUTION_FAILED"; + + case CUBLAS_STATUS_INTERNAL_ERROR: + return "CUBLAS_STATUS_INTERNAL_ERROR"; + } + + return ""; +} +#endif + +#ifdef _CUFFT_H_ +// cuFFT API errors +static const char *_cudaGetErrorEnum(cufftResult error) +{ + switch (error) + { + case CUFFT_SUCCESS: + return "CUFFT_SUCCESS"; + + case CUFFT_INVALID_PLAN: + return "CUFFT_INVALID_PLAN"; + + case CUFFT_ALLOC_FAILED: + return "CUFFT_ALLOC_FAILED"; + + case CUFFT_INVALID_TYPE: + return "CUFFT_INVALID_TYPE"; + + case CUFFT_INVALID_VALUE: + return "CUFFT_INVALID_VALUE"; + + case CUFFT_INTERNAL_ERROR: + return "CUFFT_INTERNAL_ERROR"; + + case CUFFT_EXEC_FAILED: + return "CUFFT_EXEC_FAILED"; + + case CUFFT_SETUP_FAILED: + return "CUFFT_SETUP_FAILED"; + + case CUFFT_INVALID_SIZE: + return "CUFFT_INVALID_SIZE"; + + case CUFFT_UNALIGNED_DATA: + return "CUFFT_UNALIGNED_DATA"; + + case CUFFT_INCOMPLETE_PARAMETER_LIST: + return "CUFFT_INCOMPLETE_PARAMETER_LIST"; + + case CUFFT_INVALID_DEVICE: + return "CUFFT_INVALID_DEVICE"; + + case CUFFT_PARSE_ERROR: + return "CUFFT_PARSE_ERROR"; + + case CUFFT_NO_WORKSPACE: + return "CUFFT_NO_WORKSPACE"; + + case CUFFT_NOT_IMPLEMENTED: + return "CUFFT_NOT_IMPLEMENTED"; + + case CUFFT_LICENSE_ERROR: + return "CUFFT_LICENSE_ERROR"; + } + + return ""; +} +#endif + + +#ifdef CUSPARSEAPI +// cuSPARSE API errors +static const char *_cudaGetErrorEnum(cusparseStatus_t error) +{ + switch (error) + { + case CUSPARSE_STATUS_SUCCESS: + return "CUSPARSE_STATUS_SUCCESS"; + + case CUSPARSE_STATUS_NOT_INITIALIZED: + return "CUSPARSE_STATUS_NOT_INITIALIZED"; + + case CUSPARSE_STATUS_ALLOC_FAILED: + return "CUSPARSE_STATUS_ALLOC_FAILED"; + + case CUSPARSE_STATUS_INVALID_VALUE: + return "CUSPARSE_STATUS_INVALID_VALUE"; + + case CUSPARSE_STATUS_ARCH_MISMATCH: + return "CUSPARSE_STATUS_ARCH_MISMATCH"; + + case CUSPARSE_STATUS_MAPPING_ERROR: + return "CUSPARSE_STATUS_MAPPING_ERROR"; + + case CUSPARSE_STATUS_EXECUTION_FAILED: + return "CUSPARSE_STATUS_EXECUTION_FAILED"; + + case CUSPARSE_STATUS_INTERNAL_ERROR: + return "CUSPARSE_STATUS_INTERNAL_ERROR"; + + case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: + return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; + } + + return ""; +} +#endif + +#ifdef CURAND_H_ +// cuRAND API errors +static const char *_cudaGetErrorEnum(curandStatus_t error) +{ + switch (error) + { + case CURAND_STATUS_SUCCESS: + return "CURAND_STATUS_SUCCESS"; + + case CURAND_STATUS_VERSION_MISMATCH: + return "CURAND_STATUS_VERSION_MISMATCH"; + + case CURAND_STATUS_NOT_INITIALIZED: + return "CURAND_STATUS_NOT_INITIALIZED"; + + case CURAND_STATUS_ALLOCATION_FAILED: + return "CURAND_STATUS_ALLOCATION_FAILED"; + + case CURAND_STATUS_TYPE_ERROR: + return "CURAND_STATUS_TYPE_ERROR"; + + case CURAND_STATUS_OUT_OF_RANGE: + return "CURAND_STATUS_OUT_OF_RANGE"; + + case CURAND_STATUS_LENGTH_NOT_MULTIPLE: + return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; + + case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: + return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; + + case CURAND_STATUS_LAUNCH_FAILURE: + return "CURAND_STATUS_LAUNCH_FAILURE"; + + case CURAND_STATUS_PREEXISTING_FAILURE: + return "CURAND_STATUS_PREEXISTING_FAILURE"; + + case CURAND_STATUS_INITIALIZATION_FAILED: + return "CURAND_STATUS_INITIALIZATION_FAILED"; + + case CURAND_STATUS_ARCH_MISMATCH: + return "CURAND_STATUS_ARCH_MISMATCH"; + + case CURAND_STATUS_INTERNAL_ERROR: + return "CURAND_STATUS_INTERNAL_ERROR"; + } + + return ""; +} +#endif + +#ifdef NV_NPPIDEFS_H +// NPP API errors +static const char *_cudaGetErrorEnum(NppStatus error) +{ + switch (error) + { + case NPP_NOT_SUPPORTED_MODE_ERROR: + return "NPP_NOT_SUPPORTED_MODE_ERROR"; + + case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR: + return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR"; + + case NPP_RESIZE_NO_OPERATION_ERROR: + return "NPP_RESIZE_NO_OPERATION_ERROR"; + + case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY: + return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY"; + +#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 + + case NPP_BAD_ARG_ERROR: + return "NPP_BAD_ARGUMENT_ERROR"; + + case NPP_COEFF_ERROR: + return "NPP_COEFFICIENT_ERROR"; + + case NPP_RECT_ERROR: + return "NPP_RECTANGLE_ERROR"; + + case NPP_QUAD_ERROR: + return "NPP_QUADRANGLE_ERROR"; + + case NPP_MEM_ALLOC_ERR: + return "NPP_MEMORY_ALLOCATION_ERROR"; + + case NPP_HISTO_NUMBER_OF_LEVELS_ERROR: + return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; + + case NPP_INVALID_INPUT: + return "NPP_INVALID_INPUT"; + + case NPP_POINTER_ERROR: + return "NPP_POINTER_ERROR"; + + case NPP_WARNING: + return "NPP_WARNING"; + + case NPP_ODD_ROI_WARNING: + return "NPP_ODD_ROI_WARNING"; +#else + + // These are for CUDA 5.5 or higher + case NPP_BAD_ARGUMENT_ERROR: + return "NPP_BAD_ARGUMENT_ERROR"; + + case NPP_COEFFICIENT_ERROR: + return "NPP_COEFFICIENT_ERROR"; + + case NPP_RECTANGLE_ERROR: + return "NPP_RECTANGLE_ERROR"; + + case NPP_QUADRANGLE_ERROR: + return "NPP_QUADRANGLE_ERROR"; + + case NPP_MEMORY_ALLOCATION_ERR: + return "NPP_MEMORY_ALLOCATION_ERROR"; + + case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR: + return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; + + case NPP_INVALID_HOST_POINTER_ERROR: + return "NPP_INVALID_HOST_POINTER_ERROR"; + + case NPP_INVALID_DEVICE_POINTER_ERROR: + return "NPP_INVALID_DEVICE_POINTER_ERROR"; +#endif + + case NPP_LUT_NUMBER_OF_LEVELS_ERROR: + return "NPP_LUT_NUMBER_OF_LEVELS_ERROR"; + + case NPP_TEXTURE_BIND_ERROR: + return "NPP_TEXTURE_BIND_ERROR"; + + case NPP_WRONG_INTERSECTION_ROI_ERROR: + return "NPP_WRONG_INTERSECTION_ROI_ERROR"; + + case NPP_NOT_EVEN_STEP_ERROR: + return "NPP_NOT_EVEN_STEP_ERROR"; + + case NPP_INTERPOLATION_ERROR: + return "NPP_INTERPOLATION_ERROR"; + + case NPP_RESIZE_FACTOR_ERROR: + return "NPP_RESIZE_FACTOR_ERROR"; + + case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR: + return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR"; + + +#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 + + case NPP_MEMFREE_ERR: + return "NPP_MEMFREE_ERR"; + + case NPP_MEMSET_ERR: + return "NPP_MEMSET_ERR"; + + case NPP_MEMCPY_ERR: + return "NPP_MEMCPY_ERROR"; + + case NPP_MIRROR_FLIP_ERR: + return "NPP_MIRROR_FLIP_ERR"; +#else + + case NPP_MEMFREE_ERROR: + return "NPP_MEMFREE_ERROR"; + + case NPP_MEMSET_ERROR: + return "NPP_MEMSET_ERROR"; + + case NPP_MEMCPY_ERROR: + return "NPP_MEMCPY_ERROR"; + + case NPP_MIRROR_FLIP_ERROR: + return "NPP_MIRROR_FLIP_ERROR"; +#endif + + case NPP_ALIGNMENT_ERROR: + return "NPP_ALIGNMENT_ERROR"; + + case NPP_STEP_ERROR: + return "NPP_STEP_ERROR"; + + case NPP_SIZE_ERROR: + return "NPP_SIZE_ERROR"; + + case NPP_NULL_POINTER_ERROR: + return "NPP_NULL_POINTER_ERROR"; + + case NPP_CUDA_KERNEL_EXECUTION_ERROR: + return "NPP_CUDA_KERNEL_EXECUTION_ERROR"; + + case NPP_NOT_IMPLEMENTED_ERROR: + return "NPP_NOT_IMPLEMENTED_ERROR"; + + case NPP_ERROR: + return "NPP_ERROR"; + + case NPP_SUCCESS: + return "NPP_SUCCESS"; + + case NPP_WRONG_INTERSECTION_QUAD_WARNING: + return "NPP_WRONG_INTERSECTION_QUAD_WARNING"; + + case NPP_MISALIGNED_DST_ROI_WARNING: + return "NPP_MISALIGNED_DST_ROI_WARNING"; + + case NPP_AFFINE_QUAD_INCORRECT_WARNING: + return "NPP_AFFINE_QUAD_INCORRECT_WARNING"; + + case NPP_DOUBLE_SIZE_WARNING: + return "NPP_DOUBLE_SIZE_WARNING"; + + case NPP_WRONG_INTERSECTION_ROI_WARNING: + return "NPP_WRONG_INTERSECTION_ROI_WARNING"; + +#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000 + /* These are 6.0 or higher */ + case NPP_LUT_PALETTE_BITSIZE_ERROR: + return "NPP_LUT_PALETTE_BITSIZE_ERROR"; + + case NPP_ZC_MODE_NOT_SUPPORTED_ERROR: + return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR"; + + case NPP_QUALITY_INDEX_ERROR: + return "NPP_QUALITY_INDEX_ERROR"; + + case NPP_CHANNEL_ORDER_ERROR: + return "NPP_CHANNEL_ORDER_ERROR"; + + case NPP_ZERO_MASK_VALUE_ERROR: + return "NPP_ZERO_MASK_VALUE_ERROR"; + + case NPP_NUMBER_OF_CHANNELS_ERROR: + return "NPP_NUMBER_OF_CHANNELS_ERROR"; + + case NPP_COI_ERROR: + return "NPP_COI_ERROR"; + + case NPP_DIVISOR_ERROR: + return "NPP_DIVISOR_ERROR"; + + case NPP_CHANNEL_ERROR: + return "NPP_CHANNEL_ERROR"; + + case NPP_STRIDE_ERROR: + return "NPP_STRIDE_ERROR"; + + case NPP_ANCHOR_ERROR: + return "NPP_ANCHOR_ERROR"; + + case NPP_MASK_SIZE_ERROR: + return "NPP_MASK_SIZE_ERROR"; + + case NPP_MOMENT_00_ZERO_ERROR: + return "NPP_MOMENT_00_ZERO_ERROR"; + + case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR: + return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR"; + + case NPP_THRESHOLD_ERROR: + return "NPP_THRESHOLD_ERROR"; + + case NPP_CONTEXT_MATCH_ERROR: + return "NPP_CONTEXT_MATCH_ERROR"; + + case NPP_FFT_FLAG_ERROR: + return "NPP_FFT_FLAG_ERROR"; + + case NPP_FFT_ORDER_ERROR: + return "NPP_FFT_ORDER_ERROR"; + + case NPP_SCALE_RANGE_ERROR: + return "NPP_SCALE_RANGE_ERROR"; + + case NPP_DATA_TYPE_ERROR: + return "NPP_DATA_TYPE_ERROR"; + + case NPP_OUT_OFF_RANGE_ERROR: + return "NPP_OUT_OFF_RANGE_ERROR"; + + case NPP_DIVIDE_BY_ZERO_ERROR: + return "NPP_DIVIDE_BY_ZERO_ERROR"; + + case NPP_RANGE_ERROR: + return "NPP_RANGE_ERROR"; + + case NPP_NO_MEMORY_ERROR: + return "NPP_NO_MEMORY_ERROR"; + + case NPP_ERROR_RESERVED: + return "NPP_ERROR_RESERVED"; + + case NPP_NO_OPERATION_WARNING: + return "NPP_NO_OPERATION_WARNING"; + + case NPP_DIVIDE_BY_ZERO_WARNING: + return "NPP_DIVIDE_BY_ZERO_WARNING"; +#endif + + } + + return ""; +} +#endif + +#ifdef __DRIVER_TYPES_H__ +#ifndef DEVICE_RESET +#define DEVICE_RESET cudaDeviceReset(); +#endif +#else +#ifndef DEVICE_RESET +#define DEVICE_RESET +#endif +#endif + +template< typename T > +void check(T result, char const *const func, const char *const file, int const line) +{ + if (result) + { + fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", + file, line, static_cast(result), _cudaGetErrorEnum(result), func); + DEVICE_RESET + // Make sure we call CUDA Device Reset before exiting + exit(EXIT_FAILURE); + } +} + +#ifdef __DRIVER_TYPES_H__ +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ ) + +// This will output the proper error string when calling cudaGetLastError +#define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__) + +inline void __getLastCudaError(const char *errorMessage, const char *file, const int line) +{ + cudaError_t err = cudaGetLastError(); + + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n", + file, line, errorMessage, (int)err, cudaGetErrorString(err)); + DEVICE_RESET + exit(EXIT_FAILURE); + } +} +#endif + +#ifndef MAX +#define MAX(a,b) (a > b ? a : b) +#endif + +// Beginning of GPU Architecture definitions +inline int _ConvertSMVer2Cores(int major, int minor) +{ + // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM + typedef struct + { + int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version + int Cores; + } sSMtoCores; + + sSMtoCores nGpuArchCoresPerSM[] = + { + { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class + { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class + { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class + { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class + { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class + { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class + { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class + { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class + { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class + { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class + { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class + { -1, -1 } + }; + + int index = 0; + + while (nGpuArchCoresPerSM[index].SM != -1) + { + if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) + { + return nGpuArchCoresPerSM[index].Cores; + } + + index++; + } + + // If we don't find the values, we default use the previous one to run properly + printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores); + return nGpuArchCoresPerSM[index-1].Cores; +} +// end of GPU Architecture definitions + +#ifdef __CUDA_RUNTIME_H__ +// General GPU Device CUDA Initialization +inline int gpuDeviceInit(int devID) +{ + int device_count; + checkCudaErrors(cudaGetDeviceCount(&device_count)); + + if (device_count == 0) + { + fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n"); + exit(EXIT_FAILURE); + } + + if (devID < 0) + { + devID = 0; + } + + if (devID > device_count-1) + { + fprintf(stderr, "\n"); + fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count); + fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID); + fprintf(stderr, "\n"); + return -devID; + } + + cudaDeviceProp deviceProp; + checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); + + if (deviceProp.computeMode == cudaComputeModeProhibited) + { + fprintf(stderr, "Error: device is running in , no threads can use ::cudaSetDevice().\n"); + return -1; + } + + if (deviceProp.major < 1) + { + fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n"); + exit(EXIT_FAILURE); + } + + checkCudaErrors(cudaSetDevice(devID)); + printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name); + + return devID; +} + +// This function returns the best GPU (with maximum GFLOPS) +inline int gpuGetMaxGflopsDeviceId() +{ + int current_device = 0, sm_per_multiproc = 0; + int max_perf_device = 0; + int device_count = 0, best_SM_arch = 0; + int devices_prohibited = 0; + + unsigned long long max_compute_perf = 0; + cudaDeviceProp deviceProp; + cudaGetDeviceCount(&device_count); + + checkCudaErrors(cudaGetDeviceCount(&device_count)); + + if (device_count == 0) + { + fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n"); + exit(EXIT_FAILURE); + } + + // Find the best major SM Architecture GPU device + while (current_device < device_count) + { + cudaGetDeviceProperties(&deviceProp, current_device); + + // If this GPU is not running on Compute Mode prohibited, then we can add it to the list + if (deviceProp.computeMode != cudaComputeModeProhibited) + { + if (deviceProp.major > 0 && deviceProp.major < 9999) + { + best_SM_arch = MAX(best_SM_arch, deviceProp.major); + } + } + else + { + devices_prohibited++; + } + + current_device++; + } + + if (devices_prohibited == device_count) + { + fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n"); + exit(EXIT_FAILURE); + } + + // Find the best CUDA capable GPU device + current_device = 0; + + while (current_device < device_count) + { + cudaGetDeviceProperties(&deviceProp, current_device); + + // If this GPU is not running on Compute Mode prohibited, then we can add it to the list + if (deviceProp.computeMode != cudaComputeModeProhibited) + { + if (deviceProp.major == 9999 && deviceProp.minor == 9999) + { + sm_per_multiproc = 1; + } + else + { + sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor); + } + + unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate; + + if (compute_perf > max_compute_perf) + { + // If we find GPU with SM major > 2, search only these + if (best_SM_arch > 2) + { + // If our device==dest_SM_arch, choose this, or else pass + if (deviceProp.major == best_SM_arch) + { + max_compute_perf = compute_perf; + max_perf_device = current_device; + } + } + else + { + max_compute_perf = compute_perf; + max_perf_device = current_device; + } + } + } + + ++current_device; + } + + return max_perf_device; +} + + +// Initialization code to find the best CUDA Device +inline int findCudaDevice(int argc, const char **argv) +{ + cudaDeviceProp deviceProp; + int devID = 0; + + // If the command-line has a device number specified, use it + if (checkCmdLineFlag(argc, argv, "device")) + { + devID = getCmdLineArgumentInt(argc, argv, "device="); + + if (devID < 0) + { + printf("Invalid command line parameter\n "); + exit(EXIT_FAILURE); + } + else + { + devID = gpuDeviceInit(devID); + + if (devID < 0) + { + printf("exiting...\n"); + exit(EXIT_FAILURE); + } + } + } + else + { + // Otherwise pick the device with highest Gflops/s + devID = gpuGetMaxGflopsDeviceId(); + checkCudaErrors(cudaSetDevice(devID)); + checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); + printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor); + } + + return devID; +} + +// General check for CUDA GPU SM Capabilities +inline bool checkCudaCapabilities(int major_version, int minor_version) +{ + cudaDeviceProp deviceProp; + deviceProp.major = 0; + deviceProp.minor = 0; + int dev; + + checkCudaErrors(cudaGetDevice(&dev)); + checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev)); + + if ((deviceProp.major > major_version) || + (deviceProp.major == major_version && deviceProp.minor >= minor_version)) + { + printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor); + return true; + } + else + { + printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version); + return false; + } +} +#endif + +// end of CUDA Helper Functions + + +#endif diff --git a/helper_string.h b/helper_string.h new file mode 100644 index 0000000..cdf35df --- /dev/null +++ b/helper_string.h @@ -0,0 +1,498 @@ +/** + * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. + * + */ + +// These are helper functions for the SDK samples (string parsing, timers, etc) +#ifndef STRING_HELPER_H +#define STRING_HELPER_H + +#include +#include +#include +#include + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +#ifndef _CRT_SECURE_NO_DEPRECATE +#define _CRT_SECURE_NO_DEPRECATE +#endif +#ifndef STRCASECMP +#define STRCASECMP _stricmp +#endif +#ifndef STRNCASECMP +#define STRNCASECMP _strnicmp +#endif +#ifndef STRCPY +#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath) +#endif + +#ifndef FOPEN +#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode) +#endif +#ifndef FOPEN_FAIL +#define FOPEN_FAIL(result) (result != 0) +#endif +#ifndef SSCANF +#define SSCANF sscanf_s +#endif +#ifndef SPRINTF +#define SPRINTF sprintf_s +#endif +#else // Linux Includes +#include +#include + +#ifndef STRCASECMP +#define STRCASECMP strcasecmp +#endif +#ifndef STRNCASECMP +#define STRNCASECMP strncasecmp +#endif +#ifndef STRCPY +#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath) +#endif + +#ifndef FOPEN +#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode)) +#endif +#ifndef FOPEN_FAIL +#define FOPEN_FAIL(result) (result == NULL) +#endif +#ifndef SSCANF +#define SSCANF sscanf +#endif +#ifndef SPRINTF +#define SPRINTF sprintf +#endif +#endif + +#ifndef EXIT_WAIVED +#define EXIT_WAIVED 2 +#endif + +// CUDA Utility Helper Functions +inline int stringRemoveDelimiter(char delimiter, const char *string) +{ + int string_start = 0; + + while (string[string_start] == delimiter) + { + string_start++; + } + + if (string_start >= (int)strlen(string)-1) + { + return 0; + } + + return string_start; +} + +inline int getFileExtension(char *filename, char **extension) +{ + int string_length = (int)strlen(filename); + + while (filename[string_length--] != '.') + { + if (string_length == 0) + break; + } + + if (string_length > 0) string_length += 2; + + if (string_length == 0) + *extension = NULL; + else + *extension = &filename[string_length]; + + return string_length; +} + + +inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref) +{ + bool bFound = false; + + if (argc >= 1) + { + for (int i=1; i < argc; i++) + { + int string_start = stringRemoveDelimiter('-', argv[i]); + const char *string_argv = &argv[i][string_start]; + + const char *equal_pos = strchr(string_argv, '='); + int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv); + + int length = (int)strlen(string_ref); + + if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length)) + { + bFound = true; + continue; + } + } + } + + return bFound; +} + +// This function wraps the CUDA Driver API into a template function +template +inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value) +{ + bool bFound = false; + + if (argc >= 1) + { + for (int i=1; i < argc; i++) + { + int string_start = stringRemoveDelimiter('-', argv[i]); + const char *string_argv = &argv[i][string_start]; + int length = (int)strlen(string_ref); + + if (!STRNCASECMP(string_argv, string_ref, length)) + { + if (length+1 <= (int)strlen(string_argv)) + { + int auto_inc = (string_argv[length] == '=') ? 1 : 0; + *value = (T)atoi(&string_argv[length + auto_inc]); + } + + bFound = true; + i=argc; + } + } + } + + return bFound; +} + +inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref) +{ + bool bFound = false; + int value = -1; + + if (argc >= 1) + { + for (int i=1; i < argc; i++) + { + int string_start = stringRemoveDelimiter('-', argv[i]); + const char *string_argv = &argv[i][string_start]; + int length = (int)strlen(string_ref); + + if (!STRNCASECMP(string_argv, string_ref, length)) + { + if (length+1 <= (int)strlen(string_argv)) + { + int auto_inc = (string_argv[length] == '=') ? 1 : 0; + value = atoi(&string_argv[length + auto_inc]); + } + else + { + value = 0; + } + + bFound = true; + continue; + } + } + } + + if (bFound) + { + return value; + } + else + { + return 0; + } +} + +inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref) +{ + bool bFound = false; + float value = -1; + + if (argc >= 1) + { + for (int i=1; i < argc; i++) + { + int string_start = stringRemoveDelimiter('-', argv[i]); + const char *string_argv = &argv[i][string_start]; + int length = (int)strlen(string_ref); + + if (!STRNCASECMP(string_argv, string_ref, length)) + { + if (length+1 <= (int)strlen(string_argv)) + { + int auto_inc = (string_argv[length] == '=') ? 1 : 0; + value = (float)atof(&string_argv[length + auto_inc]); + } + else + { + value = 0.f; + } + + bFound = true; + continue; + } + } + } + + if (bFound) + { + return value; + } + else + { + return 0; + } +} + +inline bool getCmdLineArgumentString(const int argc, const char **argv, + const char *string_ref, char **string_retval) +{ + bool bFound = false; + + if (argc >= 1) + { + for (int i=1; i < argc; i++) + { + int string_start = stringRemoveDelimiter('-', argv[i]); + char *string_argv = (char *)&argv[i][string_start]; + int length = (int)strlen(string_ref); + + if (!STRNCASECMP(string_argv, string_ref, length)) + { + *string_retval = &string_argv[length+1]; + bFound = true; + continue; + } + } + } + + if (!bFound) + { + *string_retval = NULL; + } + + return bFound; +} + +////////////////////////////////////////////////////////////////////////////// +//! Find the path for a file assuming that +//! files are found in the searchPath. +//! +//! @return the path if succeeded, otherwise 0 +//! @param filename name of the file +//! @param executable_path optional absolute path of the executable +////////////////////////////////////////////////////////////////////////////// +inline char *sdkFindFilePath(const char *filename, const char *executable_path) +{ + // defines a variable that is replaced with the name of the executable + + // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files) + // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc + const char *searchPath[] = + { + "./", // same dir + "./common/", // "/common/" subdir + "./common/data/", // "/common/data/" subdir + "./data/", // "/data/" subdir + "./src/", // "/src/" subdir + "./src//data/", // "/src//data/" subdir + "./inc/", // "/inc/" subdir + "./0_Simple/", // "/0_Simple/" subdir + "./1_Utilities/", // "/1_Utilities/" subdir + "./2_Graphics/", // "/2_Graphics/" subdir + "./3_Imaging/", // "/3_Imaging/" subdir + "./4_Financial/", // "/4_Financial/" subdir + "./5_Simulations/", // "/5_Simulations/" subdir + "./6_Advanced/", // "/6_Advanced/" subdir + "./7_CUDALibraries/", // "/7_CUDALibraries/" subdir + "./8_Android/", // "/8_Android/" subdir + "./samples/", // "/samples/" subdir + + "../", // up 1 in tree + "../common/", // up 1 in tree, "/common/" subdir + "../common/data/", // up 1 in tree, "/common/data/" subdir + "../data/", // up 1 in tree, "/data/" subdir + "../src/", // up 1 in tree, "/src/" subdir + "../inc/", // up 1 in tree, "/inc/" subdir + + "../0_Simple//data/", // up 1 in tree, "/0_Simple//" subdir + "../1_Utilities//data/", // up 1 in tree, "/1_Utilities//" subdir + "../2_Graphics//data/", // up 1 in tree, "/2_Graphics//" subdir + "../3_Imaging//data/", // up 1 in tree, "/3_Imaging//" subdir + "../4_Financial//data/", // up 1 in tree, "/4_Financial//" subdir + "../5_Simulations//data/", // up 1 in tree, "/5_Simulations//" subdir + "../6_Advanced//data/", // up 1 in tree, "/6_Advanced//" subdir + "../7_CUDALibraries//data/",// up 1 in tree, "/7_CUDALibraries//" subdir + "../8_Android//data/", // up 1 in tree, "/8_Android//" subdir + "../samples//data/", // up 1 in tree, "/samples//" subdir + "../../", // up 2 in tree + "../../common/", // up 2 in tree, "/common/" subdir + "../../common/data/", // up 2 in tree, "/common/data/" subdir + "../../data/", // up 2 in tree, "/data/" subdir + "../../src/", // up 2 in tree, "/src/" subdir + "../../inc/", // up 2 in tree, "/inc/" subdir + "../../sandbox//data/", // up 2 in tree, "/sandbox//" subdir + "../../0_Simple//data/", // up 2 in tree, "/0_Simple//" subdir + "../../1_Utilities//data/", // up 2 in tree, "/1_Utilities//" subdir + "../../2_Graphics//data/", // up 2 in tree, "/2_Graphics//" subdir + "../../3_Imaging//data/", // up 2 in tree, "/3_Imaging//" subdir + "../../4_Financial//data/", // up 2 in tree, "/4_Financial//" subdir + "../../5_Simulations//data/", // up 2 in tree, "/5_Simulations//" subdir + "../../6_Advanced//data/", // up 2 in tree, "/6_Advanced//" subdir + "../../7_CUDALibraries//data/", // up 2 in tree, "/7_CUDALibraries//" subdir + "../../8_Android//data/", // up 2 in tree, "/8_Android//" subdir + "../../samples//data/", // up 2 in tree, "/samples//" subdir + "../../../", // up 3 in tree + "../../../src//", // up 3 in tree, "/src//" subdir + "../../../src//data/", // up 3 in tree, "/src//data/" subdir + "../../../src//src/", // up 3 in tree, "/src//src/" subdir + "../../../src//inc/", // up 3 in tree, "/src//inc/" subdir + "../../../sandbox//", // up 3 in tree, "/sandbox//" subdir + "../../../sandbox//data/", // up 3 in tree, "/sandbox//data/" subdir + "../../../sandbox//src/", // up 3 in tree, "/sandbox//src/" subdir + "../../../sandbox//inc/", // up 3 in tree, "/sandbox//inc/" subdir + "../../../0_Simple//data/", // up 3 in tree, "/0_Simple//" subdir + "../../../1_Utilities//data/", // up 3 in tree, "/1_Utilities//" subdir + "../../../2_Graphics//data/", // up 3 in tree, "/2_Graphics//" subdir + "../../../3_Imaging//data/", // up 3 in tree, "/3_Imaging//" subdir + "../../../4_Financial//data/", // up 3 in tree, "/4_Financial//" subdir + "../../../5_Simulations//data/", // up 3 in tree, "/5_Simulations//" subdir + "../../../6_Advanced//data/", // up 3 in tree, "/6_Advanced//" subdir + "../../../7_CUDALibraries//data/", // up 3 in tree, "/7_CUDALibraries//" subdir + "../../../8_Android//data/", // up 3 in tree, "/8_Android//" subdir + "../../../samples//data/", // up 3 in tree, "/samples//" subdir + "../../../common/", // up 3 in tree, "../../../common/" subdir + "../../../common/data/", // up 3 in tree, "../../../common/data/" subdir + "../../../data/", // up 3 in tree, "../../../data/" subdir + "../../../../", // up 4 in tree + "../../../../src//", // up 4 in tree, "/src//" subdir + "../../../../src//data/", // up 4 in tree, "/src//data/" subdir + "../../../../src//src/", // up 4 in tree, "/src//src/" subdir + "../../../../src//inc/", // up 4 in tree, "/src//inc/" subdir + "../../../../sandbox//", // up 4 in tree, "/sandbox//" subdir + "../../../../sandbox//data/", // up 4 in tree, "/sandbox//data/" subdir + "../../../../sandbox//src/", // up 4 in tree, "/sandbox//src/" subdir + "../../../../sandbox//inc/", // up 4 in tree, "/sandbox//inc/" subdir + "../../../../0_Simple//data/", // up 4 in tree, "/0_Simple//" subdir + "../../../../1_Utilities//data/", // up 4 in tree, "/1_Utilities//" subdir + "../../../../2_Graphics//data/", // up 4 in tree, "/2_Graphics//" subdir + "../../../../3_Imaging//data/", // up 4 in tree, "/3_Imaging//" subdir + "../../../../4_Financial//data/", // up 4 in tree, "/4_Financial//" subdir + "../../../../5_Simulations//data/",// up 4 in tree, "/5_Simulations//" subdir + "../../../../6_Advanced//data/", // up 4 in tree, "/6_Advanced//" subdir + "../../../../7_CUDALibraries//data/", // up 4 in tree, "/7_CUDALibraries//" subdir + "../../../../8_Android//data/", // up 4 in tree, "/8_Android//" subdir + "../../../../samples//data/", // up 4 in tree, "/samples//" subdir + "../../../../common/", // up 4 in tree, "../../../common/" subdir + "../../../../common/data/", // up 4 in tree, "../../../common/data/" subdir + "../../../../data/", // up 4 in tree, "../../../data/" subdir + "../../../../../", // up 5 in tree + "../../../../../src//", // up 5 in tree, "/src//" subdir + "../../../../../src//data/", // up 5 in tree, "/src//data/" subdir + "../../../../../src//src/", // up 5 in tree, "/src//src/" subdir + "../../../../../src//inc/", // up 5 in tree, "/src//inc/" subdir + "../../../../../sandbox//", // up 5 in tree, "/sandbox//" subdir + "../../../../../sandbox//data/", // up 5 in tree, "/sandbox//data/" subdir + "../../../../../sandbox//src/", // up 5 in tree, "/sandbox//src/" subdir + "../../../../../sandbox//inc/", // up 5 in tree, "/sandbox//inc/" subdir + "../../../../../0_Simple//data/", // up 5 in tree, "/0_Simple//" subdir + "../../../../../1_Utilities//data/", // up 5 in tree, "/1_Utilities//" subdir + "../../../../../2_Graphics//data/", // up 5 in tree, "/2_Graphics//" subdir + "../../../../../3_Imaging//data/", // up 5 in tree, "/3_Imaging//" subdir + "../../../../../4_Financial//data/", // up 5 in tree, "/4_Financial//" subdir + "../../../../../5_Simulations//data/",// up 5 in tree, "/5_Simulations//" subdir + "../../../../../6_Advanced//data/", // up 5 in tree, "/6_Advanced//" subdir + "../../../../../7_CUDALibraries//data/", // up 5 in tree, "/7_CUDALibraries//" subdir + "../../../../../8_Android//data/", // up 5 in tree, "/8_Android//" subdir + "../../../../../samples//data/", // up 5 in tree, "/samples//" subdir + "../../../../../common/", // up 5 in tree, "../../../common/" subdir + "../../../../../common/data/", // up 5 in tree, "../../../common/data/" subdir + }; + + // Extract the executable name + std::string executable_name; + + if (executable_path != 0) + { + executable_name = std::string(executable_path); + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + // Windows path delimiter + size_t delimiter_pos = executable_name.find_last_of('\\'); + executable_name.erase(0, delimiter_pos + 1); + + if (executable_name.rfind(".exe") != std::string::npos) + { + // we strip .exe, only if the .exe is found + executable_name.resize(executable_name.size() - 4); + } + +#else + // Linux & OSX path delimiter + size_t delimiter_pos = executable_name.find_last_of('/'); + executable_name.erase(0,delimiter_pos+1); +#endif + } + + // Loop over all search paths and return the first hit + for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i) + { + std::string path(searchPath[i]); + size_t executable_name_pos = path.find(""); + + // If there is executable_name variable in the searchPath + // replace it with the value + if (executable_name_pos != std::string::npos) + { + if (executable_path != 0) + { + path.replace(executable_name_pos, strlen(""), executable_name); + } + else + { + // Skip this path entry if no executable argument is given + continue; + } + } + +#ifdef _DEBUG + printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str()); +#endif + + // Test if the file exists + path.append(filename); + FILE *fp; + FOPEN(fp, path.c_str(), "rb"); + + if (fp != NULL) + { + fclose(fp); + // File found + // returning an allocated array here for backwards compatibility reasons + char *file_path = (char *) malloc(path.length() + 1); + STRCPY(file_path, path.length() + 1, path.c_str()); + return file_path; + } + + if (fp) + { + fclose(fp); + } + } + + // File not found + return 0; +} + +#endif diff --git a/imageinfo.h b/imageinfo.h new file mode 100644 index 0000000..b84a815 --- /dev/null +++ b/imageinfo.h @@ -0,0 +1,15 @@ +#pragma once +#include "managed.h" + +class __align__(128) ImageInfo : public Managed{ +public: + // Image size + int cols; + int rows; + + // Total number of pixels + int np; + + // Total number of bytes (may be different when padded) + int nb; +}; diff --git a/linestate.h b/linestate.h new file mode 100644 index 0000000..3d0c8f1 --- /dev/null +++ b/linestate.h @@ -0,0 +1,23 @@ +#pragma once +#include // memset() +#include "algorithmparameters.h" +#include "cameraparameters.h" +#include "managed.h" +#include // float4 + +class __align__(128) LineState : public Managed { +public: + char *used_pixels; // disparity*/*/ + int n; + int s; // stride + int l; // length + void resize(int n) + { + cudaMallocManaged (&used_pixels, sizeof(char) * n); + memset (used_pixels, 0, sizeof(char) * n); + } + ~LineState() + { + cudaFree (used_pixels); + } +}; diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..3370ef7 --- /dev/null +++ b/main.cpp @@ -0,0 +1,879 @@ +#ifdef _WIN32 +#include +#include +#include +#endif + +#include +#include +#include +#include + +#include +#include + + +// Includes CUDA +#include +#include +#include +#include +#include +#include + +// CUDA helper functions +#include "helper_cuda.h" // helper functions for CUDA error check + +#include // multimap + +#include // mkdir +#include // mkdir + + +//#include "camera.h" +#include "algorithmparameters.h" +#include "globalstate.h" +#include "fusibile.h" + +#include "main.h" +#include "fileIoUtils.h" +#include "cameraGeometryUtils.h" +#include "mathUtils.h" +#include "displayUtils.h" +#include "point_cloud_list.h" + +#define MAX_NR_POINTS 500000 + +struct InputData{ + string path; + //int id; + string id; + int camId; + Camera cam; + Mat_ depthMap; + Mat_ inputImage; + Mat_ normals; +}; + +int getCameraFromId(string id, vector &cameras){ + for(int i =0; i< cameras.size(); i++) { + //cout << "Checking camera id " << i << " cameraid " << cameras[i].id << endl; + if(cameras[i].id.compare(id) == 0) + return i; + } + return -1; +} +static void get_subfolders( + const char *dirname, + vector &subfolders) +{ + DIR *dir; + struct dirent *ent; + + // Open directory stream + dir = opendir (dirname); + if (dir != NULL) { + //cout << "Dirname is " << dirname << endl; + //cout << "Dirname type is " << ent->d_type << endl; + //cout << "Dirname type DT_DIR " << DT_DIR << endl; + + // Print all files and directories within the directory + while ((ent = readdir (dir)) != NULL) { + //cout << "INSIDE" << endl; + //if(ent->d_type == DT_DIR) + { + char* name = ent->d_name; + if(strcmp(name,".") == 0 || strcmp(ent->d_name,"..") == 0) + continue; + //printf ("dir %s/\n", name); + subfolders.push_back(string(name)); + } + } + + closedir (dir); + + } else { + // Could not open directory + printf ("Cannot open directory %s\n", dirname); + exit (EXIT_FAILURE); + } +} + +static void print_help () +{ + printf ( "\nfusibile\n" ); +} + +/* process command line arguments + * Input: argc, argv - command line arguments + * Output: inputFiles, outputFiles, parameters, gt_parameters, no_display - algorithm parameters + */ +static int getParametersFromCommandLine ( int argc, + char** argv, + InputFiles &inputFiles, + OutputFiles &outputFiles, + AlgorithmParameters ¶meters, + GTcheckParameters >_parameters, + bool &no_display ) +{ + const char* algorithm_opt = "--algorithm="; + const char* maxdisp_opt = "--max-disparity="; + const char* blocksize_opt = "--blocksize="; + const char* cost_tau_color_opt = "--cost_tau_color="; + const char* cost_tau_gradient_opt = "--cost_tau_gradient="; + const char* cost_alpha_opt = "--cost_alpha="; + const char* cost_gamma_opt = "--cost_gamma="; + const char* disparity_tolerance_opt = "--disp_tol="; + const char* normal_tolerance_opt = "--norm_tol="; + const char* border_value = "--border_value="; //either constant scalar or -1 = REPLICATE + const char* gtDepth_divFactor_opt = "--gtDepth_divisionFactor="; + const char* gtDepth_tolerance_opt = "--gtDepth_tolerance="; + const char* gtDepth_tolerance2_opt = "--gtDepth_tolerance2="; + const char* nodisplay_opt = "-no_display"; + const char* colorProc_opt = "-color_processing"; + const char* num_iterations_opt = "--iterations="; + const char* self_similariy_n_opt = "--ss_n="; + const char* ct_epsilon_opt = "--ct_eps="; + const char* cam_scale_opt = "--cam_scale="; + const char* num_img_processed_opt = "--num_img_processed="; + const char* n_best_opt = "--n_best="; + const char* cost_comb_opt = "--cost_comb="; + const char* cost_good_factor_opt = "--good_factor="; + const char* depth_min_opt = "--depth_min="; + const char* depth_max_opt = "--depth_max="; + // const char* scale_opt = "--scale="; + const char* outputPath_opt = "-output_folder"; + const char* calib_opt = "-calib_file"; + const char* gt_opt = "-gt"; + const char* gt_nocc_opt = "-gt_nocc"; + const char* occl_mask_opt = "-occl_mask"; + const char* gt_normal_opt = "-gt_normal"; + const char* images_input_folder_opt = "-images_folder"; + const char* p_input_folder_opt = "-p_folder"; + const char* krt_file_opt = "-krt_file"; + const char* camera_input_folder_opt = "-camera_folder"; + const char* bounding_folder_opt = "-bounding_folder"; + const char* viewSelection_opt = "-view_selection"; + const char* initial_seed_opt = "--initial_seed"; + + const char* disp_thresh_opt = "--disp_thresh="; + const char* normal_thresh_opt = "--normal_thresh="; + const char* num_consistent_opt = "--num_consistent="; + + //read in arguments + for ( int i = 1; i < argc; i++ ) + { + if ( argv[i][0] != '-' ) + { + inputFiles.img_filenames.push_back ( argv[i] ); + /*if( inputFiles.imgLeft_filename.empty() ) + inputFiles.imgLeft_filename = argv[i]; + else if( inputFiles.imgRight_filename.empty() ) + inputFiles.imgRight_filename = argv[i]; + */ + } + else if ( strncmp ( argv[i], algorithm_opt, strlen ( algorithm_opt ) ) == 0 ) + { + char* _alg = argv[i] + strlen ( algorithm_opt ); + parameters.algorithm = strcmp ( _alg, "pm" ) == 0 ? PM_COST : + strcmp ( _alg, "ct" ) == 0 ? CENSUS_TRANSFORM : + strcmp ( _alg, "sct" ) == 0 ? SPARSE_CENSUS : + strcmp ( _alg, "ct_ss" ) == 0 ? CENSUS_SELFSIMILARITY : + strcmp ( _alg, "adct" ) == 0 ? ADCENSUS : + strcmp ( _alg, "adct_ss" ) == 0 ? ADCENSUS_SELFSIMILARITY : + strcmp ( _alg, "pm_ss" ) == 0 ? PM_SELFSIMILARITY : -1; + if ( parameters.algorithm < 0 ) + { + printf ( "Command-line parameter error: Unknown stereo algorithm\n\n" ); + print_help (); + return -1; + } + } + else if ( strncmp ( argv[i], cost_comb_opt, strlen ( cost_comb_opt ) ) == 0 ) + { + char* _alg = argv[i] + strlen ( algorithm_opt ); + parameters.cost_comb = strcmp ( _alg, "all" ) == 0 ? COMB_ALL : + strcmp ( _alg, "best_n" ) == 0 ? COMB_BEST_N : + strcmp ( _alg, "angle" ) == 0 ? COMB_ANGLE : + strcmp ( _alg, "good" ) == 0 ? COMB_GOOD : -1; + if ( parameters.cost_comb < 0 ) + { + printf ( "Command-line parameter error: Unknown cost combination method\n\n" ); + print_help (); + return -1; + } + } + else if ( strncmp ( argv[i], maxdisp_opt, strlen ( maxdisp_opt ) ) == 0 ) + { + if ( sscanf ( argv[i] + strlen ( maxdisp_opt ), "%f", ¶meters.max_disparity ) != 1 || + parameters.max_disparity < 1 ) + { + printf ( "Command-line parameter error: The max disparity (--maxdisparity=<...>) must be a positive integer \n" ); + print_help (); + return -1; + } + } + else if ( strncmp ( argv[i], blocksize_opt, strlen ( blocksize_opt ) ) == 0 ) + { + int k_size; + if ( sscanf ( argv[i] + strlen ( blocksize_opt ), "%d", &k_size ) != 1 || + k_size < 1 || k_size % 2 != 1 ) + { + printf ( "Command-line parameter error: The block size (--blocksize=<...>) must be a positive odd number\n" ); + return -1; + } + parameters.box_hsize = k_size; + parameters.box_vsize = k_size; + } + else if ( strncmp ( argv[i], cost_good_factor_opt, strlen ( cost_good_factor_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( cost_good_factor_opt ), "%f", ¶meters.good_factor ); + } + else if ( strncmp ( argv[i], cost_tau_color_opt, strlen ( cost_tau_color_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( cost_tau_color_opt ), "%f", ¶meters.tau_color ); + } + else if ( strncmp ( argv[i], cost_tau_gradient_opt, strlen ( cost_tau_gradient_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( cost_tau_gradient_opt ), "%f", ¶meters.tau_gradient ); + } + else if ( strncmp ( argv[i], cost_alpha_opt, strlen ( cost_alpha_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( cost_alpha_opt ), "%f", ¶meters.alpha ); + } + else if ( strncmp ( argv[i], cost_gamma_opt, strlen ( cost_gamma_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( cost_gamma_opt ), "%f", ¶meters.gamma ); + } + else if ( strncmp ( argv[i], border_value, strlen ( border_value ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( border_value ), "%d", ¶meters.border_value ); + } + else if ( strncmp ( argv[i], num_iterations_opt, strlen ( num_iterations_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( num_iterations_opt ), "%d", ¶meters.iterations ); + } + else if ( strncmp ( argv[i], disparity_tolerance_opt, strlen ( disparity_tolerance_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( disparity_tolerance_opt ), "%f", ¶meters.dispTol ); + } + else if ( strncmp ( argv[i], normal_tolerance_opt, strlen ( normal_tolerance_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( normal_tolerance_opt ), "%f", ¶meters.normTol ); + } + else if ( strncmp ( argv[i], self_similariy_n_opt, strlen ( self_similariy_n_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( self_similariy_n_opt ), "%d", ¶meters.self_similarity_n ); + } + else if ( strncmp ( argv[i], ct_epsilon_opt, strlen ( ct_epsilon_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( ct_epsilon_opt ), "%f", ¶meters.census_epsilon ); + } + else if ( strncmp ( argv[i], cam_scale_opt, strlen ( cam_scale_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( cam_scale_opt ), "%f", ¶meters.cam_scale ); + } + else if ( strncmp ( argv[i], num_img_processed_opt, strlen ( num_img_processed_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( num_img_processed_opt ), "%d", ¶meters.num_img_processed ); + } + else if ( strncmp ( argv[i], n_best_opt, strlen ( n_best_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( n_best_opt ), "%d", ¶meters.n_best ); + } + else if ( strncmp ( argv[i], gtDepth_divFactor_opt, strlen ( gtDepth_divFactor_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( gtDepth_divFactor_opt ), "%f", >_parameters.divFactor ); + } + else if ( strncmp ( argv[i], gtDepth_tolerance_opt, strlen ( gtDepth_tolerance_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( gtDepth_tolerance_opt ), "%f", >_parameters.dispTolGT ); + } + else if ( strncmp ( argv[i], gtDepth_tolerance2_opt, strlen ( gtDepth_tolerance2_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( gtDepth_tolerance2_opt ), "%f", >_parameters.dispTolGT2 ); + } + else if ( strncmp ( argv[i], depth_min_opt, strlen ( depth_min_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( depth_min_opt ), "%f", ¶meters.depthMin ); + } + else if ( strncmp ( argv[i], depth_max_opt, strlen ( depth_max_opt ) ) == 0 ) + { + sscanf ( argv[i] + strlen ( depth_max_opt ), "%f", ¶meters.depthMax ); + } + else if ( strcmp ( argv[i], viewSelection_opt ) == 0 ) + parameters.viewSelection = true; + else if ( strcmp ( argv[i], nodisplay_opt ) == 0 ) + no_display = true; + else if ( strcmp ( argv[i], colorProc_opt ) == 0 ) + parameters.color_processing = true; + else if ( strcmp ( argv[i], "-o" ) == 0 ) + outputFiles.disparity_filename = argv[++i]; + else if ( strcmp ( argv[i], outputPath_opt ) == 0 ) + outputFiles.parentFolder = argv[++i]; + else if ( strcmp ( argv[i], calib_opt ) == 0 ) + inputFiles.calib_filename = argv[++i]; + else if ( strcmp ( argv[i], gt_opt ) == 0 ) + inputFiles.gt_filename = argv[++i]; + else if ( strcmp ( argv[i], gt_nocc_opt ) == 0 ) + inputFiles.gt_nocc_filename = argv[++i]; + else if ( strcmp ( argv[i], occl_mask_opt ) == 0 ) + inputFiles.occ_filename = argv[++i]; + else if ( strcmp ( argv[i], gt_normal_opt ) == 0 ) + inputFiles.gt_normal_filename = argv[++i]; + else if ( strcmp ( argv[i], images_input_folder_opt ) == 0 ) + inputFiles.images_folder = argv[++i]; + else if ( strcmp ( argv[i], p_input_folder_opt ) == 0 ) + inputFiles.p_folder = argv[++i]; + else if ( strcmp ( argv[i], krt_file_opt ) == 0 ) + inputFiles.krt_file = argv[++i]; + else if ( strcmp ( argv[i], camera_input_folder_opt ) == 0 ) + inputFiles.camera_folder = argv[++i]; + else if ( strcmp ( argv[i], initial_seed_opt ) == 0 ) + inputFiles.seed_file = argv[++i]; + else if ( strcmp ( argv[i], bounding_folder_opt ) == 0 ) + inputFiles.bounding_folder = argv[++i]; + else if ( strncmp ( argv[i], disp_thresh_opt, strlen (disp_thresh_opt) ) == 0 ) + sscanf ( argv[i] + strlen (disp_thresh_opt), "%f", ¶meters.depthThresh ); + else if ( strncmp ( argv[i], normal_thresh_opt, strlen (normal_thresh_opt) ) == 0 ) { + float angle_degree; + sscanf ( argv[i] + strlen (normal_thresh_opt), "%f", &angle_degree ); + parameters.normalThresh = angle_degree * M_PI / 180.0f; + } + else if ( strncmp ( argv[i], num_consistent_opt, strlen (num_consistent_opt) ) == 0 ) + sscanf ( argv[i] + strlen (num_consistent_opt), "%d", ¶meters.numConsistentThresh ); + else + { + printf ( "Command-line parameter error: unknown option %s\n", argv[i] ); + //return -1; + } + } + //cout << "KRt file is " << inputFiles.krt_file << endl; + + return 0; +} + +static void selectViews ( CameraParameters &cameraParams, int imgWidth, int imgHeight, bool viewSel ) { + vector cameras = cameraParams.cameras; + Camera ref = cameras[cameraParams.idRef]; + + int x = imgWidth / 2; + int y = imgHeight / 2; + + cameraParams.viewSelectionSubset.clear (); + + Vec3f viewVectorRef = getViewVector ( ref, x, y, cameraParams.rectified ); + + // TODO hardcoded value makes it a parameter + float minimum_angle_degree = 10; + float maximum_angle_degree = 30; + float minimum_angle_radians = minimum_angle_degree * M_PI / 180.0f; + float maximum_angle_radians = maximum_angle_degree * M_PI / 180.0f; + printf("Accepted intersection angle of central rays is %f to %f degrees\n", minimum_angle_degree, maximum_angle_degree); + for ( size_t i = 0; i < cameras.size (); i++ ) { + //if ( i == cameraParams.idRef && !cameraParams.rectified ) + // continue; + + if ( !viewSel ) { //select all views, dont perform selection + cameraParams.viewSelectionSubset.push_back ( i ); + continue; + } + + Vec3f vec = getViewVector ( cameras[i], x, y, cameraParams.rectified ); + + float angle = getAngle ( viewVectorRef, vec ); + if ( angle > minimum_angle_radians && angle < maximum_angle_radians ) //0.6 select if angle between 5.7 and 34.8 (0.6) degrees (10 and 30 degrees suggested by some paper) + { + cameraParams.viewSelectionSubset.push_back ( i ); + printf("Accepting camera %ld with angle\t %f degree (%f radians)\n", i, angle*180.0f/M_PI, angle); + } + else + printf("Discarding camera %ld with angle\t %f degree (%f radians)\n", i, angle*180.0f/M_PI, angle); + } + +} + +static void addImageToTextureUint (vector > &imgs, cudaTextureObject_t texs[]) +{ + for (unsigned int i=0; i(); + // Allocate array with correct size and number of channels + cudaArray *cuArray; + checkCudaErrors(cudaMallocArray(&cuArray, + &channelDesc, + cols, + rows)); + + checkCudaErrors (cudaMemcpy2DToArray (cuArray, + 0, + 0, + imgs[i].ptr(), + imgs[i].step[0], + cols*sizeof(uint8_t), + rows, + cudaMemcpyHostToDevice)); + + // Specify texture + struct cudaResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = cudaResourceTypeArray; + resDesc.res.array.array = cuArray; + + // Specify texture object parameters + struct cudaTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.addressMode[0] = cudaAddressModeWrap; + texDesc.addressMode[1] = cudaAddressModeWrap; + texDesc.filterMode = cudaFilterModePoint; + texDesc.readMode = cudaReadModeElementType; + texDesc.normalizedCoords = 0; + + // Create texture object + cudaTextureObject_t &texObj = texs[i]; + checkCudaErrors(cudaCreateTextureObject(&(texs[i]), &resDesc, &texDesc, NULL)); + //texs[i] = texObj; + } + return; +} +static void addImageToTextureFloatColor (vector &imgs, cudaTextureObject_t texs[]) +{ + for (int i=0; i(); + + // Allocate array with correct size and number of channels + cudaArray *cuArray; + checkCudaErrors(cudaMallocArray(&cuArray, + &channelDesc, + cols, + rows)); + + checkCudaErrors (cudaMemcpy2DToArray (cuArray, + 0, + 0, + imgs[i].ptr(), + imgs[i].step[0], + cols*sizeof(float)*4, + rows, + cudaMemcpyHostToDevice)); + + // Specify texture + struct cudaResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = cudaResourceTypeArray; + resDesc.res.array.array = cuArray; + + // Specify texture object parameters + struct cudaTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.addressMode[0] = cudaAddressModeWrap; + texDesc.addressMode[1] = cudaAddressModeWrap; + texDesc.filterMode = cudaFilterModeLinear; + texDesc.readMode = cudaReadModeElementType; + texDesc.normalizedCoords = 0; + + // Create texture object + cudaTextureObject_t &texObj = texs[i]; + checkCudaErrors(cudaCreateTextureObject(&(texs[i]), &resDesc, &texDesc, NULL)); + } + return; +} + +static void addImageToTextureFloatGray (vector &imgs, cudaTextureObject_t texs[]) +{ + for (int i=0; i(), + imgs[i].step[0], + cols*sizeof(float), + rows, + cudaMemcpyHostToDevice)); + + // Specify texture + struct cudaResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = cudaResourceTypeArray; + resDesc.res.array.array = cuArray; + + // Specify texture object parameters + struct cudaTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.addressMode[0] = cudaAddressModeWrap; + texDesc.addressMode[1] = cudaAddressModeWrap; + texDesc.filterMode = cudaFilterModeLinear; + texDesc.readMode = cudaReadModeElementType; + texDesc.normalizedCoords = 0; + + // Create texture object + cudaTextureObject_t &texObj = texs[i]; + checkCudaErrors(cudaCreateTextureObject(&(texs[i]), &resDesc, &texDesc, NULL)); + //texs[i] = texObj; + } + return; +} +static int runFusibile (int argc, + char **argv, + OutputFiles &outputFiles, + AlgorithmParameters &algParameters, + bool no_display, + Results &results + ) +{ + InputFiles inputFiles; + string ext = ".png"; + + string results_folder = "results/"; + + const char* results_folder_opt = "-input_folder"; + const char* p_input_folder_opt = "-p_folder"; + const char* krt_file_opt = "-krt_file"; + const char* images_input_folder_opt = "-images_folder"; + const char* gt_opt = "-gt"; + const char* gt_nocc_opt = "-gt_nocc"; + const char* pmvs_folder_opt = "--pmvs_folder"; + //read in arguments + for ( int i = 1; i < argc; i++ ) + { + if ( strcmp ( argv[i], results_folder_opt ) == 0 ){ + results_folder = argv[++i]; + cout << "input folder is " << results_folder << endl; + + }else if ( strcmp ( argv[i], p_input_folder_opt ) == 0 ){ + inputFiles.p_folder = argv[++i]; + } + else if ( strcmp ( argv[i], krt_file_opt ) == 0 ) + inputFiles.krt_file = argv[++i]; + else if ( strcmp ( argv[i], images_input_folder_opt ) == 0 ){ + inputFiles.images_folder = argv[++i]; + + }else if ( strcmp ( argv[i], gt_opt ) == 0 ){ + inputFiles.gt_filename = argv[++i]; + + }else if ( strcmp ( argv[i], gt_nocc_opt ) == 0 ){ + inputFiles.gt_nocc_filename = argv[++i]; + } + else if ( strncmp ( argv[i], pmvs_folder_opt, strlen ( pmvs_folder_opt ) ) == 0 ) { + inputFiles.pmvs_folder = argv[++i]; + } + } + + if (inputFiles.pmvs_folder.size()>0) { + inputFiles.images_folder = inputFiles.pmvs_folder + "/visualize/"; + inputFiles.p_folder = inputFiles.pmvs_folder + "/txt/"; + } + cout <<"image folder is " << inputFiles.images_folder << endl; + cout <<"p folder is " << inputFiles.images_folder << endl; + cout <<"pmvs folder is " << inputFiles.pmvs_folder << endl; + + GTcheckParameters gtParameters; + + gtParameters.dispTolGT = 0.1f; + gtParameters.dispTolGT2 = 0.02f; + gtParameters.divFactor = 1.0f; + // create folder to store result images + time_t timeObj; + time ( &timeObj ); + tm *pTime = localtime ( &timeObj ); + + vector > view_vectors; + + + time(&timeObj); + pTime = localtime(&timeObj); + + char output_folder[256]; + sprintf(output_folder, "%s/consistencyCheck-%04d%02d%02d-%02d%02d%02d/",results_folder.c_str(), pTime->tm_year+1900, pTime->tm_mon+1,pTime->tm_mday,pTime->tm_hour, pTime->tm_min, pTime->tm_sec); +#if defined(_WIN32) + _mkdir(output_folder); +#else + mkdir(output_folder, 0777); +#endif + + vector subfolders; + get_subfolders(results_folder.c_str(), subfolders); + std::sort(subfolders.begin(), subfolders.end()); + + vector< Mat_ > warpedImages; + vector< Mat_ > warpedImages_inverse; + //vector< Mat_ > depthMaps; + vector< Mat_ > updateMaps; + vector< Mat_ > updateNormals; + vector< Mat_ > depthMapConsistent; + vector< Mat_ > normalsConsistent; + vector< Mat_ > groundTruthNormals; + vector< Mat_ > valid; + + + map< int,string> consideredIds; + for(int i=0;i(i,id_string)); + //cout << "id_string is " << id_string << endl; + //cout << "i is " << i << endl; + //char outputPath[256]; + //sprintf(outputPath, "%s.png", id_string); + + if( access( (id_string + ".png").c_str(), R_OK ) != -1 ) + inputFiles.img_filenames.push_back((id_string + ".png")); + else if( access( (id_string + ".jpg").c_str(), R_OK ) != -1 ) + inputFiles.img_filenames.push_back((id_string + ".jpg")); + else if( access( (id_string + ".ppm").c_str(), R_OK ) != -1 ) + inputFiles.img_filenames.push_back((id_string + ".ppm")); + } + size_t numImages = inputFiles.img_filenames.size (); + cout << "numImages is " << numImages << endl; + cout << "img_filenames is " << inputFiles.img_filenames.size() << endl; + algParameters.num_img_processed = min ( ( int ) numImages, algParameters.num_img_processed ); + + vector > img_color; // imgLeft_color, imgRight_color; + vector > img_grayscale; + for ( size_t i = 0; i < numImages; i++ ) { + //printf ( "Opening image %ld: %s\n", i, ( inputFiles.images_folder + inputFiles.img_filenames[i] ).c_str () ); + img_grayscale.push_back ( imread ( ( " ", inputFiles.images_folder + inputFiles.img_filenames[i] ), IMREAD_GRAYSCALE ) ); + if ( algParameters.color_processing ) { + img_color.push_back ( imread ( ( " ", inputFiles.images_folder + inputFiles.img_filenames[i] ), IMREAD_COLOR ) ); + } + + if ( img_grayscale[i].rows == 0 ) { + printf ( "Image seems to be invalid\n" ); + return -1; + } + } + + size_t avail; + size_t total; + cudaMemGetInfo( &avail, &total ); + size_t used = total - avail; + printf("Device memory used: %fMB\n", used/1000000.0f); + + GlobalState *gs = new GlobalState; + gs->cameras = new CameraParameters_cu; + gs->pc = new PointCloud; + cudaMemGetInfo( &avail, &total ); + used = total - avail; + printf("Device memory used: %fMB\n", used/1000000.0f); + + + uint32_t numPixels = ( uint32_t ) img_grayscale[0].rows * ( uint32_t ) img_grayscale[0].cols; + + uint32_t rows = img_grayscale[0].rows; + uint32_t cols = img_grayscale[0].cols; + + CameraParameters camParams = getCameraParameters (*(gs->cameras), + inputFiles,algParameters.depthMin, + algParameters.depthMax, + algParameters.cam_scale, + false); + printf("Camera size is %lu\n", camParams.cameras.size()); + + for ( int i = 0; i < algParameters.num_img_processed; i++ ) { + algParameters.min_disparity = disparityDepthConversion ( camParams.f, camParams.cameras[i].baseline, camParams.cameras[i].depthMax ); + algParameters.max_disparity = disparityDepthConversion ( camParams.f, camParams.cameras[i].baseline, camParams.cameras[i].depthMin ); + } + + selectViews ( camParams, cols, rows, false); + int numSelViews = camParams.viewSelectionSubset.size (); + cout << "Selected views: " << numSelViews << endl; + gs->cameras->viewSelectionSubsetNumber = numSelViews; + ofstream myfile; + for ( int i = 0; i < numSelViews; i++ ) { + cout << camParams.viewSelectionSubset[i] << ", "; + gs->cameras->viewSelectionSubset[i] = camParams.viewSelectionSubset[i]; + } + cout << endl; + + vector inputData; + + cout << "Reading normals and depth from disk" << endl; + cout << "Size consideredIds is " << consideredIds.size() << endl; + for (map::iterator it=consideredIds.begin(); it!=consideredIds.end(); ++it){ + + //get corresponding camera + int i = it->first; + string id = it->second;//consideredIds[i]; + //int id = atoi(id_string.c_str()); + int camIdx = getCameraFromId(id,camParams.cameras); + //cout << "id is " << id << endl; + //cout << "camIdx is " << camIdx << endl; + if(camIdx < 0)// || camIdx == camParams.idRef) + continue; + + InputData dat; + dat.id = id; + dat.camId = camIdx; + dat.cam = camParams.cameras[camIdx]; + dat.path = results_folder + subfolders[i]; + dat.inputImage = imread((" ",inputFiles.images_folder + id + ext), IMREAD_COLOR); + + //read normal + cout << "Reading normal " << i << endl; + readDmbNormal((dat.path + "/normals.dmb").c_str(),dat.normals); + + //read depth + cout << "Reading disp " << i << endl; + readDmb((dat.path + "/disp.dmb").c_str(),dat.depthMap); + + //inputData.push_back(move(dat)); + inputData.push_back(dat); + + } + // run gpu run + // Init parameters + gs->params = &algParameters; + + + // Init ImageInfo + //gs->iminfo.cols = img_grayscale[0].cols; + //gs->iminfo.rows = img_grayscale[0].rows; + gs->cameras->cols = img_grayscale[0].cols; + gs->cameras->rows = img_grayscale[0].rows; + gs->params->cols = img_grayscale[0].cols; + gs->params->rows = img_grayscale[0].rows; + gs->resize (img_grayscale.size()); + gs->pc->resize (img_grayscale[0].rows * img_grayscale[0].cols); + PointCloudList pc_list; + pc_list.resize (img_grayscale[0].rows * img_grayscale[0].cols); + pc_list.size=0; + pc_list.rows = img_grayscale[0].rows; + pc_list.cols = img_grayscale[0].cols; + gs->pc->rows = img_grayscale[0].rows; + gs->pc->cols = img_grayscale[0].cols; + + // Resize lines + for (int i = 0; ilines[i].resize(img_grayscale[0].rows * img_grayscale[0].cols); + gs->lines[i].n = img_grayscale[0].rows * img_grayscale[0].cols; + //gs->lines.s = img_grayscale[0].step[0]; + gs->lines[i].s = img_grayscale[0].cols; + gs->lines[i].l = img_grayscale[0].cols; + } + + vector img_grayscale_float (img_grayscale.size()); + vector img_color_float (img_grayscale.size()); + vector img_color_float_alpha (img_grayscale.size()); + vector normals_and_depth (img_grayscale.size()); + vector > img_grayscale_uint (img_grayscale.size()); + for (int i = 0; i > rgbChannels ( 3 ); + img_color_float_alpha[i] = Mat::zeros ( img_grayscale[0].rows, img_grayscale[0].cols, CV_32FC4 ); + img_color[i].convertTo (img_color_float[i], CV_32FC3); // or CV_32F works (too) + Mat alpha( img_grayscale[0].rows, img_grayscale[0].cols, CV_32FC1 ); + split (img_color_float[i], rgbChannels); + rgbChannels.push_back( alpha); + merge (rgbChannels, img_color_float_alpha[i]); + } + /* Create vector of normals and disparities */ + vector > normal ( 3 ); + normals_and_depth[i] = Mat::zeros ( img_grayscale[0].rows, img_grayscale[0].cols, CV_32FC4 ); + split (inputData[i].normals, normal); + normal.push_back( inputData[i].depthMap); + merge (normal, normals_and_depth[i]); + + } + int64_t t = getTickCount () - t; + + // Copy images to texture memory + if (algParameters.saveTexture) + if (algParameters.color_processing) + addImageToTextureFloatColor (img_color_float_alpha, gs->imgs); + else + addImageToTextureFloatGray (img_grayscale_float, gs->imgs); + + addImageToTextureFloatColor (normals_and_depth, gs->normals_depths); + +#define pow2(x) ((x)*(x)) +#define get_pow2_norm(x,y) (pow2(x)+pow2(y)) + + runcuda(*gs, pc_list, numSelViews); + Mat_ norm0 = Mat::zeros ( img_grayscale[0].rows, img_grayscale[0].cols, CV_32FC3 ); + Mat_ distImg; + char plyFile[256]; + sprintf ( plyFile, "%s/final3d_model.ply", output_folder); + printf("Writing ply file %s\n", plyFile); + //storePlyFileAsciiPointCloud ( plyFile, pc_list, inputData[0].cam, distImg); + storePlyFileBinaryPointCloud ( plyFile, pc_list, inputData[0].cam, distImg); + char xyzFile[256]; + sprintf ( xyzFile, "%s/final3d_model.xyz", output_folder); + printf("Writing ply file %s\n", xyzFile); + //storeXYZPointCloud ( xyzFile, pc_list, inputData[0].cam, distImg); + + return 0; +} + +int main(int argc, char **argv) +{ + if ( argc < 3 ) + { + print_help (); + return 0; + } + + InputFiles inputFiles; + OutputFiles outputFiles; + AlgorithmParameters* algParameters = new AlgorithmParameters; + GTcheckParameters gtParameters; + bool no_display = false; + + int ret = getParametersFromCommandLine ( argc, argv, inputFiles, outputFiles, *algParameters, gtParameters, no_display ); + if ( ret != 0 ) + return ret; + + Results results; + ret = runFusibile ( argc, argv, outputFiles, *algParameters, no_display, results); + + return 0; +} + diff --git a/main.h b/main.h new file mode 100644 index 0000000..4d368d5 --- /dev/null +++ b/main.h @@ -0,0 +1,99 @@ +#pragma once + +#include "opencv2/calib3d/calib3d.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/core/core.hpp" +#include "opencv2/highgui/highgui.hpp" +//#include "opencv2/contrib/contrib.hpp" +#if CV_MAJOR_VERSION == 3 +#include "opencv2/core/utility.hpp" +#endif + +#include +#include + +using namespace cv; +using namespace std; + +typedef Vec Vec2us; + +// parameters for comparison with ground truth +struct GTcheckParameters { + GTcheckParameters () : gtCheck ( false ), noccCheck ( false ), scale ( 150.0f ), dispTolGT ( 0.5f ), divFactor ( 4.0f ) {} + bool gtCheck; + bool noccCheck; + float scale; // scaling factor just for visualization of error + float dispTolGT; + float dispTolGT2; + //division factor dependent on ground truth data (to get disparity value: imgValue/divFactor) + float divFactor; //Middleburry small images: 4, big images third: 3, Kitti: 255 +}; + +//pathes to input images (camera images, ground truth, ...) +struct InputFiles { + InputFiles () : gt_filename ( "" ), gt_nocc_filename ( "" ), occ_filename ( "" ), gt_normal_filename ( "" ), calib_filename ( "" ), images_folder ( "" ), p_folder ( "" ), camera_folder ( "" ), pmvs_folder("") {} + vector img_filenames; // input camera images (only filenames, path is set in images_folder), names can also be used for calibration data (e.g. for Strecha P, camera) + string gt_filename; // ground truth image + string gt_nocc_filename; // non-occluded ground truth image (as provided e.g. by Kitti) + string occ_filename; // occlusion mask (binary map of all the points that are occluded) (as provided e.g. by Middleburry) + string gt_normal_filename; // ground truth normal map (for Strecha) + string calib_filename; // calibration file containing camera matrices (P) (as provided e.g. by Kitti) + string images_folder; // path to camera input images + string p_folder; // path to camera projection matrix P (Strecha) + string krt_file; // path to camera projection matrix P (Strecha) + string camera_folder; // path to camera calibration matrix K (Strecha) + string bounding_folder; //path to bounding volume (Strecha) + string seed_file; //path to bounding volume (Strecha) + string pmvs_folder; //path to bounding volume (Strecha) +}; + +//pathes to output files +struct OutputFiles { + OutputFiles () : parentFolder ( "results" ), disparity_filename ( 0 ) {} + const char* parentFolder; + char* disparity_filename; +}; + +struct Camera { + Camera () : baseline ( 0.54f ), P ( Mat::eye ( 3,4,CV_32F ) ), R ( Mat::eye ( 3,3,CV_32F ) ), reference ( false ), depthMin ( 2.0f ), depthMax ( 20.0f ) {} + Mat_ P; + Mat_ P_inv; + Mat_ M_inv; + //Mat_ K; + Mat_ R; + Mat_ t; + Vec3f C; + float baseline; + bool reference; + float depthMin; //this could be figured out from the bounding volume (not done right now, but that's why this parameter is here as well and not only in AlgorithmParameters) + float depthMax; //this could be figured out from the bounding volume (not done right now, but that's why this parameter is here as well and not only in AlgorithmParameters) + //int id; //corresponds to the image name id (eg. 0-10), independent of order in argument list, just dependent on name + string id; + Mat_ K; + Mat_ K_inv; + //float f; +}; + +//parameters for camera geometry setup (assuming that K1 = K2 = K, P1 = K [I | 0] and P2 = K [R | t]) +struct CameraParameters { + CameraParameters () : rectified ( false ), idRef ( 0 ) {} + Mat_ K; //if K varies from camera to camera: K and f need to be stored within Camera + Mat_ K_inv; //if K varies from camera to camera: K and f need to be stored within Camera + float f; + bool rectified; + vector cameras; + int idRef; + vector viewSelectionSubset; +}; + +struct Results { + Results () : error_occ ( 1.0f ), error_noc ( 1.0f ), valid_pixels ( 0.0f ), error_valid ( 1.0f ), error_valid_all ( 1.0f ), total_runtime ( 0.0f ), runtime_per_pixel ( 0.0f ) {} + float error_occ; + float error_noc; + float valid_pixels; // passed occlusion check + float valid_pixels_gt; + float error_valid; + float error_valid_all; + double total_runtime; + double runtime_per_pixel; +}; diff --git a/managed.h b/managed.h new file mode 100644 index 0000000..6e25388 --- /dev/null +++ b/managed.h @@ -0,0 +1,13 @@ +#pragma once +class Managed { +public: + void *operator new(size_t len) { + void *ptr; + cudaMallocManaged(&ptr, len); + return ptr; + } + + void operator delete(void *ptr) { + cudaFree(ptr); + } +}; diff --git a/mathUtils.h b/mathUtils.h new file mode 100644 index 0000000..9e14445 --- /dev/null +++ b/mathUtils.h @@ -0,0 +1,31 @@ +/* + * some math helper functions + */ + +#pragma once + +#ifndef M_PI +#define M_PI 3.14159265358979323846f +#endif +#define M_PI_float 3.14159265358979323846f + +//rounding of positive float values +#if defined(_WIN32) +static float roundf ( float val ) { + return floor ( val+0.5f ); +}; +#endif + +/* get angle between two vectors in 3D + * Input: v1,v2 - vectors + * Output: angle in radian + */ +static float getAngle ( Vec3f v1, Vec3f v2 ) { + float angle = acosf ( v1.dot ( v2 ) ); + //if angle is not a number the dot product was 1 and thus the two vectors should be identical --> return 0 + if ( angle != angle ) + return 0.0f; + if ( acosf ( v1.dot ( v2 ) ) != acosf ( v1.dot ( v2 ) ) ) + cout << acosf ( v1.dot ( v2 ) ) << " / " << v1.dot ( v2 )<< " / " << v1<< " / " << v2 << endl; + return angle; +}; diff --git a/mia-utils.h b/mia-utils.h new file mode 100644 index 0000000..cc63e49 --- /dev/null +++ b/mia-utils.h @@ -0,0 +1,50 @@ +#ifndef __MIA_UTILS__ +#define __MIA_UTILS__ + +#include +#include +#include + +void alloc_vector +(float **vector, /* vector */ + long n); /* size */ + +void alloc_matrix +(float ***matrix, /* matrix */ + long nx, /* size in x direction */ + long ny); /* size in y direction */ + +void alloc_matrix_d +(double ***matrix, /* matrix */ + long nx, /* size in x direction */ + long ny); /* size in y direction */ + +/* allocates storage for matrix of size nx * ny */ +void disalloc_vector + +(float *vector, /* vector */ + long n); /* size */ + +/* disallocates storage for a vector of size n */ +void disalloc_vector + +(float *vector, /* vector */ + long n); /* size */ + + +/*--------------------------------------------------------------------------*/ + +void disalloc_matrix + +(float **matrix, /* matrix */ + long nx, /* size in x direction */ + long ny); /* size in y direction */ + +void disalloc_matrix_d + +(double **matrix, /* matrix */ + long nx, /* size in x direction */ + long ny); /* size in y direction */ +/* disallocates storage for matrix of size nx * ny */ + +#endif diff --git a/point_cloud.h b/point_cloud.h new file mode 100644 index 0000000..588315a --- /dev/null +++ b/point_cloud.h @@ -0,0 +1,31 @@ +#pragma once +#include // memset() +#include "algorithmparameters.h" +#include "cameraparameters.h" +#include "managed.h" +#include // float4 + +class __align__(128) Point_cu : public Managed { +public: + float4 normal; // Normal + float4 coord; // Point coordinate + float texture; // Average texture color +}; + + +class __align__(128) PointCloud : public Managed { +public: + Point_cu *points; + int rows; + int cols; + int size; + void resize(int n) + { + cudaMallocManaged (&points, sizeof(Point_cu) * n); + memset (points, 0, sizeof(Point_cu) * n); + } + ~PointCloud() + { + cudaFree (points); + } +}; diff --git a/point_cloud_list.h b/point_cloud_list.h new file mode 100644 index 0000000..a0cfd2f --- /dev/null +++ b/point_cloud_list.h @@ -0,0 +1,40 @@ +#pragma once +#include // memset() +#include // malloc(), realloc(), free() +#include "algorithmparameters.h" +#include "cameraparameters.h" +#include "managed.h" +#include // float4 + +class Point_li { +public: + float4 normal; // Normal + float4 coord; // Point coordinate + float texture; // Average texture color +}; + + +class PointCloudList { +public: + Point_li *points; + int rows; + int cols; + unsigned int size; + unsigned int maximum; + void resize(int n) + { + maximum=n; + points = (Point_li *) malloc (sizeof(Point_li) * n); + memset (points, 0, sizeof(Point_li) * n); + } + void increase_size(int n) + { + maximum=n; + points = (Point_li *) realloc (points, n * sizeof(Point_li)); + printf("New size of point cloud list is %d\n", n); + } + ~PointCloudList() + { + free (points); + } +}; diff --git a/test.cpp b/test.cpp new file mode 100644 index 0000000..bdefefc --- /dev/null +++ b/test.cpp @@ -0,0 +1,15 @@ +#include "cuda_runtime.h" + +#include +#include "globalstate.h" + + + +int main() +{ + int *c; + GlobalState *gs = new GlobalState; + CHECK(cudaMallocManaged(&c, sizeof(int))); + *c = 0; + return 0; +} diff --git a/vector_operations.h b/vector_operations.h new file mode 100644 index 0000000..6f619e9 --- /dev/null +++ b/vector_operations.h @@ -0,0 +1,52 @@ +/* vim: ft=cpp + * */ +#pragma once +#include // float4 +#include "config.h" +//__device__ float4 operator*(float4 a, float4 b); +//__device__ float4 operator-(float4 a, float4 b); +//__device__ float4 operator-(float4 a); +//__device__ float4 operator+(float4 a, float4 b); +//__device__ float4 operator/(float4 a, float k); +static __device__ float4 operator*(float4 a, float4 b) { + return make_float4(a.x*b.x, + a.y*b.y, + a.z*b.z, + 0); +} +static __device__ float4 operator-(float4 a, float4 b) { + return make_float4(a.x-b.x, + a.y-b.y, + a.z-b.z, + 0); +} +static __device__ float4 operator-(float4 a) { + return make_float4(-a.x, + -a.y, + -a.z, + 0); +} +static __device__ float4 operator+(float4 a, float4 b) { + return make_float4(a.x+b.x, + a.y+b.y, + a.z+b.z, + 0); +} +static __device__ float4 operator/(float4 a, float k) { + return make_float4(a.x/k, + a.y/k, + a.z/k, + 0); +} +static __device__ float l1_float4 (float4 a) { + return ( fabsf (a.x) + + fabsf (a.y) + + fabsf (a.z))*0.3333333f; + +} +static __device__ float l2_float4 (float4 a) { + return sqrtf( pow2 (a.x) + + pow2 (a.y) + + pow2 (a.z)); + +}