forked from Celebrandil/CudaSift
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cudaImage.cu
127 lines (113 loc) · 3.98 KB
/
cudaImage.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/**
* Author: Marten Bjorkman
* Contributor: Ethan Stewart
*/
#include "cudaImage.h"
#include <cstdio>
#include "cudautils.h"
/**
 * Construct an empty image: zero dimensions, no device/host/texture buffers, and no buffers
 * marked as owned by this object.
 *
 * pitch is zeroed as well so that methods which read it before Allocate() has been called
 * (Readback, InitTexture) do not read an indeterminate value.
 */
CudaImage::CudaImage() :
        width(0), height(0), pitch(0), d_data(NULL), h_data(NULL), t_data(NULL),
        d_internalAlloc(false), h_internalAlloc(false) {}
/**
 * Release the buffers held by this image and reset all three data pointers to NULL.
 *
 * - d_data is passed to cudaFree only when d_internalAlloc is set.
 * - h_data is passed to free only when h_internalAlloc is set.
 * - t_data is passed to cudaFreeArray whenever it is non-NULL.
 */
CudaImage::~CudaImage() {
    // Device buffer: only ours to free if Allocate() created it.
    if (d_data != NULL && d_internalAlloc) {
        safeCall(cudaFree(d_data));
    }
    d_data = NULL;

    // Host buffer: same ownership rule, released with free().
    if (h_data != NULL && h_internalAlloc) {
        free(h_data);
    }
    h_data = NULL;

    // Texture array: there is no ownership flag, any non-NULL pointer is released.
    if (t_data != NULL) {
        safeCall(cudaFreeArray((cudaArray *) t_data));
    }
    t_data = NULL;
}
/**
 * Set the image dimensions and attach or allocate its device and host buffers.
 *
 * @param w       Image width in pixels (floats per row of payload).
 * @param h       Image height in rows.
 * @param p       Row pitch in floats; used as-is when devmem is supplied, replaced by the pitch
 *                chosen by cudaMallocPitch when the device buffer is allocated here.
 * @param host    When true and hostmem is NULL, a host buffer of pitch * height floats is malloc'ed.
 * @param devmem  Existing device buffer to use, or NULL to allocate one with cudaMallocPitch.
 * @param hostmem Existing host buffer to use, or NULL.
 *
 * Buffers allocated here are flagged (d_internalAlloc / h_internalAlloc) so the destructor frees them.
 * NOTE(review): pointers already stored in d_data / h_data / t_data are overwritten without being
 * released, and the ownership flags are never cleared here; this looks intended for a single call
 * per object, so confirm against callers.
 */
void CudaImage::Allocate(int w, int h, int p, bool host, float *devmem, float *hostmem) {
    width = w;
    height = h;
    pitch = p;
    d_data = devmem;
    h_data = hostmem;
    t_data = NULL;
    if (devmem == NULL) {
        // cudaMallocPitch writes the pitch (in bytes) through a size_t*. Receive it in a genuine
        // size_t instead of casting &pitch, which would let the runtime write a full size_t over a
        // member that may be narrower.
        size_t pitchBytes = 0;
        safeCall(cudaMallocPitch((void **) &d_data, &pitchBytes, (size_t) (sizeof(float) * width),
                                 (size_t) height));
        // The class stores the pitch in floats, not bytes.
        pitch = static_cast<int>(pitchBytes / sizeof(float));
        if (d_data == NULL)
            printf("Failed to allocate device data\n");
        d_internalAlloc = true;
    }
    if (host && hostmem == NULL) {
        // Host buffer is sized with the (possibly updated) pitch, not the width.
        h_data = (float *) malloc(sizeof(float) * pitch * height);
        h_internalAlloc = true;
    }
}
/**
 * Copy the image from the host buffer (h_data) to the device buffer (d_data).
 *
 * The two buffers use different row strides: host rows are read with a stride of `width` floats,
 * device rows are written with a stride of `pitch` floats. Only `width` floats per row are copied.
 * The copy is skipped when either buffer pointer is NULL.
 *
 * @return The reading of the TimerGPU started on entry (logged as milliseconds when VERBOSE is defined).
 */
double CudaImage::Download() {
    TimerGPU stopwatch(0);
    const bool buffersPresent = (d_data != NULL) && (h_data != NULL);
    if (buffersPresent) {
        int devicePitchBytes = sizeof(float) * pitch;   // destination row stride
        const size_t rowBytes = sizeof(float) * width;  // source row stride and bytes copied per row
        safeCall(cudaMemcpy2D(d_data, devicePitchBytes, h_data, rowBytes, rowBytes, height,
                              cudaMemcpyHostToDevice));
    }
    double elapsed = stopwatch.read();
#ifdef VERBOSE
    printf("Download time = %.2f ms\n", elapsed);
#endif
    return elapsed;
}
/**
 * Copy the image from the device buffer (d_data) back to the host buffer (h_data).
 *
 * Device rows are read with a stride of `pitch` floats and host rows are written with a stride of
 * `width` floats; `width` floats are copied per row. As in Download(), the copy is skipped when
 * either buffer pointer is NULL instead of handing a NULL pointer to cudaMemcpy2D.
 *
 * @return The reading of the TimerGPU started on entry (logged as milliseconds when VERBOSE is defined).
 */
double CudaImage::Readback() {
    TimerGPU timer(0);
    if (d_data != NULL && h_data != NULL) {
        // Keep byte counts in size_t, the type cudaMemcpy2D takes, to avoid narrowing through int.
        const size_t devicePitchBytes = sizeof(float) * pitch;
        const size_t rowBytes = sizeof(float) * width;
        safeCall(cudaMemcpy2D(h_data, rowBytes, d_data, devicePitchBytes, rowBytes, height,
                              cudaMemcpyDeviceToHost));
    }
    double gpuTime = timer.read();
#ifdef VERBOSE
    printf("Readback time = %.2f ms\n", gpuTime);
#endif
    return gpuTime;
}
/**
 * Allocate a CUDA array of `pitch` x `height` float elements and store it in t_data.
 *
 * If t_data already holds an array from an earlier call, it is released first so that repeated
 * calls do not leak the previous allocation (the destructor treats any non-NULL t_data as an
 * array to be freed with cudaFreeArray, and this follows the same rule).
 *
 * @return The reading of the TimerGPU started on entry (logged as milliseconds when VERBOSE is defined).
 */
double CudaImage::InitTexture() {
    TimerGPU timer(0);
    if (t_data != NULL) {
        // Drop the previously allocated array before overwriting the only pointer to it.
        safeCall(cudaFreeArray((cudaArray *) t_data));
        t_data = NULL;
    }
    cudaChannelFormatDesc t_desc = cudaCreateChannelDesc<float>();
    safeCall(cudaMallocArray((cudaArray **) &t_data, &t_desc, pitch, height));
    if (t_data == NULL)
        printf("Failed to allocated texture data\n");
    double gpuTime = timer.read();
#ifdef VERBOSE
    printf("InitTexture time = %.2f ms\n", gpuTime);
#endif
    return gpuTime;
}
/**
 * Copy this image's pixel data into the CUDA array held by `dst` (dst.t_data).
 *
 * @param dst  Image whose t_data array receives the data; it must be non-NULL.
 * @param host When true the source is this image's h_data (host-to-device copy), otherwise
 *             this image's d_data (device-to-device copy).
 * @return 0.0 if the destination array or the selected source buffer is missing (an error line is
 *         printed); otherwise the reading of the TimerGPU started just before the copy.
 *
 * The number of bytes copied is sizeof(float) * pitch * dst.height, i.e. this image's pitch combined
 * with the destination's height. The device is synchronized before the timer is read.
 * NOTE(review): cudaMemcpyToArray is listed as deprecated in recent CUDA Runtime API documentation;
 * confirm before moving to a newer toolkit.
 */
double CudaImage::CopyToTexture(CudaImage &dst, bool host) {
    if (dst.t_data == NULL) {
        printf("Error CopyToTexture: No texture data\n");
        return 0.0;
    }

    // Select the one buffer that will actually be read; it must exist.
    const void *source = host ? h_data : d_data;
    if (source == NULL) {
        printf("Error CopyToTexture: No source data\n");
        return 0.0;
    }

    TimerGPU timer(0);
    const cudaMemcpyKind direction = host ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice;
    safeCall(cudaMemcpyToArray((cudaArray *) dst.t_data, 0, 0, source, sizeof(float) * pitch * dst.height,
                               direction));
    safeCall(cudaDeviceSynchronize());
    double elapsed = timer.read();
#ifdef VERBOSE
    printf("CopyToTexture time = %.2f ms\n", elapsed);
#endif
    return elapsed;
}
/**
 * Integer division that bumps the truncated quotient by one whenever the remainder is non-zero.
 * For positive operands this is the ceiling of dividend / divisor.
 *
 * @param dividend Value to divide.
 * @param divisor  Value to divide by; must be non-zero.
 * @return dividend / divisor, plus one if dividend is not an exact multiple of divisor.
 */
int iDivUp(int dividend, int divisor) {
    int quotient = dividend / divisor;
    if (dividend % divisor != 0) {
        ++quotient;
    }
    return quotient;
}
/**
 * Plain integer division (the built-in `/` on ints, which truncates toward zero).
 *
 * @param dividend Value to divide.
 * @param divisor  Value to divide by; must be non-zero.
 * @return dividend / divisor.
 */
int iDivDown(int dividend, int divisor) {
    const int quotient = dividend / divisor;
    return quotient;
}
/**
 * Round `number` up to a multiple of `alignment`.
 *
 * @param number    Value to align.
 * @param alignment Alignment step; must be non-zero.
 * @return `number` itself when it is already a multiple of `alignment`, otherwise
 *         number - (number % alignment) + alignment.
 */
int iAlignUp(int number, int alignment) {
    const int remainder = number % alignment;
    if (remainder == 0) {
        return number;
    }
    return number - remainder + alignment;
}
/**
 * Strip the remainder of `number` with respect to `alignment`, yielding a multiple of `alignment`.
 *
 * @param number    Value to align.
 * @param alignment Alignment step; must be non-zero.
 * @return number - (number % alignment), computed here as (number / alignment) * alignment.
 */
int iAlignDown(int number, int alignment) {
    const int wholeSteps = number / alignment;
    return wholeSteps * alignment;
}