Skip to content

Commit

Permalink
refactor: use #pragma unroll
Browse files Browse the repository at this point in the history
Signed-off-by: Barış Zeren <[email protected]>
  • Loading branch information
StepTurtle committed Dec 3, 2024
1 parent 8b32c19 commit 88a42c1
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions perception/autoware_tensorrt_rtmdet/src/preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ __global__ void resize_bilinear_kernel(
}

index = C * w + C * W * h;
// Unroll
#pragma unroll
for (int c = 0; c < C; c++) {
f00 = n * src_h * src_w * C + src_h_idx * src_w * C + src_w_idx * C + c;
f01 = n * src_h * src_w * C + src_h_idx * src_w * C + (src_w_idx + 1) * C + c;
Expand Down Expand Up @@ -128,8 +128,8 @@ __global__ void letterbox_kernel(
int h = index / W;

index = (C * w) + (C * W * h);
// Unroll
int index2 = (C * w) + (C * src_w * h);
#pragma unroll
for (int c = 0; c < C; c++) {
dst_img[index + c] =
(w >= letter_right || h >= letter_bot) ? (unsigned int)114 : src_img[index2 + c];
Expand Down Expand Up @@ -257,7 +257,7 @@ __global__ void resize_bilinear_letterbox_kernel(
}

index = (C * w) + (C * W * h);
// Unroll
#pragma unroll
for (int c = 0; c < C; c++) {
f00 = n * src_h * src_w * C + src_h_idx * src_w * C + src_w_idx * C + c;
f01 = n * src_h * src_w * C + src_h_idx * src_w * C + (src_w_idx + 1) * C + c;
Expand Down Expand Up @@ -310,8 +310,10 @@ __global__ void resize_bilinear_letterbox_nhwc_to_nchw32_kernel(
src_h_idx = (src_h_idx >= (src_h - 1)) ? src_h - 2 : src_h_idx;
src_w_idx = (src_w_idx < 0) ? 0 : src_w_idx;
src_w_idx = (src_w_idx >= (src_w - 1)) ? src_w - 2 : src_w_idx;
// Unroll

int stride = src_w * C;

#pragma unroll
for (int c = 0; c < C; c++) {
f00 = src_h_idx * stride + src_w_idx * C + c;
f01 = src_h_idx * stride + (src_w_idx + 1) * C + c;
Expand Down Expand Up @@ -371,10 +373,11 @@ __global__ void resize_bilinear_letterbox_nhwc_to_nchw32_batch_kernel(
src_h_idx = (src_h_idx >= (src_h - 1)) ? src_h - 2 : src_h_idx;
src_w_idx = (src_w_idx < 0) ? 0 : src_w_idx;
src_w_idx = (src_w_idx >= (src_w - 1)) ? src_w - 2 : src_w_idx;
// Unroll

int stride = src_w * C;
int b_stride = src_h * src_w * C;

#pragma unroll
for (int b = 0; b < batch; b++) {
for (int c = 0; c < C; c++) {
// NHWC
Expand Down

0 comments on commit 88a42c1

Please sign in to comment.