From fffb00025bbe7fea3085b03a32b013fb3a5a381d Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Mon, 27 Jan 2025 14:07:12 +0900 Subject: [PATCH] feat: moved from personal repository https://github.com/knzo25/bevfusion_ros2 Signed-off-by: Kenzo Lobos-Tsunekawa --- .../autoware_lidar_bevfusion/CMakeLists.txt | 213 +++++++ .../config/bevfusion.param.yaml | 23 + .../config/bevfusion_ml_package.param.yaml | 25 + .../detection_class_remapper.param.yaml | 38 ++ .../include/autoware/bev_ops/bev_pool_cuda.h | 12 + .../lidar_bevfusion/bevfusion_config.hpp | 195 +++++++ .../lidar_bevfusion/bevfusion_trt.hpp | 150 +++++ .../detection_class_remapper.hpp | 48 ++ .../lidar_bevfusion/lidar_bevfusion_node.hpp | 97 ++++ .../postprocess/circle_nms_kernel.hpp | 34 ++ .../postprocess/non_maximum_suppression.hpp | 82 +++ .../postprocess/postprocess_kernel.hpp | 47 ++ .../lidar_bevfusion/preprocess/point_type.hpp | 35 ++ .../preprocess/pointcloud_densification.hpp | 104 ++++ .../preprocess/precomputed_features.hpp | 89 +++ .../preprocess/preprocess_kernel.hpp | 76 +++ .../preprocess/voxel_generator.hpp | 64 +++ .../autoware/lidar_bevfusion/ros_utils.hpp | 67 +++ .../autoware/lidar_bevfusion/utils.hpp | 57 ++ .../lidar_bevfusion/visibility_control.hpp | 37 ++ .../get_indice_pairs_implicit_gemm_plugin.hpp | 140 +++++ ...ice_pairs_implicit_gemm_plugin_creator.hpp | 65 +++ .../tensorrt_plugins/implicit_gemm_plugin.hpp | 138 +++++ .../implicit_gemm_plugin_creator.hpp | 59 ++ .../tensorrt_plugins/plugin_registration.hpp | 29 + .../tensorrt_plugins/plugin_utils.hpp | 35 ++ .../quick_cumsum_cuda_plugin.hpp | 123 ++++ .../quick_cumsum_cuda_plugin_creator.hpp | 62 ++ .../launch/lidar_bevfusion.launch.xml | 124 ++++ .../lib/bevfusion_trt.cpp | 532 ++++++++++++++++++ .../lib/detection_class_remapper.cpp | 74 +++ .../lib/postprocess/circle_nms_kernel.cu | 147 +++++ .../postprocess/non_maximum_suppression.cpp | 114 ++++ .../lib/postprocess/postprocess_kernel.cu | 141 +++++ .../preprocess/pointcloud_densification.cpp | 111 ++++ .../lib/preprocess/precomputed_features.cpp | 376 +++++++++++++ .../lib/preprocess/preprocess_kernel.cu | 228 ++++++++ .../lib/preprocess/voxel_generator.cpp | 89 +++ .../lib/ros_utils.cpp | 126 +++++ .../autoware_lidar_bevfusion/package.xml | 36 ++ .../schema/bevfusion.schema.dummy.json | 121 ++++ .../schema/bevfusion_ml_package.schema.json | 71 +++ .../detection_class_remapper.schema.json | 72 +++ .../src/bev_ops/bev_pool_cuda.cu | 55 ++ .../src/lidar_bevfusion_node.cpp | 342 +++++++++++ .../get_indice_pairs_implicit_gemm_plugin.cpp | 421 ++++++++++++++ ...ice_pairs_implicit_gemm_plugin_creator.cpp | 324 +++++++++++ .../tensorrt_plugins/implicit_gemm_plugin.cpp | 314 +++++++++++ .../implicit_gemm_plugin_creator.cpp | 174 ++++++ .../tensorrt_plugins/plugin_registration.cpp | 73 +++ .../src/tensorrt_plugins/plugin_utils.cpp | 57 ++ .../quick_cumsum_cuda_plugin.cpp | 245 ++++++++ .../quick_cumsum_cuda_plugin_creator.cpp | 143 +++++ .../autoware/cuda_utils/cuda_utils.hpp | 36 ++ 54 files changed, 6690 insertions(+) create mode 100644 perception/autoware_lidar_bevfusion/CMakeLists.txt create mode 100644 perception/autoware_lidar_bevfusion/config/bevfusion.param.yaml create mode 100644 perception/autoware_lidar_bevfusion/config/bevfusion_ml_package.param.yaml create mode 100644 perception/autoware_lidar_bevfusion/config/detection_class_remapper.param.yaml create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/bev_ops/bev_pool_cuda.h create mode 100644 
perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/bevfusion_config.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/bevfusion_trt.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/detection_class_remapper.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/lidar_bevfusion_node.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/circle_nms_kernel.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/non_maximum_suppression.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/postprocess_kernel.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/point_type.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/pointcloud_densification.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/precomputed_features.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/preprocess_kernel.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/voxel_generator.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/ros_utils.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/utils.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/visibility_control.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin_creator.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/implicit_gemm_plugin.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/implicit_gemm_plugin_creator.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/plugin_registration.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/plugin_utils.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/quick_cumsum_cuda_plugin.hpp create mode 100644 perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.hpp create mode 100644 perception/autoware_lidar_bevfusion/launch/lidar_bevfusion.launch.xml create mode 100644 perception/autoware_lidar_bevfusion/lib/bevfusion_trt.cpp create mode 100644 perception/autoware_lidar_bevfusion/lib/detection_class_remapper.cpp create mode 100644 perception/autoware_lidar_bevfusion/lib/postprocess/circle_nms_kernel.cu create mode 100644 perception/autoware_lidar_bevfusion/lib/postprocess/non_maximum_suppression.cpp create mode 100644 perception/autoware_lidar_bevfusion/lib/postprocess/postprocess_kernel.cu create mode 100644 perception/autoware_lidar_bevfusion/lib/preprocess/pointcloud_densification.cpp create mode 100644 perception/autoware_lidar_bevfusion/lib/preprocess/precomputed_features.cpp create mode 100644 perception/autoware_lidar_bevfusion/lib/preprocess/preprocess_kernel.cu create mode 100644 
perception/autoware_lidar_bevfusion/lib/preprocess/voxel_generator.cpp create mode 100644 perception/autoware_lidar_bevfusion/lib/ros_utils.cpp create mode 100644 perception/autoware_lidar_bevfusion/package.xml create mode 100644 perception/autoware_lidar_bevfusion/schema/bevfusion.schema.dummy.json create mode 100644 perception/autoware_lidar_bevfusion/schema/bevfusion_ml_package.schema.json create mode 100644 perception/autoware_lidar_bevfusion/schema/detection_class_remapper.schema.json create mode 100644 perception/autoware_lidar_bevfusion/src/bev_ops/bev_pool_cuda.cu create mode 100644 perception/autoware_lidar_bevfusion/src/lidar_bevfusion_node.cpp create mode 100644 perception/autoware_lidar_bevfusion/src/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.cpp create mode 100644 perception/autoware_lidar_bevfusion/src/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin_creator.cpp create mode 100644 perception/autoware_lidar_bevfusion/src/tensorrt_plugins/implicit_gemm_plugin.cpp create mode 100644 perception/autoware_lidar_bevfusion/src/tensorrt_plugins/implicit_gemm_plugin_creator.cpp create mode 100644 perception/autoware_lidar_bevfusion/src/tensorrt_plugins/plugin_registration.cpp create mode 100644 perception/autoware_lidar_bevfusion/src/tensorrt_plugins/plugin_utils.cpp create mode 100644 perception/autoware_lidar_bevfusion/src/tensorrt_plugins/quick_cumsum_cuda_plugin.cpp create mode 100644 perception/autoware_lidar_bevfusion/src/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.cpp create mode 100644 sensing/autoware_cuda_utils/include/autoware/cuda_utils/cuda_utils.hpp diff --git a/perception/autoware_lidar_bevfusion/CMakeLists.txt b/perception/autoware_lidar_bevfusion/CMakeLists.txt new file mode 100644 index 0000000000000..64fd656713537 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/CMakeLists.txt @@ -0,0 +1,213 @@ +cmake_minimum_required(VERSION 3.14) +project(autoware_lidar_bevfusion) + +find_package(autoware_cmake REQUIRED) +autoware_package() + +add_compile_options(-Wno-deprecated-declarations) + +option(CUDA_VERBOSE "Verbose output of CUDA modules" OFF) + +# set flags for CUDA availability +option(CUDA_AVAIL "CUDA available" OFF) +find_package(CUDA) +if(CUDA_FOUND) + find_library(CUBLAS_LIBRARIES cublas HINTS + ${CUDA_TOOLKIT_ROOT_DIR}/lib64 + ${CUDA_TOOLKIT_ROOT_DIR}/lib + ) + if(CUDA_VERBOSE) + message("CUDA is available!") + message("CUDA Libs: ${CUDA_LIBRARIES}") + message("CUDA Headers: ${CUDA_INCLUDE_DIRS}") + endif() + # Note: cublas_device was deprecated in CUDA version 9.2 + # https://forums.developer.nvidia.com/t/where-can-i-find-libcublas-device-so-or-libcublas-device-a/67251/4 + # In LibTorch, CUDA_cublas_device_LIBRARY is used.
+ unset(CUDA_cublas_device_LIBRARY CACHE) + set(CUDA_AVAIL ON) +else() + message("CUDA NOT FOUND") + set(CUDA_AVAIL OFF) +endif() + +# set flags for TensorRT availability +option(TRT_AVAIL "TensorRT available" OFF) +# try to find the tensorRT modules +find_library(NVINFER nvinfer) +find_library(NVONNXPARSER nvonnxparser) +if(NVINFER AND NVONNXPARSER) + if(CUDA_VERBOSE) + message("TensorRT is available!") + message("NVINFER: ${NVINFER}") + message("NVONNXPARSER: ${NVONNXPARSER}") + endif() + set(TRT_AVAIL ON) +else() + message("TensorRT is NOT Available") + set(TRT_AVAIL OFF) +endif() + +# set flags for CUDNN availability +option(CUDNN_AVAIL "CUDNN available" OFF) +# try to find the CUDNN module +find_library(CUDNN_LIBRARY +NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name} +PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} +PATH_SUFFIXES lib lib64 bin +DOC "CUDNN library." +) +if(CUDNN_LIBRARY) + if(CUDA_VERBOSE) + message(STATUS "CUDNN is available!") + message(STATUS "CUDNN_LIBRARY: ${CUDNN_LIBRARY}") + endif() + set(CUDNN_AVAIL ON) +else() + message("CUDNN is NOT Available") + set(CUDNN_AVAIL OFF) +endif() + +# set flags for spconv availability +option(SPCONV_AVAIL "spconv available" OFF) +# try to find spconv +find_package(cumm) +find_package(spconv) +if(${cumm_FOUND} AND ${spconv_FOUND}) + message("spconv is available!") + set(SPCONV_AVAIL ON) +else() + message("spconv is NOT Available") + set(SPCONV_AVAIL OFF) +endif() + +if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL AND SPCONV_AVAIL) + find_package(ament_cmake_auto REQUIRED) + ament_auto_find_build_dependencies() + + include_directories( + include + ${CUDA_INCLUDE_DIRS} + ) + + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-g -G") + endif() + + # Find Eigen3 + find_package(Eigen3 3.3 REQUIRED NO_MODULE) + + add_definitions("-DTV_CUDA") + + list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -diag-suppress 1675 --extended-lambda") + + cuda_add_library(bev_ops SHARED + src/bev_ops/bev_pool_cuda.cu + ) + + add_library(autoware_tensorrt_plugins SHARED + src/tensorrt_plugins/plugin_utils.cpp + src/tensorrt_plugins/quick_cumsum_cuda_plugin.cpp + src/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.cpp + src/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin_creator.cpp + src/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.cpp + src/tensorrt_plugins/implicit_gemm_plugin_creator.cpp + src/tensorrt_plugins/implicit_gemm_plugin.cpp + + src/tensorrt_plugins/plugin_registration.cpp + ) + + target_compile_definitions(autoware_tensorrt_plugins PRIVATE _GLIBCXX_USE_CXX11_ABI=1) + + target_link_libraries(autoware_tensorrt_plugins PRIVATE + ${NVINFER} + CUDA::cudart + bev_ops + spconv::spconv + ) + + cuda_add_library(${PROJECT_NAME}_cuda_lib SHARED + lib/postprocess/circle_nms_kernel.cu + lib/postprocess/postprocess_kernel.cu + lib/preprocess/preprocess_kernel.cu + ) + + target_link_libraries(${PROJECT_NAME}_cuda_lib + spconv::spconv + ) + + target_include_directories(${PROJECT_NAME}_cuda_lib + SYSTEM PUBLIC + ${autoware_cuda_utils_INCLUDE_DIRS} + ) + + ament_auto_add_library(${PROJECT_NAME}_lib SHARED + lib/detection_class_remapper.cpp + lib/postprocess/non_maximum_suppression.cpp + lib/preprocess/voxel_generator.cpp + lib/preprocess/pointcloud_densification.cpp + lib/preprocess/precomputed_features.cpp + lib/ros_utils.cpp + lib/bevfusion_trt.cpp + ) + + 
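+  # NOTE: autoware_tensorrt_plugins is deliberately not linked into the node
+  # libraries below. It is installed under share/${PROJECT_NAME}/plugins and
+  # loaded at runtime through the plugins_path parameter in
+  # config/bevfusion.param.yaml, so the spconv-dependent TensorRT plugins only
+  # need to be visible when the engine is built and deserialized.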
target_compile_definitions(${PROJECT_NAME}_lib PRIVATE + TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR} + ) + + target_link_libraries(${PROJECT_NAME}_lib + ${NVINFER} + ${NVONNXPARSER} + ${CUDA_LIBRARIES} + ${CUBLAS_LIBRARIES} + ${CUDA_curand_LIBRARY} + ${CUDNN_LIBRARY} + ${PROJECT_NAME}_cuda_lib + ) + + # To suppress unknown-pragmas error. The root cause is the CUB library in CUDA 11.6. + # This issue was fixed by https://github.com/NVIDIA/cub/commit/7d608bf1dc14553e2fb219eabeed80b76621b6fe + target_include_directories(${PROJECT_NAME}_lib + SYSTEM PUBLIC + ${CUDA_INCLUDE_DIRS} + ${autoware_point_types_INCLUDE_DIRS} + ) + + ament_auto_add_library(${PROJECT_NAME}_component SHARED + src/lidar_bevfusion_node.cpp + ) + + target_link_libraries(${PROJECT_NAME}_component + ${PROJECT_NAME}_lib + ) + + rclcpp_components_register_node(${PROJECT_NAME}_component + PLUGIN "autoware::lidar_bevfusion::LidarBEVFusionNode" + EXECUTABLE ${PROJECT_NAME}_node + ) + + install( + TARGETS ${PROJECT_NAME}_cuda_lib + DESTINATION lib + ) + + install( + TARGETS autoware_tensorrt_plugins + DESTINATION share/${PROJECT_NAME}/plugins + ) + + ament_auto_package( + INSTALL_TO_SHARE + launch + config + ) + +else() + find_package(ament_cmake_auto REQUIRED) + ament_auto_find_build_dependencies() + + ament_auto_package( + INSTALL_TO_SHARE + launch + ) +endif() diff --git a/perception/autoware_lidar_bevfusion/config/bevfusion.param.yaml b/perception/autoware_lidar_bevfusion/config/bevfusion.param.yaml new file mode 100644 index 0000000000000..3958123c76086 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/config/bevfusion.param.yaml @@ -0,0 +1,23 @@ +/**: + ros__parameters: + # modality + sensor_fusion: true + # non-network params + max_camera_lidar_delay: 0.12 + # plugins + plugins_path: $(find-pkg-share autoware_lidar_bevfusion)/plugins/libautoware_tensorrt_plugins.so + # network + trt_precision: fp32 + cloud_capacity: 2000000 + onnx_path: "$(var model_path)/bevfusion_camera_lidar_v2.onnx" + engine_path: "$(var model_path)/bevfusion_camera_lidar_v2.engine" + # pre-process params + densification_num_past_frames: 0 + densification_world_frame_id: map + # post-process params + circle_nms_dist_threshold: 0.5 + iou_nms_target_class_names: ["CAR"] + iou_nms_search_distance_2d: 10.0 + iou_nms_threshold: 0.1 + yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0] # refers to the class_names + score_threshold: 0.1 diff --git a/perception/autoware_lidar_bevfusion/config/bevfusion_ml_package.param.yaml b/perception/autoware_lidar_bevfusion/config/bevfusion_ml_package.param.yaml new file mode 100644 index 0000000000000..48e30a5d2b879 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/config/bevfusion_ml_package.param.yaml @@ -0,0 +1,25 @@ +/**: + ros__parameters: + class_names: ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"] + voxels_num: [1, 128000, 256000] # [min, opt, max] + point_cloud_range: [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] # [x_min, y_min, z_min, x_max, y_max, z_max] + voxel_size: [0.17, 0.17, 0.2] # [x, y, z] + num_proposals: 500 + out_size_factor: 8 + max_points_per_voxel: 10 + + dbound: [1.0, 166.2, 1.4] + xbound: [-122.4, 122.4, 0.68] + ybound: [-122.4, 122.4, 0.68] + zbound: [-10.0, 10.0, 20.0] + num_cameras: 6 + raw_image_height: 1080 + raw_image_width: 1440 + img_aug_scale_x: 0.489 + img_aug_scale_y: 0.489 + img_aug_offset_y: -262.0 + roi_height: 384 + roi_width: 704 + features_height: 48 + features_width: 88 + num_depth_features: 118 diff --git
a/perception/autoware_lidar_bevfusion/config/detection_class_remapper.param.yaml b/perception/autoware_lidar_bevfusion/config/detection_class_remapper.param.yaml new file mode 100644 index 0000000000000..40275d750f0d9 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/config/detection_class_remapper.param.yaml @@ -0,0 +1,38 @@ +/**: + ros__parameters: + allow_remapping_by_area_matrix: + # NOTE(knzo25): We turn all vehicles into trailers if they go over 3x12 [m^2]. + # NOTE(knzo25): We turn cars into trucks if they have an area between 2.2 x 5.5 and 3.0 * 12.0 [m^2] + # row: original class. column: class to remap to + #UNKNOWN, CAR, TRUCK, BUS, TRAILER, MOTORBIKE, BICYCLE,PEDESTRIAN + [0, 0, 0, 0, 0, 0, 0, 0, #UNKNOWN + 0, 0, 1, 0, 1, 0, 0, 0, #CAR + 0, 0, 0, 0, 1, 0, 0, 0, #TRUCK + 0, 0, 0, 0, 1, 0, 0, 0, #BUS + 0, 0, 0, 0, 0, 0, 0, 0, #TRAILER + 0, 0, 0, 0, 0, 0, 0, 0, #MOTORBIKE + 0, 0, 0, 0, 0, 0, 0, 0, #BICYCLE + 0, 0, 0, 0, 0, 0, 0, 0] #PEDESTRIAN + + min_area_matrix: + #UNKNOWN, CAR, TRUCK, BUS, TRAILER, MOTORBIKE, BICYCLE, PEDESTRIAN + [ 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #UNKNOWN + 0.000, 0.000, 12.100, 0.000, 36.000, 0.000, 0.000, 0.000, #CAR + 0.000, 0.000, 0.000, 0.000, 36.000, 0.000, 0.000, 0.000, #TRUCK + 0.000, 0.000, 0.000, 0.000, 36.000, 0.000, 0.000, 0.000, #BUS + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #TRAILER + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #MOTORBIKE + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #BICYCLE + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000] #PEDESTRIAN + + + max_area_matrix: + #UNKNOWN, CAR, TRUCK, BUS, TRAILER, MOTORBIKE, BICYCLE, PEDESTRIAN + [ 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #UNKNOWN + 0.000, 0.000, 36.000, 0.000, 999.999, 0.000, 0.000, 0.000, #CAR + 0.000, 0.000, 0.000, 0.000, 999.999, 0.000, 0.000, 0.000, #TRUCK + 0.000, 0.000, 0.000, 0.000, 999.999, 0.000, 0.000, 0.000, #BUS + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #TRAILER + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #MOTORBIKE + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, #BICYCLE + 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000] #PEDESTRIAN diff --git a/perception/autoware_lidar_bevfusion/include/autoware/bev_ops/bev_pool_cuda.h b/perception/autoware_lidar_bevfusion/include/autoware/bev_ops/bev_pool_cuda.h new file mode 100644 index 0000000000000..f169d8f240ce7 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/bev_ops/bev_pool_cuda.h @@ -0,0 +1,12 @@ + +#ifndef AUTOWARE__BEV_OPS__BEV_POOL_CUDA_H_ +#define AUTOWARE__BEV_OPS__BEV_POOL_CUDA_H_ + +#include + +void bev_pool( + int b, int d, int h, int w, int n, int c, int n_intervals, const float * x, + const int * geom_feats, const int * interval_starts, const int * interval_lengths, float * out, + cudaStream_t & stream); + +#endif // AUTOWARE__BEV_OPS__BEV_POOL_CUDA_H_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/bevfusion_config.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/bevfusion_config.hpp new file mode 100644 index 0000000000000..5be7f08f71b9d --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/bevfusion_config.hpp @@ -0,0 +1,195 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__LIDAR_BEVFUSION__BEVFUSION_CONFIG_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__BEVFUSION_CONFIG_HPP_ + +#include +#include +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ + +class BEVFusionConfig +{ +public: + // cSpell:ignore dbound, xbound, ybound, zbound + BEVFusionConfig( + const bool sensor_fusion, const std::string & plugins_path, const std::int64_t out_size_factor, + const std::int64_t cloud_capacity, const std::int64_t max_points_per_voxel, + const std::vector & voxels_num, const std::vector & point_cloud_range, + const std::vector & voxel_size, const std::vector & dbound, + const std::vector & xbound, const std::vector & ybound, + const std::vector & zbound, const std::int64_t num_cameras, + const std::int64_t raw_image_height, const std::int64_t raw_image_width, + const float img_aug_scale_x, const float img_aug_scale_y, const std::int64_t roi_height, + const std::int64_t roi_width, const std::int64_t features_height, + const std::int64_t features_width, const std::int64_t num_depth_features, + const std::int64_t num_proposals, const float circle_nms_dist_threshold, + const std::vector & yaw_norm_thresholds, const float score_threshold) + { + sensor_fusion_ = sensor_fusion; + plugins_path_ = plugins_path; + + out_size_factor_ = out_size_factor; + + cloud_capacity_ = cloud_capacity; + max_points_per_voxel_ = max_points_per_voxel; + + if (voxels_num.size() == 3) { + min_num_voxels_ = voxels_num[0]; + max_num_voxels_ = voxels_num[2]; + + voxels_num_[0] = voxels_num[0]; + voxels_num_[1] = voxels_num[1]; + voxels_num_[2] = voxels_num[2]; + } + if (point_cloud_range.size() == 6) { + min_x_range_ = point_cloud_range[0]; + min_y_range_ = point_cloud_range[1]; + min_z_range_ = point_cloud_range[2]; + max_x_range_ = point_cloud_range[3]; + max_y_range_ = point_cloud_range[4]; + max_z_range_ = point_cloud_range[5]; + } + if (voxel_size.size() == 3) { + voxel_x_size_ = voxel_size[0]; + voxel_y_size_ = voxel_size[1]; + voxel_z_size_ = voxel_size[2]; + } + if (dbound.size() == 3 && xbound.size() == 3 && ybound.size() == 3 && zbound.size() == 3) { + dbound_ = dbound; + xbound_ = xbound; + ybound_ = ybound; + zbound_ = zbound; + } + + num_cameras_ = num_cameras; + raw_image_height_ = raw_image_height; + raw_image_width_ = raw_image_width; + img_aug_scale_x_ = img_aug_scale_x; + img_aug_scale_y_ = img_aug_scale_y; + roi_height_ = roi_height; + roi_width_ = roi_width; + features_height_ = features_height; + features_width_ = features_width; + num_depth_features_ = num_depth_features; + resized_height_ = raw_image_height_ * img_aug_scale_y_; + resized_width_ = raw_image_width_ * img_aug_scale_x_; + + if (num_proposals > 0) { + num_proposals_ = num_proposals; + } + if (score_threshold > 0.0) { + score_threshold_ = score_threshold; + } + if (circle_nms_dist_threshold > 0.0) { + circle_nms_dist_threshold_ = circle_nms_dist_threshold; + } + yaw_norm_thresholds_ = + std::vector(yaw_norm_thresholds.begin(), yaw_norm_thresholds.end()); + for (auto & yaw_norm_threshold : yaw_norm_thresholds_) { + yaw_norm_threshold = + (yaw_norm_threshold >= 0.0 && 
yaw_norm_threshold < 1.0) ? yaw_norm_threshold : 0.0; + } + grid_x_size_ = static_cast((max_x_range_ - min_x_range_) / voxel_x_size_); + grid_y_size_ = static_cast((max_y_range_ - min_y_range_) / voxel_y_size_); + grid_z_size_ = static_cast((max_z_range_ - min_z_range_) / voxel_z_size_); + } + + ///// MODALITY ///// + bool sensor_fusion_{}; + + // CUDA parameters + const std::uint32_t threads_per_block_{256}; // threads number for a block + + // TensorRT parameters + std::string plugins_path_{}; + + ///// NETWORK PARAMETERS ///// + + // Common network parameters + std::int64_t out_size_factor_{}; + + std::int64_t cloud_capacity_{}; + std::int64_t min_num_voxels_{}; + std::int64_t max_num_voxels_{}; + std::int64_t max_points_per_voxel_; + const std::int64_t num_point_feature_size_{5}; // x, y, z, intensity, lag + + // Pointcloud range in meters + float min_x_range_{}; + float max_x_range_{}; + float min_y_range_{}; + float max_y_range_{}; + float min_z_range_{}; + float max_z_range_{}; + + // Voxel size in meters + float voxel_x_size_{}; + float voxel_y_size_{}; + float voxel_z_size_{}; + + // Grid size + std::int64_t grid_x_size_{}; + std::int64_t grid_y_size_{}; + std::int64_t grid_z_size_{}; + + // Camera branch parameters + std::vector dbound_{}; + std::vector xbound_{}; + std::vector ybound_{}; + std::vector zbound_{}; + + std::int64_t num_cameras_{}; + std::int64_t raw_image_height_{}; + std::int64_t raw_image_width_{}; + + float img_aug_scale_x_{}; + float img_aug_scale_y_{}; + + std::int64_t roi_height_{}; + std::int64_t roi_width_{}; + + std::int64_t resized_height_{}; + std::int64_t resized_width_{}; + + std::int64_t features_height_{}; + std::int64_t features_width_{}; + std::int64_t num_depth_features_{}; + + // Head parameters + std::int64_t num_proposals_{}; + const std::size_t num_classes_{5}; + + // Post processing parameters + + // the score threshold for classification + float score_threshold_{}; + + float circle_nms_dist_threshold_{}; + std::vector yaw_norm_thresholds_{}; + // the detected boxes result decode by (x, y, z, w, l, h, yaw, vx, vy) + const std::int64_t num_box_values_{10}; + + ///// RUNTIME DIMENSIONS ///// + std::array voxels_num_{}; +}; + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__BEVFUSION_CONFIG_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/bevfusion_trt.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/bevfusion_trt.hpp new file mode 100644 index 0000000000000..9a20a90ba39fc --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/bevfusion_trt.hpp @@ -0,0 +1,150 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
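As a cross-check on the constructor above, the derived members evaluate as follows with the values shipped in bevfusion_ml_package.param.yaml. The sketch below is illustrative only; it just repeats the constructor's float-to-integer truncation:

#include <cstdint>
#include <iostream>

int main()
{
  // Grid size: (max_range - min_range) / voxel_size, truncated as in the constructor.
  const float grid_x = (122.4f - (-122.4f)) / 0.17f;
  // Resized image size: raw size scaled by the augmentation factor, truncated.
  const float resized_w = 1440 * 0.489f;
  const float resized_h = 1080 * 0.489f;
  std::cout << static_cast<std::int64_t>(grid_x) << "\n"     // 1440 (BEV grid, x)
            << static_cast<std::int64_t>(resized_w) << "\n"  // 704, equals roi_width
            << static_cast<std::int64_t>(resized_h) << "\n"; // 528; the 384-row ROI is then
                                                             // cropped out (cf. roi_height,
                                                             // img_aug_offset_y)
}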
+ +#ifndef AUTOWARE__LIDAR_BEVFUSION__BEVFUSION_TRT_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__BEVFUSION_TRT_HPP_ + +#include "autoware/lidar_bevfusion/postprocess/postprocess_kernel.hpp" +#include "autoware/lidar_bevfusion/preprocess/pointcloud_densification.hpp" +#include "autoware/lidar_bevfusion/preprocess/preprocess_kernel.hpp" +#include "autoware/lidar_bevfusion/preprocess/voxel_generator.hpp" +#include "autoware/lidar_bevfusion/utils.hpp" +#include "autoware/lidar_bevfusion/visibility_control.hpp" + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ + +using autoware::cuda_utils::CudaUniquePtr; + +class NetworkParam +{ +public: + NetworkParam(std::string onnx_path, std::string engine_path, std::string trt_precision) + : onnx_path_(std::move(onnx_path)), + engine_path_(std::move(engine_path)), + trt_precision_(std::move(trt_precision)) + { + } + + std::string onnx_path() const { return onnx_path_; } + std::string engine_path() const { return engine_path_; } + std::string trt_precision() const { return trt_precision_; } + +private: + std::string onnx_path_; + std::string engine_path_; + std::string trt_precision_; +}; + +class LIDAR_BEVFUSION_PUBLIC BEVFusionTRT +{ +public: + using Matrix4fRowM = Eigen::Matrix; + + explicit BEVFusionTRT( + const tensorrt_common::TrtCommonConfig & trt_config, + const DensificationParam & densification_param, const BEVFusionConfig & config); + virtual ~BEVFusionTRT(); + + bool detect( + const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg, + const std::vector & image_msgs, + const std::vector & camera_masks, const tf2_ros::Buffer & tf_buffer, + std::vector & det_boxes3d, std::unordered_map & proc_timing); + + void setIntrinsicsExtrinsics( + std::vector & camera_info_vector, + std::vector & lidar2camera_vector); + +protected: + void initPtr(); + void initTrt(const tensorrt_common::TrtCommonConfig & trt_config); + + bool preProcess( + const sensor_msgs::msg::PointCloud2::ConstSharedPtr & pc_msg, + const std::vector & image_msgs, + const std::vector & camera_masks, const tf2_ros::Buffer & tf_buffer); + + bool inference(); + + bool postProcess(std::vector & det_boxes3d); + + std::unique_ptr network_trt_ptr_{nullptr}; + std::unique_ptr vg_ptr_{nullptr}; + std::unique_ptr> stop_watch_ptr_{ + nullptr}; + std::unique_ptr pre_ptr_{nullptr}; + std::unique_ptr post_ptr_{nullptr}; + cudaStream_t stream_{nullptr}; + std::vector camera_streams_{}; + + BEVFusionConfig config_; + std::vector roi_start_y_vector_; + + // pre-process inputs + + unsigned int voxel_features_size_{0}; + unsigned int voxel_coords_size_{0}; + unsigned int bbox_pred_size_{0}; + + // lidar buffers + CudaUniquePtr points_d_{nullptr}; + CudaUniquePtr voxel_features_d_{nullptr}; + CudaUniquePtr voxel_coords_d_{nullptr}; + CudaUniquePtr num_points_per_voxel_d_{nullptr}; + + // pre computed tensors + std::int64_t num_geom_feats_{}; + std::int64_t num_kept_{}; + std::int64_t num_ranks_{}; + std::int64_t num_indices_{}; + CudaUniquePtr lidar2image_d_{}; + CudaUniquePtr geom_feats_d_{}; + CudaUniquePtr kept_d_{}; + CudaUniquePtr ranks_d_{}; + CudaUniquePtr indices_d_{}; + + // image buffers + CudaUniquePtr roi_tensor_d_{nullptr}; + std::vector> image_buffers_d_{}; + CudaUniquePtr camera_masks_d_{nullptr}; + + // output buffers + CudaUniquePtr label_pred_output_d_{nullptr}; + CudaUniquePtr bbox_pred_output_d_{nullptr}; + CudaUniquePtr 
score_output_d_{nullptr}; +}; + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__BEVFUSION_TRT_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/detection_class_remapper.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/detection_class_remapper.hpp new file mode 100644 index 0000000000000..5658684f9790e --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/detection_class_remapper.hpp @@ -0,0 +1,48 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__LIDAR_BEVFUSION__DETECTION_CLASS_REMAPPER_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__DETECTION_CLASS_REMAPPER_HPP_ + +#include + +#include +#include +#include +#include + +#include +#include + +namespace autoware::lidar_bevfusion +{ + +class DetectionClassRemapper +{ +public: + void setParameters( + const std::vector & allow_remapping_by_area_matrix, + const std::vector & min_area_matrix, const std::vector & max_area_matrix); + void mapClasses(autoware_perception_msgs::msg::DetectedObjects & msg); + +protected: + Eigen::Matrix allow_remapping_by_area_matrix_; + Eigen::MatrixXd min_area_matrix_; + Eigen::MatrixXd max_area_matrix_; + int num_labels_; +}; + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__DETECTION_CLASS_REMAPPER_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/lidar_bevfusion_node.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/lidar_bevfusion_node.hpp new file mode 100644 index 0000000000000..1ee0d2c0071cf --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/lidar_bevfusion_node.hpp @@ -0,0 +1,97 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +//    http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
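Tying DetectionClassRemapper above back to config/detection_class_remapper.param.yaml: each row of the three matrices is indexed by the original class and each column by the candidate class, so with the shipped values a CAR whose bird's-eye-view footprint lies between 12.1 m^2 and 36 m^2 becomes a TRUCK, and vehicles above 36 m^2 become TRAILER. A minimal sketch of that rule (hypothetical free function, not the class's actual implementation; it assumes the first matching target class wins):

#include <Eigen/Core>

// Sketch: area-gated label remapping as encoded by the three matrices.
int remapLabel(
  int label, double bev_area, const Eigen::MatrixXi & allow,
  const Eigen::MatrixXd & min_area, const Eigen::MatrixXd & max_area)
{
  for (int target = 0; target < allow.cols(); ++target) {
    if (
      allow(label, target) == 1 && bev_area >= min_area(label, target) &&
      bev_area <= max_area(label, target)) {
      return target;  // remap to the first matching target class
    }
  }
  return label;  // no rule matched: keep the original class
}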
+ +#ifndef AUTOWARE__LIDAR_BEVFUSION__LIDAR_BEVFUSION_NODE_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__LIDAR_BEVFUSION_NODE_HPP_ + +#include "autoware/lidar_bevfusion/bevfusion_trt.hpp" +#include "autoware/lidar_bevfusion/detection_class_remapper.hpp" +#include "autoware/lidar_bevfusion/postprocess/non_maximum_suppression.hpp" +#include "autoware/lidar_bevfusion/preprocess/pointcloud_densification.hpp" +#include "autoware/lidar_bevfusion/visibility_control.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ + +class LIDAR_BEVFUSION_PUBLIC LidarBEVFusionNode : public rclcpp::Node +{ +public: + using Matrix4f = Eigen::Matrix; + + explicit LidarBEVFusionNode(const rclcpp::NodeOptions & options); + +private: + void cloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr msg); + void imageCallback(const sensor_msgs::msg::Image::ConstSharedPtr msg, std::size_t camera_id); + void cameraInfoCallback(const sensor_msgs::msg::CameraInfo & msg, std::size_t camera_id); + + rclcpp::Subscription::ConstSharedPtr cloud_sub_{nullptr}; + std::vector::ConstSharedPtr> image_subs_; + std::vector::ConstSharedPtr> camera_info_subs_; + rclcpp::Publisher::SharedPtr objects_pub_{ + nullptr}; + + tf2_ros::Buffer tf_buffer_; + tf2_ros::TransformListener tf_listener_{tf_buffer_}; + + DetectionClassRemapper detection_class_remapper_; + float score_threshold_{0.0}; + std::vector class_names_; + std::optional lidar_frame_; + float max_camera_lidar_delay_; + + std::vector image_msgs_; + std::vector camera_masks_; + std::vector> camera_info_msgs_; + std::vector> lidar2camera_extrinsics_; + + bool sensor_fusion_{false}; + bool images_available_{false}; + bool intrinsics_available_{false}; + bool extrinsics_available_{false}; + bool intrinsics_extrinsics_precomputed_{false}; + + NonMaximumSuppression iou_bev_nms_; + + std::unique_ptr detector_ptr_{nullptr}; + + // debugger + std::unique_ptr> stop_watch_ptr_{ + nullptr}; + std::unique_ptr debug_publisher_ptr_{nullptr}; + std::unique_ptr published_time_pub_{nullptr}; +}; +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__LIDAR_BEVFUSION_NODE_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/circle_nms_kernel.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/circle_nms_kernel.hpp new file mode 100644 index 0000000000000..f6b00c19c5f06 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/circle_nms_kernel.hpp @@ -0,0 +1,34 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
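One detail worth noting in the node header above: imageCallback() and cameraInfoCallback() take a camera_id, and the subscriptions are stored in vectors, which implies one subscription per camera with the index bound into the callback. A sketch of that wiring (the topic naming scheme and QoS are assumptions, not taken from this patch):

// Sketch: one image subscription per camera, binding camera_id via a lambda.
for (std::size_t camera_id = 0; camera_id < num_cameras; ++camera_id) {
  image_subs_.push_back(create_subscription<sensor_msgs::msg::Image>(
    "~/input/image" + std::to_string(camera_id), rclcpp::SensorDataQoS{}.keep_last(1),
    [this, camera_id](const sensor_msgs::msg::Image::ConstSharedPtr msg) {
      imageCallback(msg, camera_id);
    }));
}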
+ + #ifndef AUTOWARE__LIDAR_BEVFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_ + +#include "autoware/lidar_bevfusion/utils.hpp" + +#include + +#include + +namespace autoware::lidar_bevfusion +{ +// Non-maximum suppression (NMS) uses the distance on the xy plane instead of +// intersection over union (IoU) to suppress overlapped objects. +std::size_t circleNMS( + thrust::device_vector & boxes3d, const float distance_threshold, + thrust::device_vector & keep_mask, cudaStream_t stream); + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/non_maximum_suppression.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/non_maximum_suppression.hpp new file mode 100644 index 0000000000000..c8d64a76b41c6 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/non_maximum_suppression.hpp @@ -0,0 +1,82 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__LIDAR_BEVFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_ + +#include "autoware/lidar_bevfusion/ros_utils.hpp" + +#include + +#include + +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ +using autoware_perception_msgs::msg::DetectedObject; + +// TODO(knzo25): Update this implementation to match changes from centerpoint +enum class NMS_TYPE { + IoU_BEV + // IoU_3D + // Distance_2D + // Distance_3D +}; + +struct NMSParams +{ + NMS_TYPE nms_type_{}; + std::vector target_class_names_{}; + double search_distance_2d_{}; + double iou_threshold_{}; + // double distance_threshold_{}; +}; + +inline std::vector classNamesToBooleanMask(const std::vector & class_names) +{ + std::vector mask; + constexpr std::size_t num_object_classification = 8; + mask.resize(num_object_classification); + for (const auto & class_name : class_names) { + const auto semantic_type = getSemanticType(class_name); + mask.at(semantic_type) = true; + } + + return mask; +} + +class NonMaximumSuppression +{ +public: + void setParameters(const NMSParams &); + + std::vector apply(const std::vector &); + +private: + bool isTargetLabel(const std::uint8_t); + + bool isTargetPairObject(const DetectedObject &, const DetectedObject &); + + Eigen::MatrixXd generateIoUMatrix(const std::vector &); + + NMSParams params_{}; + std::vector target_class_mask_{}; +}; + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/postprocess_kernel.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/postprocess_kernel.hpp new file mode 100644 index
0000000000000..4a7c8845e85ee --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/postprocess/postprocess_kernel.hpp @@ -0,0 +1,47 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__LIDAR_BEVFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_ + +#include "autoware/lidar_bevfusion/bevfusion_config.hpp" +#include "autoware/lidar_bevfusion/utils.hpp" + +#include +#include + +#include + +namespace autoware::lidar_bevfusion +{ + +class PostprocessCuda +{ +public: + explicit PostprocessCuda(const BEVFusionConfig & config, cudaStream_t & stream); + + cudaError_t generateDetectedBoxes3D_launch( + const std::int64_t * label_pred_output, const float * bbox_pred_output, + const float * score_output, std::vector & det_boxes3d, cudaStream_t stream); + +private: + BEVFusionConfig config_; + cudaStream_t stream_; + cudaStream_t stream_event_; + cudaEvent_t start_, stop_; +}; + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/point_type.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/point_type.hpp new file mode 100644 index 0000000000000..35cf87e7d6c3d --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/point_type.hpp @@ -0,0 +1,35 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
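For readers unfamiliar with circle NMS (declared in circle_nms_kernel.hpp earlier in this patch): it suppresses detections by center distance on the BEV plane rather than by IoU. A CPU reference of the computation, assuming the boxes arrive sorted by score in descending order (illustrative only; the shipped version is a CUDA kernel writing a keep mask):

#include <cstddef>
#include <vector>

// CPU reference: keep the highest-scoring box, drop any later box whose
// BEV center lies within distance_threshold of an already-kept box.
std::vector<bool> circleNMSCpu(const std::vector<Box3D> & boxes, float distance_threshold)
{
  std::vector<bool> keep(boxes.size(), true);
  const float threshold_sq = distance_threshold * distance_threshold;
  for (std::size_t i = 0; i < boxes.size(); ++i) {
    if (!keep[i]) continue;
    for (std::size_t j = i + 1; j < boxes.size(); ++j) {
      const float dx = boxes[i].x - boxes[j].x;
      const float dy = boxes[i].y - boxes[j].y;
      if (dx * dx + dy * dy < threshold_sq) keep[j] = false;
    }
  }
  return keep;
}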
+ +#ifndef AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__POINT_TYPE_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__POINT_TYPE_HPP_ + +#include + +namespace autoware::lidar_bevfusion +{ + +struct InputPointType +{ + float x; + float y; + float z; + std::uint8_t intensity; + std::uint8_t return_type; + std::uint16_t channel; +}; + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__POINT_TYPE_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/pointcloud_densification.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/pointcloud_densification.hpp new file mode 100644 index 0000000000000..c02ba430568cf --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/pointcloud_densification.hpp @@ -0,0 +1,104 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_ + +#include "autoware/lidar_bevfusion/preprocess/point_type.hpp" + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ + +using autoware::cuda_utils::CudaUniquePtr; + +class DensificationParam +{ +public: + DensificationParam(const std::string & world_frame_id, const unsigned int num_past_frames) + : world_frame_id_(std::move(world_frame_id)), + pointcloud_cache_size_(num_past_frames + /*current frame*/ 1) + { + } + + std::string world_frame_id() const { return world_frame_id_; } + unsigned int pointcloud_cache_size() const { return pointcloud_cache_size_; } + +private: + std::string world_frame_id_; + unsigned int pointcloud_cache_size_{1}; +}; + +struct PointCloudWithTransform +{ + CudaUniquePtr data_d{nullptr}; + std_msgs::msg::Header header; + std::size_t num_points{0}; + Eigen::Affine3f affine_past2world; +}; + +class PointCloudDensification +{ +public: + explicit PointCloudDensification(const DensificationParam & param, cudaStream_t & stream); + + bool enqueuePointCloud( + const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer); + + double getCurrentTimestamp() const { return current_timestamp_; } + Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; } + std::list::iterator getPointCloudCacheIter() + { + return pointcloud_cache_.begin(); + } + bool isCacheEnd(std::list::iterator iter) + { + return iter == pointcloud_cache_.end(); + } + std::size_t getIdx(std::list::iterator iter) + { + return std::distance(pointcloud_cache_.begin(), iter); + } + std::size_t getCacheSize() + { + return std::distance(pointcloud_cache_.begin(), pointcloud_cache_.end()); + } + unsigned int pointcloud_cache_size() const { return param_.pointcloud_cache_size(); } + +private: + void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine); + void 
dequeue(); + + DensificationParam param_; + double current_timestamp_{0.0}; + Eigen::Affine3f affine_world2current_; + std::list pointcloud_cache_; + cudaStream_t stream_; +}; + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/precomputed_features.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/precomputed_features.hpp new file mode 100644 index 0000000000000..5962c0b0e6620 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/precomputed_features.hpp @@ -0,0 +1,89 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__PRECOMPUTED_FEATURES_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__PRECOMPUTED_FEATURES_HPP_ + +#include "autoware/lidar_bevfusion/bevfusion_config.hpp" + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ + +using Tensor1D = Eigen::Tensor; +using Tensor2D = Eigen::Tensor; +using Tensor3D = Eigen::Tensor; +using Tensor4D = Eigen::Tensor; +using Tensor5D = Eigen::Tensor; +using Tensor6D = Eigen::Tensor; +using Tensor7D = Eigen::Tensor; + +using TensorMap1D = Eigen::TensorMap; +using TensorMap2D = Eigen::TensorMap; +using TensorMap3D = Eigen::TensorMap; +using TensorMap4D = Eigen::TensorMap; +using TensorMap5D = Eigen::TensorMap; +using TensorMap6D = Eigen::TensorMap; +using TensorMap7D = Eigen::TensorMap; + +using Matrix4fRowM = Eigen::Matrix; +using Matrix3fRowM = Eigen::Matrix; + +using Vector3fRowM = Eigen::Matrix; + +Tensor4D create_frustum(const BEVFusionConfig & config); + +Tensor5D get_geometry( + const Tensor4D & frustum, // [D, H, W, 3] + const Tensor3D & camera2lidar_rots, // [N, 3, 3] + const Tensor2D & camera2lidar_trans, // [N, 3] + const Tensor3D & intrins_inverse, // [N, 3, 3] + const Tensor3D & post_rots_inverse, // [N, 3, 3] + const Tensor2D & post_trans // [N, 3] +); + +// Define the function +std::tuple< + Eigen::Matrix, + Eigen::Matrix, + Eigen::Matrix, + Eigen::Matrix > +bev_pool_aux(const Tensor5D & geom_feats_input, const BEVFusionConfig & config); + +std::tuple< + Eigen::VectorXf, // lidar2image + Eigen::Matrix, + Eigen::Matrix, + Eigen::Matrix, + Eigen::Matrix > +precompute_features( + const std::vector & lidar2camera_transforms, + const std::vector & camera_aug_matrices, + const std::vector & camera_info_vector, + const BEVFusionConfig & config); + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__PRECOMPUTED_FEATURES_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/preprocess_kernel.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/preprocess_kernel.hpp new file mode 100644 index 
0000000000000..c14ee72cfd1c3 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/preprocess_kernel.hpp @@ -0,0 +1,76 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/* + * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. + * All rights reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_ + +#include "autoware/lidar_bevfusion/bevfusion_config.hpp" +#include "autoware/lidar_bevfusion/preprocess/point_type.hpp" +#include "autoware/lidar_bevfusion/utils.hpp" + +#include + +#include +#include + +#include +#include + +namespace autoware::lidar_bevfusion +{ + +class PreprocessCuda +{ +public: + PreprocessCuda(const BEVFusionConfig & config, cudaStream_t & stream, bool allocate_buffers); + + cudaError_t generateSweepPoints_launch( + const InputPointType * input_data, std::size_t points_size, float time_lag, + const float * transform, float * output_points); + + std::size_t generateVoxels( + const float * points, unsigned int points_size, float * voxel_features, + std::int32_t * voxel_coords, std::int32_t * num_points_per_voxel); + + cudaError_t resize_and_extract_roi_launch( + const std::uint8_t * input_img, std::uint8_t * output_img, int H, int W, int H2, int W2, int H3, + int W3, int y_start, int x_start, cudaStream_t stream); + +private: + BEVFusionConfig config_; + cudaStream_t stream_; + + tv::Tensor hash_key_value_; + tv::Tensor point_indice_data_; + tv::Tensor points_voxel_id_; +}; +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/voxel_generator.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/voxel_generator.hpp new file mode 100644 index 0000000000000..7b90c4e86de06 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/preprocess/voxel_generator.hpp @@ -0,0 +1,64 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_ + +#include "autoware/lidar_bevfusion/bevfusion_config.hpp" +#include "autoware/lidar_bevfusion/preprocess/pointcloud_densification.hpp" +#include "autoware/lidar_bevfusion/preprocess/preprocess_kernel.hpp" +#include "autoware/lidar_bevfusion/ros_utils.hpp" + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ + +class VoxelGenerator +{ +public: + explicit VoxelGenerator( + const DensificationParam & densification_param, const BEVFusionConfig & config, + cudaStream_t & stream); + std::size_t generateSweepPoints(CudaUniquePtr & points_d); + bool enqueuePointCloud( + const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer); + void initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg); + std::tuple getFieldInfo( + const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name); + +private: + std::unique_ptr pd_ptr_{nullptr}; + std::unique_ptr pre_ptr_{nullptr}; + BEVFusionConfig config_; + CudaUniquePtr affine_past2current_d_{nullptr}; + std::vector points_; + cudaStream_t stream_; +}; + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/ros_utils.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/ros_utils.hpp new file mode 100644 index 0000000000000..62128024a15aa --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/ros_utils.hpp @@ -0,0 +1,67 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
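VoxelGenerator above feeds PreprocessCuda::generateVoxels(); the core index computation for assigning a point to a grid cell, consistent with the ranges and voxel sizes held in BEVFusionConfig, is sketched here (the function name and the (z, y, x) output ordering are assumptions):

#include <array>
#include <cstdint>
#include <optional>

// Sketch: point -> voxel coordinate; points outside the range are dropped.
std::optional<std::array<std::int32_t, 3>> voxelIndex(
  float x, float y, float z, const BEVFusionConfig & config)
{
  const auto ix = static_cast<std::int32_t>((x - config.min_x_range_) / config.voxel_x_size_);
  const auto iy = static_cast<std::int32_t>((y - config.min_y_range_) / config.voxel_y_size_);
  const auto iz = static_cast<std::int32_t>((z - config.min_z_range_) / config.voxel_z_size_);
  if (
    ix < 0 || ix >= config.grid_x_size_ || iy < 0 || iy >= config.grid_y_size_ || iz < 0 ||
    iz >= config.grid_z_size_) {
    return std::nullopt;
  }
  return std::array<std::int32_t, 3>{iz, iy, ix};
}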
+ + #ifndef AUTOWARE__LIDAR_BEVFUSION__ROS_UTILS_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__ROS_UTILS_HPP_ + +#include "autoware/lidar_bevfusion/preprocess/point_type.hpp" +#include "autoware/lidar_bevfusion/utils.hpp" + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CHECK_OFFSET(structure1, structure2, field) \ static_assert( \ offsetof(structure1, field) == offsetof(structure2, field), \ "Offset of " #field " in " #structure1 " does not match the one in " #structure2 ".") +#define CHECK_TYPE(structure1, structure2, field) \ static_assert( \ std::is_same_v, \ "Type of " #field " in " #structure1 " and " #structure2 " have different types.") +#define CHECK_FIELD(structure1, structure2, field) \ CHECK_OFFSET(structure1, structure2, field); \ CHECK_TYPE(structure1, structure2, field) + +namespace autoware::lidar_bevfusion +{ +using sensor_msgs::msg::PointField; + +CHECK_FIELD(InputPointType, autoware::point_types::PointXYZIRC, x); +CHECK_FIELD(InputPointType, autoware::point_types::PointXYZIRC, y); +CHECK_FIELD(InputPointType, autoware::point_types::PointXYZIRC, z); +CHECK_FIELD(InputPointType, autoware::point_types::PointXYZIRC, intensity); +static_assert(sizeof(InputPointType) == sizeof(autoware::point_types::PointXYZIRC)); + +// TODO(knzo25): will move this from the pointcloud preprocessor to autoware_point_types after +// this is merged +bool is_data_layout_compatible_with_point_xyzirc(const sensor_msgs::msg::PointCloud2 & input); + +void box3DToDetectedObject( + const Box3D & box3d, const std::vector & class_names, + autoware_perception_msgs::msg::DetectedObject & obj); + +uint8_t getSemanticType(const std::string & class_name); + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__ROS_UTILS_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/utils.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/utils.hpp new file mode 100644 index 0000000000000..e0cbe9a7ebabf --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/utils.hpp @@ -0,0 +1,57 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
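To make the macros above concrete, CHECK_FIELD(InputPointType, autoware::point_types::PointXYZIRC, x) expands to the pair of static_asserts below. The decltype comparison in the type check is an assumption, since the template arguments of std::is_same_v are not visible in this excerpt:

static_assert(
  offsetof(InputPointType, x) == offsetof(autoware::point_types::PointXYZIRC, x),
  "Offset of x in InputPointType does not match the one in autoware::point_types::PointXYZIRC.");
static_assert(
  std::is_same_v<
    decltype(InputPointType::x), decltype(autoware::point_types::PointXYZIRC::x)>,
  "Type of x in InputPointType and autoware::point_types::PointXYZIRC have different types.");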
+ +#ifndef AUTOWARE__LIDAR_BEVFUSION__UTILS_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__UTILS_HPP_ + +#include +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ + +struct Box3D +{ + int label; + float score; + float x; + float y; + float z; + float width; + float length; + float height; + float yaw; + float vx; + float vy; +}; + +// cspell: ignore divup +template <typename T1, typename T2> +unsigned int divup(const T1 a, const T2 b) +{ + if (a == 0) { + throw std::runtime_error("The dividend of divup is zero."); + } + if (b == 0) { + throw std::runtime_error("The divisor of divup is zero."); + } + + return (a + b - 1) / b; +} + +} // namespace autoware::lidar_bevfusion + +#endif // AUTOWARE__LIDAR_BEVFUSION__UTILS_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/visibility_control.hpp b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/visibility_control.hpp new file mode 100644 index 0000000000000..96e64e770ded3 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/lidar_bevfusion/visibility_control.hpp @@ -0,0 +1,37 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__LIDAR_BEVFUSION__VISIBILITY_CONTROL_HPP_ +#define AUTOWARE__LIDAR_BEVFUSION__VISIBILITY_CONTROL_HPP_ + +//////////////////////////////////////////////////////////////////////////////// +#if defined(__WIN32) +#if defined(LIDAR_BEVFUSION_BUILDING_DLL) || defined(LIDAR_BEVFUSION_EXPORTS) +#define LIDAR_BEVFUSION_PUBLIC __declspec(dllexport) +#define LIDAR_BEVFUSION_LOCAL +#else // defined(LIDAR_BEVFUSION_BUILDING_DLL) || defined(LIDAR_BEVFUSION_EXPORTS) +#define LIDAR_BEVFUSION_PUBLIC __declspec(dllimport) +#define LIDAR_BEVFUSION_LOCAL +#endif // defined(LIDAR_BEVFUSION_BUILDING_DLL) || defined(LIDAR_BEVFUSION_EXPORTS) +#elif defined(__linux__) +#define LIDAR_BEVFUSION_PUBLIC __attribute__((visibility("default"))) +#define LIDAR_BEVFUSION_LOCAL __attribute__((visibility("hidden"))) +#elif defined(__APPLE__) +#define LIDAR_BEVFUSION_PUBLIC __attribute__((visibility("default"))) +#define LIDAR_BEVFUSION_LOCAL __attribute__((visibility("hidden"))) +#else +#error "Unsupported Build Configuration" +#endif + +#endif // AUTOWARE__LIDAR_BEVFUSION__VISIBILITY_CONTROL_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.hpp b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.hpp new file mode 100644 index 0000000000000..9e43e8fa8c894 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.hpp @@ -0,0 +1,140 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__TENSORRT_PLUGINS__GET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_HPP_ +#define AUTOWARE__TENSORRT_PLUGINS__GET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_HPP_ + +#include +#include +#include + +#include +#include +#include +#include +#include + +constexpr char const * const kGET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_NAME{ + "GetIndicePairsImplicitGemm"}; +constexpr char const * const kGET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_VERSION{"1"}; +constexpr char const * const kGET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_NAMESPACE{""}; + +namespace nvinfer1 +{ +namespace plugin +{ + +struct GetIndicePairsImplicitGemmParameters +{ + std::int32_t batch_size; + std::int32_t algo; + std::int32_t is_train; + std::vector dilation; + std::vector ksize; + std::vector out_padding; + std::vector padding; + std::vector spatial_shape; + std::vector stride; + std::int32_t subm; + std::int32_t transpose; + + nvinfer1::Dims dilation_dims; + nvinfer1::Dims ksize_dims; + nvinfer1::Dims out_padding_dims; + nvinfer1::Dims padding_dims; + nvinfer1::Dims spatial_shape_dims; + nvinfer1::Dims stride_dims; +}; + +class GetIndicePairsImplicitGemmPlugin : public IPluginV3, + public IPluginV3OneCore, + public IPluginV3OneBuild, + public IPluginV3OneRuntime +{ +public: + GetIndicePairsImplicitGemmPlugin( + const std::string & name, GetIndicePairsImplicitGemmParameters const & params); + + ~GetIndicePairsImplicitGemmPlugin() override = default; + + // IPluginV3 Methods + + IPluginCapability * getCapabilityInterface(PluginCapabilityType type) noexcept override; + + IPluginV3 * clone() noexcept override; + + // IPluginV3OneCore Methods + + char const * getPluginName() const noexcept override; + + char const * getPluginVersion() const noexcept override; + + char const * getPluginNamespace() const noexcept override; + + // IPluginV3OneBuild Methods + + std::int32_t getNbOutputs() const noexcept override; + + std::int32_t configurePlugin( + DynamicPluginTensorDesc const * in, std::int32_t num_inputs, + DynamicPluginTensorDesc const * out, std::int32_t num_outputs) noexcept override; + + bool supportsFormatCombination( + std::int32_t pos, DynamicPluginTensorDesc const * in_out, std::int32_t num_inputs, + std::int32_t num_outputs) noexcept override; + + std::int32_t getOutputDataTypes( + DataType * output_types, std::int32_t num_outputs, DataType const * input_types, + std::int32_t num_inputs) const noexcept override; + + std::int32_t getOutputShapes( + DimsExprs const * inputs, std::int32_t num_inputs, DimsExprs const * shape_inputs, + std::int32_t num_shape_inputs, DimsExprs * outputs, std::int32_t num_outputs, + IExprBuilder & expr_builder) noexcept override; + + // IPluginV3OneRuntime Methods + + std::int32_t enqueue( + PluginTensorDesc const * input_desc, PluginTensorDesc const * output_desc, + void const * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) noexcept override; + + std::int32_t onShapeChange( + PluginTensorDesc const * in, std::int32_t num_inputs, PluginTensorDesc const * out, + std::int32_t num_outputs) noexcept override; + + IPluginV3 * attachToContext(IPluginResourceContext 
* context) noexcept override; + + PluginFieldCollection const * getFieldsToSerialize() noexcept override; + + std::size_t getWorkspaceSize( + DynamicPluginTensorDesc const * inputs, std::int32_t num_inputs, + DynamicPluginTensorDesc const * outputs, std::int32_t num_outputs) const noexcept override; + +private: + void initFieldsToSerialize(); + + // upper bound of number of output indices. needed to bound memory usage. + static constexpr int out_inds_num_limit_{256000}; + + std::string layer_name_; + GetIndicePairsImplicitGemmParameters params_; + std::vector data_to_serialize_; + nvinfer1::PluginFieldCollection fc_to_serialize_; +}; + +} // namespace plugin +} // namespace nvinfer1 + +#endif // AUTOWARE__TENSORRT_PLUGINS__GET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin_creator.hpp b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin_creator.hpp new file mode 100644 index 0000000000000..7b5eeb8cc1e9a --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin_creator.hpp @@ -0,0 +1,65 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__TENSORRT_PLUGINS__GET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_CREATOR_HPP_ +#define AUTOWARE__TENSORRT_PLUGINS__GET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_CREATOR_HPP_ + +#include "autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.hpp" + +#include + +#include + +namespace nvinfer1 +{ +namespace plugin +{ + +// Plugin factory class. 
+class GetIndicePairsImplicitGemmPluginCreator : public nvinfer1::IPluginCreatorV3One +{ +public: + GetIndicePairsImplicitGemmPluginCreator(); + + ~GetIndicePairsImplicitGemmPluginCreator() override = default; + + char const * getPluginNamespace() const noexcept override + { + return kGET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_NAMESPACE; + } + + char const * getPluginName() const noexcept override + { + return kGET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_NAME; + } + + char const * getPluginVersion() const noexcept override + { + return kGET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_VERSION; + } + + nvinfer1::PluginFieldCollection const * getFieldNames() noexcept override; + + IPluginV3 * createPlugin( + char const * name, PluginFieldCollection const * fc, TensorRTPhase phase) noexcept override; + +private: + nvinfer1::PluginFieldCollection fc_; + std::vector plugin_attributes_; +}; + +} // namespace plugin +} // namespace nvinfer1 + +#endif // AUTOWARE__TENSORRT_PLUGINS__GET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_CREATOR_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/implicit_gemm_plugin.hpp b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/implicit_gemm_plugin.hpp new file mode 100644 index 0000000000000..4ddca60e6795f --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/implicit_gemm_plugin.hpp @@ -0,0 +1,138 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
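A creator like the one above receives its parameters as a PluginFieldCollection when TensorRT builds or deserializes an engine. A sketch of the building-side call, assuming field names that mirror the GetIndicePairsImplicitGemmParameters struct (the authoritative field list lives in the creator's .cpp, not in this header):

#include <NvInferRuntime.h>

#include <array>
#include <cstdint>

// Hypothetical builder-side helper, illustration only.
nvinfer1::IPluginV3 * make_get_indice_pairs_plugin(nvinfer1::IPluginCreatorV3One & creator)
{
  const std::int32_t batch_size = 1;
  const std::array<std::int32_t, 3> ksize{3, 3, 3};

  std::array<nvinfer1::PluginField, 2> fields{
    nvinfer1::PluginField{"batch_size", &batch_size, nvinfer1::PluginFieldType::kINT32, 1},
    nvinfer1::PluginField{"ksize", ksize.data(), nvinfer1::PluginFieldType::kINT32, 3}};

  nvinfer1::PluginFieldCollection fc;
  fc.nbFields = static_cast<std::int32_t>(fields.size());
  fc.fields = fields.data();

  // TensorRTPhase::kBUILD marks plugin creation during network construction,
  // as opposed to kRUNTIME during engine deserialization.
  return creator.createPlugin("get_indice_pairs", &fc, nvinfer1::TensorRTPhase::kBUILD);
}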
+ +#ifndef AUTOWARE__TENSORRT_PLUGINS__IMPLICIT_GEMM_PLUGIN_HPP_ +#define AUTOWARE__TENSORRT_PLUGINS__IMPLICIT_GEMM_PLUGIN_HPP_ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +constexpr char const * const kIMPLICIT_GEMM_PLUGIN_NAME{"ImplicitGemm"}; +constexpr char const * const kIMPLICIT_GEMM_PLUGIN_VERSION{"1"}; +constexpr char const * const kIMPLICIT_GEMM_PLUGIN_NAMESPACE{""}; + +namespace nvinfer1 +{ +namespace plugin +{ + +struct ImplicitGemmParameters +{ + float act_alpha; + float act_beta; + std::int64_t is_subm; + std::int64_t is_train; + float output_add_scale; + float output_scale; +}; + +class ImplicitGemmPlugin : public IPluginV3, + public IPluginV3OneCore, + public IPluginV3OneBuild, + public IPluginV3OneRuntime +{ +public: + using ConvTunerSimple = spconvlib::spconv::csrc::sparse::convops::spops::ConvTuner; + ImplicitGemmPlugin(const std::string & name, ImplicitGemmParameters const & params); + + ~ImplicitGemmPlugin() override = default; + + // IPluginV3 Methods + + IPluginCapability * getCapabilityInterface(PluginCapabilityType type) noexcept override; + + IPluginV3 * clone() noexcept override; + + // IPluginV3OneCore Methods + + char const * getPluginName() const noexcept override; + + char const * getPluginVersion() const noexcept override; + + char const * getPluginNamespace() const noexcept override; + + // IPluginV3OneBuild Methods + + std::int32_t getNbOutputs() const noexcept override; + + std::int32_t configurePlugin( + DynamicPluginTensorDesc const * in, std::int32_t num_inputs, + DynamicPluginTensorDesc const * out, std::int32_t num_outputs) noexcept override; + + bool supportsFormatCombination( + std::int32_t pos, DynamicPluginTensorDesc const * in_out, std::int32_t num_inputs, + std::int32_t num_outputs) noexcept override; + + std::int32_t getOutputDataTypes( + DataType * output_types, std::int32_t num_outputs, DataType const * input_types, + std::int32_t num_inputs) const noexcept override; + + std::int32_t getOutputShapes( + DimsExprs const * inputs, std::int32_t num_inputs, DimsExprs const * shape_inputs, + std::int32_t num_shape_inputs, DimsExprs * outputs, std::int32_t num_outputs, + IExprBuilder & expr_builder) noexcept override; + + // IPluginV3OneRuntime Methods + + std::int32_t enqueue( + PluginTensorDesc const * input_desc, PluginTensorDesc const * output_desc, + void const * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) noexcept override; + + std::int32_t onShapeChange( + PluginTensorDesc const * in, std::int32_t num_inputs, PluginTensorDesc const * out, + std::int32_t num_outputs) noexcept override; + + IPluginV3 * attachToContext(IPluginResourceContext * context) noexcept override; + + PluginFieldCollection const * getFieldsToSerialize() noexcept override; + + std::size_t getWorkspaceSize( + DynamicPluginTensorDesc const * inputs, std::int32_t num_inputs, + DynamicPluginTensorDesc const * outputs, std::int32_t num_outputs) const noexcept override; + +private: + static constexpr std::int32_t INOUT_IN_FEATURES_INDEX = 0; + static constexpr std::int32_t INOUT_FILTERS_INDEX = 1; + static constexpr std::int32_t INOUT_PAIR_FWD_INDEX = 2; + static constexpr std::int32_t INOUT_PAIR_MASK_FWD_SPLITS_INDEX = 3; + static constexpr std::int32_t INOUT_MASK_ARGSORT_FWD_SPLITS_INDEX = 4; + static constexpr std::int32_t INOUT_OUT_FEATURES_INDEX = 5; + + void initFieldsToSerialize(); + + std::string layer_name_; + ImplicitGemmParameters params_; + 
std::tuple arch_; + std::vector data_to_serialize_; + nvinfer1::PluginFieldCollection fc_to_serialize_; + + std::unique_ptr tunner_ptr_{}; +}; + +} // namespace plugin +} // namespace nvinfer1 + +#endif // AUTOWARE__TENSORRT_PLUGINS__IMPLICIT_GEMM_PLUGIN_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/implicit_gemm_plugin_creator.hpp b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/implicit_gemm_plugin_creator.hpp new file mode 100644 index 0000000000000..c6450b2e9b1ab --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/implicit_gemm_plugin_creator.hpp @@ -0,0 +1,59 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__TENSORRT_PLUGINS__IMPLICIT_GEMM_PLUGIN_CREATOR_HPP_ +#define AUTOWARE__TENSORRT_PLUGINS__IMPLICIT_GEMM_PLUGIN_CREATOR_HPP_ + +#include "autoware/tensorrt_plugins/implicit_gemm_plugin.hpp" + +#include + +#include + +namespace nvinfer1 +{ +namespace plugin +{ + +// Plugin factory class. +class ImplicitGemmPluginCreator : public nvinfer1::IPluginCreatorV3One +{ +public: + ImplicitGemmPluginCreator(); + + ~ImplicitGemmPluginCreator() override = default; + + char const * getPluginNamespace() const noexcept override + { + return kIMPLICIT_GEMM_PLUGIN_NAMESPACE; + } + + char const * getPluginName() const noexcept override { return kIMPLICIT_GEMM_PLUGIN_NAME; } + + char const * getPluginVersion() const noexcept override { return kIMPLICIT_GEMM_PLUGIN_VERSION; } + + nvinfer1::PluginFieldCollection const * getFieldNames() noexcept override; + + IPluginV3 * createPlugin( + char const * name, PluginFieldCollection const * fc, TensorRTPhase phase) noexcept override; + +private: + nvinfer1::PluginFieldCollection fc_; + std::vector plugin_attributes_; +}; + +} // namespace plugin +} // namespace nvinfer1 + +#endif // AUTOWARE__TENSORRT_PLUGINS__IMPLICIT_GEMM_PLUGIN_CREATOR_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/plugin_registration.hpp b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/plugin_registration.hpp new file mode 100644 index 0000000000000..4ce6e28a205aa --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/plugin_registration.hpp @@ -0,0 +1,29 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
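Both plugin classes above follow the same IPluginV3 serialization idiom: the parameter struct is flattened once into a vector of PluginFields, and getFieldsToSerialize() returns the cached collection. A representative sketch of that idiom with a toy parameter struct (assumes a TensorRT 10 header providing PluginFieldType::kINT64; the patch's real implementations live in the corresponding .cpp files):

#include <NvInferRuntime.h>

#include <cstdint>
#include <vector>

// Toy parameter struct standing in for e.g. ImplicitGemmParameters.
struct ToyParams
{
  float act_alpha;
  std::int64_t is_subm;
};

class ToyPluginState
{
public:
  explicit ToyPluginState(const ToyParams & params) : params_(params)
  {
    // One PluginField per scalar; the data pointers reference params_, which
    // lives as long as this object, so the collection stays valid.
    fields_.emplace_back("act_alpha", &params_.act_alpha, nvinfer1::PluginFieldType::kFLOAT32, 1);
    fields_.emplace_back("is_subm", &params_.is_subm, nvinfer1::PluginFieldType::kINT64, 1);
    fc_.nbFields = static_cast<std::int32_t>(fields_.size());
    fc_.fields = fields_.data();
  }

  nvinfer1::PluginFieldCollection const * getFieldsToSerialize() const { return &fc_; }

private:
  ToyParams params_;
  std::vector<nvinfer1::PluginField> fields_;
  nvinfer1::PluginFieldCollection fc_{};
};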
+ +#ifndef AUTOWARE__TENSORRT_PLUGINS__PLUGIN_REGISTRATION_HPP_ +#define AUTOWARE__TENSORRT_PLUGINS__PLUGIN_REGISTRATION_HPP_ + +#include + +#include + +// These are the functions that TensorRT library will call at the runtime. + +extern "C" void setLoggerFinder(nvinfer1::ILoggerFinder * finder); + +extern "C" nvinfer1::IPluginCreatorInterface * const * getPluginCreators( + std::int32_t & num_creators); + +#endif // AUTOWARE__TENSORRT_PLUGINS__PLUGIN_REGISTRATION_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/plugin_utils.hpp b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/plugin_utils.hpp new file mode 100644 index 0000000000000..c368f796b714d --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/plugin_utils.hpp @@ -0,0 +1,35 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__TENSORRT_PLUGINS__PLUGIN_UTILS_HPP_ +#define AUTOWARE__TENSORRT_PLUGINS__PLUGIN_UTILS_HPP_ + +#include + +#include +#include + +void caughtError(std::exception const & e); + +void logDebug(char const * msg); + +void logInfo(char const * msg); + +#define PLUGIN_ASSERT(val) reportAssertion((val), #val, __FILE__, __LINE__) +void reportAssertion(bool success, char const * msg, char const * file, std::int32_t line); + +#define PLUGIN_VALIDATE(val) reportValidation((val), #val, __FILE__, __LINE__) +void reportValidation(bool success, char const * msg, char const * file, std::int32_t line); + +#endif // AUTOWARE__TENSORRT_PLUGINS__PLUGIN_UTILS_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/quick_cumsum_cuda_plugin.hpp b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/quick_cumsum_cuda_plugin.hpp new file mode 100644 index 0000000000000..e488b677ce90f --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/quick_cumsum_cuda_plugin.hpp @@ -0,0 +1,123 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
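The two extern "C" symbols declared in plugin_registration.hpp above are the entry points TensorRT resolves when it loads this plugin library at runtime: getPluginCreators() hands back the creator table, and setLoggerFinder() wires the library to the caller's logger. A loading-side sketch, assuming a TensorRT version whose IPluginRegistry exposes loadLibrary() (this package instead forwards plugins_path_ through TrtCommon in bevfusion_trt.cpp):

#include <NvInferRuntime.h>

// Make the creators above visible to TensorRT before building or
// deserializing an engine that references these custom layers.
void load_bevfusion_plugins(char const * plugins_path)
{
  // loadLibrary() opens the shared object, calls its getPluginCreators()
  // entry point, and registers every creator that it returns.
  getPluginRegistry()->loadLibrary(plugins_path);
}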
+ +#ifndef AUTOWARE__TENSORRT_PLUGINS__QUICK_CUMSUM_CUDA_PLUGIN_HPP_ +#define AUTOWARE__TENSORRT_PLUGINS__QUICK_CUMSUM_CUDA_PLUGIN_HPP_ + +#include +#include +#include + +#include +#include +#include +#include +#include + +constexpr char const * const kQUICK_CUMSUM_CUDA_PLUGIN_NAME{"QuickCumsumCuda"}; +constexpr char const * const kQUICK_CUMSUM_CUDA_PLUGIN_VERSION{"1"}; +constexpr char const * const kQUICK_CUMSUM_CUDA_PLUGIN_NAMESPACE{""}; + +namespace nvinfer1 +{ +namespace plugin +{ + +struct QuickCumsumCudaParameters +{ + std::int32_t batch_size; + std::int32_t dimension; + std::int32_t height; + std::int32_t width; +}; + +class QuickCumsumCudaPlugin : public IPluginV3, + public IPluginV3OneCore, + public IPluginV3OneBuild, + public IPluginV3OneRuntime +{ +public: + QuickCumsumCudaPlugin(const std::string & name, QuickCumsumCudaParameters const & params); + + ~QuickCumsumCudaPlugin() override = default; + + // IPluginV3 Methods + + IPluginCapability * getCapabilityInterface(PluginCapabilityType type) noexcept override; + + IPluginV3 * clone() noexcept override; + + // IPluginV3OneCore Methods + + char const * getPluginName() const noexcept override; + + char const * getPluginVersion() const noexcept override; + + char const * getPluginNamespace() const noexcept override; + + // IPluginV3OneBuild Methods + + std::int32_t getNbOutputs() const noexcept override; + + std::int32_t configurePlugin( + DynamicPluginTensorDesc const * in, std::int32_t num_inputs, + DynamicPluginTensorDesc const * out, std::int32_t num_outputs) noexcept override; + + bool supportsFormatCombination( + int32_t pos, DynamicPluginTensorDesc const * in_out, std::int32_t num_inputs, + std::int32_t num_outputs) noexcept override; + + std::int32_t getOutputDataTypes( + DataType * output_types, std::int32_t num_outputs, DataType const * input_types, + std::int32_t num_inputs) const noexcept override; + + std::int32_t getOutputShapes( + DimsExprs const * inputs, std::int32_t num_inputs, DimsExprs const * shape_inputs, + std::int32_t num_shape_inputs, DimsExprs * outputs, std::int32_t num_outputs, + IExprBuilder & expr_builder) noexcept override; + + // IPluginV3OneRuntime Methods + + std::int32_t enqueue( + PluginTensorDesc const * input_desc, PluginTensorDesc const * output_desc, + void const * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) noexcept override; + + std::int32_t onShapeChange( + PluginTensorDesc const * in, std::int32_t num_inputs, PluginTensorDesc const * out, + std::int32_t num_outputs) noexcept override; + + IPluginV3 * attachToContext(IPluginResourceContext * context) noexcept override; + + PluginFieldCollection const * getFieldsToSerialize() noexcept override; + + std::size_t getWorkspaceSize( + DynamicPluginTensorDesc const * inputs, std::int32_t num_inputs, + DynamicPluginTensorDesc const * outputs, std::int32_t num_outputs) const noexcept override; + +private: + // TensorRT plugin parameters. 
+ std::string layer_name_; + QuickCumsumCudaParameters params_; + + void initFieldsToSerialize(); + + std::vector data_to_serialize_; + nvinfer1::PluginFieldCollection fc_to_serialize_; +}; + +} // namespace plugin +} // namespace nvinfer1 + +#endif // AUTOWARE__TENSORRT_PLUGINS__QUICK_CUMSUM_CUDA_PLUGIN_HPP_ diff --git a/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.hpp b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.hpp new file mode 100644 index 0000000000000..c7ddc4e6f9f0d --- /dev/null +++ b/perception/autoware_lidar_bevfusion/include/autoware/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.hpp @@ -0,0 +1,62 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__TENSORRT_PLUGINS__QUICK_CUMSUM_CUDA_PLUGIN_CREATOR_HPP_ +#define AUTOWARE__TENSORRT_PLUGINS__QUICK_CUMSUM_CUDA_PLUGIN_CREATOR_HPP_ + +#include "autoware/tensorrt_plugins/quick_cumsum_cuda_plugin.hpp" + +#include + +#include + +namespace nvinfer1 +{ +namespace plugin +{ + +// Plugin factory class. +class QuickCumsumCudaPluginCreator : public nvinfer1::IPluginCreatorV3One +{ +public: + QuickCumsumCudaPluginCreator(); + + ~QuickCumsumCudaPluginCreator() override = default; + + char const * getPluginNamespace() const noexcept override + { + return kQUICK_CUMSUM_CUDA_PLUGIN_NAMESPACE; + } + + char const * getPluginName() const noexcept override { return kQUICK_CUMSUM_CUDA_PLUGIN_NAME; } + + char const * getPluginVersion() const noexcept override + { + return kQUICK_CUMSUM_CUDA_PLUGIN_VERSION; + } + + nvinfer1::PluginFieldCollection const * getFieldNames() noexcept override; + + IPluginV3 * createPlugin( + char const * name, PluginFieldCollection const * fc, TensorRTPhase phase) noexcept override; + +private: + nvinfer1::PluginFieldCollection fc_; + std::vector plugin_attributes_; +}; + +} // namespace plugin +} // namespace nvinfer1 + +#endif // AUTOWARE__TENSORRT_PLUGINS__QUICK_CUMSUM_CUDA_PLUGIN_CREATOR_HPP_ diff --git a/perception/autoware_lidar_bevfusion/launch/lidar_bevfusion.launch.xml b/perception/autoware_lidar_bevfusion/launch/lidar_bevfusion.launch.xml new file mode 100644 index 0000000000000..a2cecb2f72f62 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/launch/lidar_bevfusion.launch.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/perception/autoware_lidar_bevfusion/lib/bevfusion_trt.cpp b/perception/autoware_lidar_bevfusion/lib/bevfusion_trt.cpp new file mode 100644 index 0000000000000..a7a00771d6b71 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/lib/bevfusion_trt.cpp @@ -0,0 +1,532 @@ +// Copyright 2025 TIER IV, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/lidar_bevfusion/bevfusion_trt.hpp" + +#include "autoware/lidar_bevfusion/bevfusion_config.hpp" +#include "autoware/lidar_bevfusion/preprocess/point_type.hpp" +#include "autoware/lidar_bevfusion/preprocess/precomputed_features.hpp" +#include "autoware/lidar_bevfusion/preprocess/preprocess_kernel.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ + +BEVFusionTRT::BEVFusionTRT( + const tensorrt_common::TrtCommonConfig & trt_config, + const DensificationParam & densification_param, const BEVFusionConfig & config) +: config_(config) +{ + vg_ptr_ = std::make_unique(densification_param, config_, stream_); + + stop_watch_ptr_ = + std::make_unique>(); + stop_watch_ptr_->tic("processing/inner"); + + initPtr(); + initTrt(trt_config); + + CHECK_CUDA_ERROR(cudaStreamCreate(&stream_)); + + camera_streams_.resize(config_.num_cameras_); + for (std::int64_t i = 0; i < config_.num_cameras_; i++) { + CHECK_CUDA_ERROR(cudaStreamCreate(&camera_streams_[i])); + } +} + +BEVFusionTRT::~BEVFusionTRT() +{ + if (stream_) { + cudaStreamSynchronize(stream_); + cudaStreamDestroy(stream_); + } + + for (std::int64_t i = 0; i < config_.num_cameras_; i++) { + if (camera_streams_[i]) { + cudaStreamSynchronize(camera_streams_[i]); + cudaStreamDestroy(camera_streams_[i]); + } + } +} + +void BEVFusionTRT::initPtr() +{ + // point cloud to voxels + voxel_features_size_ = + config_.max_num_voxels_ * config_.max_points_per_voxel_ * config_.num_point_feature_size_; + voxel_coords_size_ = 3 * config_.max_num_voxels_; + + // output of TRT -- input of post-process + bbox_pred_size_ = config_.num_proposals_ * config_.num_box_values_; + label_pred_output_d_ = autoware::cuda_utils::make_unique(config_.num_proposals_); + bbox_pred_output_d_ = autoware::cuda_utils::make_unique(bbox_pred_size_); + score_output_d_ = autoware::cuda_utils::make_unique(config_.num_proposals_); + + // lidar branch + voxel_features_d_ = autoware::cuda_utils::make_unique(voxel_features_size_); + voxel_coords_d_ = autoware::cuda_utils::make_unique(voxel_coords_size_); + num_points_per_voxel_d_ = + autoware::cuda_utils::make_unique(config_.max_num_voxels_); + points_d_ = autoware::cuda_utils::make_unique( + config_.cloud_capacity_ * config_.num_point_feature_size_); + + // pre computed tensors + if (config_.sensor_fusion_) { + lidar2image_d_ = autoware::cuda_utils::make_unique(config_.num_cameras_ * 4 * 4); + std::int64_t num_geom_feats = config_.num_cameras_ * config_.features_height_ * + config_.features_width_ * config_.num_depth_features_; + geom_feats_d_ = autoware::cuda_utils::make_unique(4 * num_geom_feats); + kept_d_ = autoware::cuda_utils::make_unique(num_geom_feats); + ranks_d_ = autoware::cuda_utils::make_unique(num_geom_feats); + indices_d_ = autoware::cuda_utils::make_unique(num_geom_feats); + + // image branch + roi_tensor_d_ = 
autoware::cuda_utils::make_unique( + config_.num_cameras_ * config_.roi_height_ * config_.roi_width_ * 3); + for (std::int64_t camera_id = 0; camera_id < config_.num_cameras_; camera_id++) { + image_buffers_d_.emplace_back(autoware::cuda_utils::make_unique( + config_.raw_image_height_ * config_.raw_image_width_ * 3)); + } + camera_masks_d_ = autoware::cuda_utils::make_unique(config_.num_cameras_); + } + + pre_ptr_ = std::make_unique(config_, stream_, true); + post_ptr_ = std::make_unique(config_, stream_); +} + +void BEVFusionTRT::initTrt(const tensorrt_common::TrtCommonConfig & trt_config) +{ + std::vector network_io; + + // Lidar branch + network_io.emplace_back( + "voxels", + nvinfer1::Dims{3, {-1, config_.max_points_per_voxel_, config_.num_point_feature_size_}}); + network_io.emplace_back("num_points_per_voxel", nvinfer1::Dims{1, {-1}}); + network_io.emplace_back("coors", nvinfer1::Dims{2, {-1, 3}}); + + // Camera branch + if (config_.sensor_fusion_) { + network_io.emplace_back("points", nvinfer1::Dims{2, {-1, 5}}); + network_io.emplace_back("camera_mask", nvinfer1::Dims{1, {-1}}); + network_io.emplace_back( + "imgs", nvinfer1::Dims{4, {-1, 3, config_.roi_height_, config_.roi_width_}}); + network_io.emplace_back("lidar2image", nvinfer1::Dims{3, {-1, 4, 4}}); // 4x4 matrix + + network_io.emplace_back("geom_feats", nvinfer1::Dims{2, {-1, 4}}); + network_io.emplace_back("kept", nvinfer1::Dims{1, {-1}}); + network_io.emplace_back("ranks", nvinfer1::Dims{1, {-1}}); + network_io.emplace_back("indices", nvinfer1::Dims{1, {-1}}); + } + + // Outputs + network_io.emplace_back( + "bbox_pred", nvinfer1::Dims{2, {config_.num_box_values_, config_.num_proposals_}}); + network_io.emplace_back("score", nvinfer1::Dims{1, {config_.num_proposals_}}); + network_io.emplace_back("label_pred", nvinfer1::Dims{1, {config_.num_proposals_}}); + + std::vector profile_dims; + + // Lidar branch + profile_dims.emplace_back( + "voxels", + nvinfer1::Dims{ + 3, {config_.voxels_num_[0], config_.max_points_per_voxel_, config_.num_point_feature_size_}}, + nvinfer1::Dims{ + 3, {config_.voxels_num_[1], config_.max_points_per_voxel_, config_.num_point_feature_size_}}, + nvinfer1::Dims{ + 3, {config_.voxels_num_[2], config_.max_points_per_voxel_, config_.num_point_feature_size_}}); + + profile_dims.emplace_back( + "num_points_per_voxel", nvinfer1::Dims{1, {config_.voxels_num_[0]}}, + nvinfer1::Dims{1, {config_.voxels_num_[1]}}, nvinfer1::Dims{1, {config_.voxels_num_[2]}}); + + profile_dims.emplace_back( + "coors", nvinfer1::Dims{2, {config_.voxels_num_[0], 3}}, + nvinfer1::Dims{2, {config_.voxels_num_[1], 3}}, nvinfer1::Dims{2, {config_.voxels_num_[2], 3}}); + + // Camera branch + if (config_.sensor_fusion_) { + profile_dims.emplace_back( + "points", nvinfer1::Dims{2, {config_.voxels_num_[0], 5}}, + nvinfer1::Dims{2, {config_.voxels_num_[1], 5}}, + nvinfer1::Dims{2, {config_.voxels_num_[2], 5}}); + + profile_dims.emplace_back( + "camera_mask", nvinfer1::Dims{1, {1}}, nvinfer1::Dims{1, {config_.num_cameras_}}, + nvinfer1::Dims{1, {config_.num_cameras_}}); + + profile_dims.emplace_back( + "imgs", nvinfer1::Dims{4, {1, 3, config_.roi_height_, config_.roi_width_}}, + nvinfer1::Dims{4, {config_.num_cameras_, 3, config_.roi_height_, config_.roi_width_}}, + nvinfer1::Dims{4, {config_.num_cameras_, 3, config_.roi_height_, config_.roi_width_}}); + profile_dims.emplace_back( + "lidar2image", nvinfer1::Dims{3, {1, 4, 4}}, nvinfer1::Dims{3, {config_.num_cameras_, 4, 4}}, + nvinfer1::Dims{3, {config_.num_cameras_, 4, 4}}); + + const 
std::int64_t num_geom_feats = config_.num_cameras_ * config_.features_height_ * + config_.features_width_ * config_.num_depth_features_; + + profile_dims.emplace_back( + "geom_feats", nvinfer1::Dims{2, {0, 4}}, nvinfer1::Dims{2, {num_geom_feats, 4}}, + nvinfer1::Dims{2, {num_geom_feats, 4}}); + + profile_dims.emplace_back( + "kept", nvinfer1::Dims{1, {0}}, nvinfer1::Dims{1, {num_geom_feats}}, + nvinfer1::Dims{1, {num_geom_feats}}); + + profile_dims.emplace_back( + "ranks", nvinfer1::Dims{1, {0}}, nvinfer1::Dims{1, {num_geom_feats}}, + nvinfer1::Dims{1, {num_geom_feats}}); + + profile_dims.emplace_back( + "indices", nvinfer1::Dims{1, {0}}, nvinfer1::Dims{1, {num_geom_feats}}, + nvinfer1::Dims{1, {num_geom_feats}}); + } + + auto network_io_ptr = + std::make_unique>(network_io); + auto profile_dims_ptr = + std::make_unique>(profile_dims); + + network_trt_ptr_ = std::make_unique( + trt_config, std::make_shared(), + std::vector{config_.plugins_path_}); + + if (!network_trt_ptr_->setup(std::move(profile_dims_ptr), std::move(network_io_ptr))) { + throw std::runtime_error("Failed to setup TRT engine." + config_.plugins_path_); + } + + if (config_.sensor_fusion_) { + /* nvinfer1::Dims input_imgs_shape; + input_imgs_shape.nbDims = 4; + input_imgs_shape.d[0] = config_.num_cameras_; + input_imgs_shape.d[1] = 3; + input_imgs_shape.d[2] = config_.roi_height_; + input_imgs_shape.d[3] = config_.roi_width_; + + nvinfer1::Dims input_lidar2image_shape; + input_lidar2image_shape.nbDims = 3; + input_lidar2image_shape.d[0] = config_.num_cameras_; + input_lidar2image_shape.d[1] = 4; + input_lidar2image_shape.d[2] = 4; + + nvinfer1::Dims input_geom_feats_shape; + input_geom_feats_shape.nbDims = 2; + input_geom_feats_shape.d[0] = num_ranks_; + input_geom_feats_shape.d[1] = 4; + + nvinfer1::Dims input_kept_shape; + input_kept_shape.nbDims = 1; + input_kept_shape.d[0] = num_kept_; + + nvinfer1::Dims input_ranks_shape; + input_ranks_shape.nbDims = 1; + input_ranks_shape.d[0] = num_ranks_; + + nvinfer1::Dims input_indices_shape; + input_indices_shape.nbDims = 1; + input_indices_shape.d[0] = num_indices_; */ + + std::vector lidar2image_host(config_.num_cameras_ * 4 * 4); + cudaMemcpy( + lidar2image_host.data(), lidar2image_d_.get(), config_.num_cameras_ * 4 * 4 * sizeof(float), + cudaMemcpyDeviceToHost); + + network_trt_ptr_->setInputShape("camera_mask", nvinfer1::Dims{1, {config_.num_cameras_}}); + network_trt_ptr_->setInputShape( + "imgs", + nvinfer1::Dims{4, {config_.num_cameras_, 3, config_.roi_height_, config_.roi_width_}}); + network_trt_ptr_->setInputShape("lidar2image", nvinfer1::Dims{3, {config_.num_cameras_, 4, 4}}); + network_trt_ptr_->setInputShape("geom_feats", nvinfer1::Dims{2, {num_ranks_, 4}}); + network_trt_ptr_->setInputShape("kept", nvinfer1::Dims{1, {num_kept_}}); + network_trt_ptr_->setInputShape("ranks", nvinfer1::Dims{1, {num_ranks_}}); + network_trt_ptr_->setInputShape("indices", nvinfer1::Dims{1, {num_indices_}}); + } + + network_trt_ptr_->setTensorAddress("voxels", voxel_features_d_.get()); + network_trt_ptr_->setTensorAddress("num_points_per_voxel", num_points_per_voxel_d_.get()); + network_trt_ptr_->setTensorAddress("coors", voxel_coords_d_.get()); + + if (config_.sensor_fusion_) { + network_trt_ptr_->setTensorAddress("points", points_d_.get()); + network_trt_ptr_->setTensorAddress("camera_mask", camera_masks_d_.get()); + network_trt_ptr_->setTensorAddress("imgs", roi_tensor_d_.get()); + network_trt_ptr_->setTensorAddress("lidar2image", lidar2image_d_.get()); + 
network_trt_ptr_->setTensorAddress("geom_feats", geom_feats_d_.get()); + network_trt_ptr_->setTensorAddress("kept", kept_d_.get()); + network_trt_ptr_->setTensorAddress("ranks", ranks_d_.get()); + network_trt_ptr_->setTensorAddress("indices", indices_d_.get()); + } + + network_trt_ptr_->setTensorAddress("label_pred", label_pred_output_d_.get()); + network_trt_ptr_->setTensorAddress("bbox_pred", bbox_pred_output_d_.get()); + network_trt_ptr_->setTensorAddress("score", score_output_d_.get()); +} + +bool BEVFusionTRT::detect( + const sensor_msgs::msg::PointCloud2::ConstSharedPtr & pc_msg, + const std::vector & image_msgs, + const std::vector & camera_masks, const tf2_ros::Buffer & tf_buffer, + std::vector & det_boxes3d, std::unordered_map & proc_timing) +{ + stop_watch_ptr_->toc("processing/inner", true); + if (!preProcess(pc_msg, image_msgs, camera_masks, tf_buffer)) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_bevfusion"), "Pre-process failed. Skipping detection."); + return false; + } + proc_timing.emplace( + "debug/processing_time/preprocess_ms", stop_watch_ptr_->toc("processing/inner", true)); + + if (!inference()) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_bevfusion"), "Inference failed. Skipping detection."); + return false; + } + proc_timing.emplace( + "debug/processing_time/inference_ms", stop_watch_ptr_->toc("processing/inner", true)); + + if (!postProcess(det_boxes3d)) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_bevfusion"), "Post-process failed. Skipping detection"); + return false; + } + proc_timing.emplace( + "debug/processing_time/postprocess_ms", stop_watch_ptr_->toc("processing/inner", true)); + + return true; +} + +void BEVFusionTRT::setIntrinsicsExtrinsics( + std::vector & camera_info_vector, + std::vector & lidar2camera_vector) +{ + roi_start_y_vector_.clear(); + std::vector img_aug_matrices; + + for (std::int64_t i = 0; i < config_.num_cameras_; i++) { + float fx = camera_info_vector[i].p[0]; + float fy = camera_info_vector[i].p[5]; + float cx = camera_info_vector[i].p[2]; + float cy = camera_info_vector[i].p[6]; + + Matrix4fRowM camera2lidar_matrix = lidar2camera_vector[i].inverse(); + float r31 = camera2lidar_matrix(2, 0); + float r32 = camera2lidar_matrix(2, 1); + float r33 = camera2lidar_matrix(2, 2); + + float yl = cy + cx * (fy / fx) * (r31 / r32) - fy * (r33 / r32); + float yr = + cy + (cx - config_.raw_image_width_ + 1) * (fy / fx) * (r31 / r32) - fy * (r33 / r32); + float yh = std::max(0.0f, std::min(yr, yl)); + float yh_resized = yh * config_.img_aug_scale_y_; + int crop_h = static_cast( + std::min(yh_resized, static_cast(config_.resized_height_ - config_.roi_height_))); + + Matrix4fRowM img_aug_matrix = Matrix4fRowM::Identity(); + img_aug_matrix(0, 0) = config_.img_aug_scale_x_; + img_aug_matrix(1, 1) = config_.img_aug_scale_y_; + img_aug_matrix(1, 3) = -static_cast(crop_h); + + img_aug_matrices.push_back(img_aug_matrix); + roi_start_y_vector_.push_back(crop_h); + } + + auto [lidar2images_flattened, geom_feats, kept, ranks, indices] = + precompute_features(lidar2camera_vector, img_aug_matrices, camera_info_vector, config_); + + assert(static_cast(lidar2images_flattened.size()) == config_.num_cameras_ * 4 * 4); + + assert( + static_cast(geom_feats.size()) <= + config_.num_cameras_ * 4 * config_.features_height_ * config_.features_width_ * + config_.num_depth_features_); + assert( + static_cast(kept.size()) == config_.num_cameras_ * config_.features_height_ * + config_.features_width_ * + config_.num_depth_features_); + assert( + 
static_cast(ranks.size()) <= config_.num_cameras_ * config_.features_height_ * + config_.features_width_ * + config_.num_depth_features_); + assert( + static_cast(indices.size()) <= config_.num_cameras_ * config_.features_height_ * + config_.features_width_ * + config_.num_depth_features_); + + num_geom_feats_ = static_cast(geom_feats.size()); + num_kept_ = static_cast(kept.size()); + num_ranks_ = static_cast(ranks.size()); + num_indices_ = static_cast(indices.size()); + + assert(num_geom_feats_ == 4 * num_ranks_); + assert(num_ranks_ == num_indices_); + + cudaMemcpy( + lidar2image_d_.get(), lidar2images_flattened.data(), + config_.num_cameras_ * 4 * 4 * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy( + geom_feats_d_.get(), geom_feats.data(), num_geom_feats_ * sizeof(std::int32_t), + cudaMemcpyHostToDevice); + cudaMemcpy(kept_d_.get(), kept.data(), num_kept_ * sizeof(std::uint8_t), cudaMemcpyHostToDevice); + cudaMemcpy( + ranks_d_.get(), ranks.data(), num_ranks_ * sizeof(std::int64_t), cudaMemcpyHostToDevice); + cudaMemcpy( + indices_d_.get(), indices.data(), num_indices_ * sizeof(std::int64_t), cudaMemcpyHostToDevice); +} + +bool BEVFusionTRT::preProcess( + const sensor_msgs::msg::PointCloud2::ConstSharedPtr & pc_msg, + const std::vector & image_msgs, + const std::vector & camera_masks, const tf2_ros::Buffer & tf_buffer) +{ + using autoware::cuda_utils::clear_async; + + if (!is_data_layout_compatible_with_point_xyzirc(*pc_msg)) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_bevfusion"), "Invalid point type. Skipping detection."); + return false; + } + + if (!vg_ptr_->enqueuePointCloud(*pc_msg, tf_buffer)) { + return false; + } + + // TODO(knzo25): these should be able to be removed as they are filled by TensorRT + clear_async(label_pred_output_d_.get(), config_.num_proposals_, stream_); + clear_async(bbox_pred_output_d_.get(), bbox_pred_size_, stream_); + clear_async(score_output_d_.get(), config_.num_proposals_, stream_); + + clear_async(voxel_features_d_.get(), voxel_features_size_, stream_); + clear_async(voxel_coords_d_.get(), voxel_coords_size_, stream_); + clear_async(num_points_per_voxel_d_.get(), config_.max_num_voxels_, stream_); + clear_async(points_d_.get(), config_.cloud_capacity_ * config_.num_point_feature_size_, stream_); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + // TODO(knzo25): move this to each image callback + if (config_.sensor_fusion_) { + int start_x = + std::max(0, static_cast(config_.resized_width_) - static_cast(config_.roi_width_)) / + 2; + + for (std::int64_t camera_id = 0; camera_id < config_.num_cameras_; camera_id++) { + int start_y = roi_start_y_vector_[camera_id]; + cudaMemcpyAsync( + image_buffers_d_[camera_id].get(), image_msgs[camera_id]->data.data(), + config_.raw_image_height_ * config_.raw_image_width_ * 3, cudaMemcpyHostToDevice, stream_); + + pre_ptr_->resize_and_extract_roi_launch( + image_buffers_d_[camera_id].get(), + &roi_tensor_d_[camera_id * config_.roi_height_ * config_.roi_width_ * 3], + config_.raw_image_height_, config_.raw_image_width_, config_.resized_height_, + config_.resized_width_, config_.roi_height_, config_.roi_width_, start_y, start_x, + camera_streams_[camera_id]); + } + + cudaMemcpyAsync( + camera_masks_d_.get(), camera_masks.data(), config_.num_cameras_ * sizeof(float), + cudaMemcpyHostToDevice, stream_); + } + + const auto num_points = vg_ptr_->generateSweepPoints(points_d_); + + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + auto num_voxels = static_cast(pre_ptr_->generateVoxels( + 
points_d_.get(), num_points, voxel_features_d_.get(), voxel_coords_d_.get(), + num_points_per_voxel_d_.get())); + + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + if (num_voxels < config_.min_num_voxels_) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_bevfusion"), + "Too few voxels (" << num_voxels << ") for the actual optimization profile (" + << config_.min_num_voxels_ << ")"); + return false; + } + if (num_voxels > config_.max_num_voxels_) { + RCLCPP_ERROR_STREAM( + rclcpp::get_logger("lidar_bevfusion"), + "Actual number of voxels (" << num_voxels + << ") is over the limit for the actual optimization profile (" + << config_.max_num_voxels_ << "). Clipping to the limit."); + num_voxels = config_.max_num_voxels_; + } + + network_trt_ptr_->setInputShape( + "voxels", nvinfer1::Dims{ + 3, {num_voxels, config_.max_points_per_voxel_, config_.num_point_feature_size_}}); + network_trt_ptr_->setInputShape("num_points_per_voxel", nvinfer1::Dims{1, {num_voxels}}); + network_trt_ptr_->setInputShape("coors", nvinfer1::Dims{2, {num_voxels, 3}}); + + if (config_.sensor_fusion_) { + network_trt_ptr_->setInputShape( + "points", nvinfer1::Dims{2, {static_cast(num_points), 5}}); + network_trt_ptr_->setInputShape("geom_feats", nvinfer1::Dims{2, {num_ranks_, 4}}); + network_trt_ptr_->setInputShape("kept", nvinfer1::Dims{1, {num_kept_}}); + network_trt_ptr_->setInputShape("ranks", nvinfer1::Dims{1, {num_ranks_}}); + network_trt_ptr_->setInputShape("indices", nvinfer1::Dims{1, {num_indices_}}); + } + + for (std::int64_t i = 0; i < config_.num_cameras_; i++) { + cudaStreamSynchronize(camera_streams_[i]); + } + + return true; +} + +bool BEVFusionTRT::inference() +{ + auto status = network_trt_ptr_->enqueueV3(stream_); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + if (!status) { + RCLCPP_WARN_STREAM( + rclcpp::get_logger("lidar_bevfusion"), "Fail to enqueue and skip to detect."); + return false; + } + + return true; +} + +bool BEVFusionTRT::postProcess(std::vector & det_boxes3d) +{ + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + CHECK_CUDA_ERROR(post_ptr_->generateDetectedBoxes3D_launch( + label_pred_output_d_.get(), bbox_pred_output_d_.get(), score_output_d_.get(), det_boxes3d, + stream_)); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + return true; +} + +} // namespace autoware::lidar_bevfusion diff --git a/perception/autoware_lidar_bevfusion/lib/detection_class_remapper.cpp b/perception/autoware_lidar_bevfusion/lib/detection_class_remapper.cpp new file mode 100644 index 0000000000000..e16aa0f0177b0 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/lib/detection_class_remapper.cpp @@ -0,0 +1,74 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
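DetectionClassRemapper::setParameters below receives its three NxN matrices flattened row-major, exactly as they come from the ROS parameter file: entry (i, j) states whether an object currently labeled i may be remapped to label j, and between which bird's-eye-view areas the remap applies. A sketch of the corresponding parameter layout for a hypothetical three-class setup (element types follow the setParameters signature: int64 flags, double areas):

#include <cstdint>
#include <vector>

// Hypothetical 3-class example (CAR=0, TRUCK=1, BUS=2): allow CAR -> TRUCK
// when the bird's-eye-view area of the box lies in [8.0, 40.0] m^2.
// Row = current label, column = candidate label, flattened row-major.
const std::vector<std::int64_t> allow_remapping_by_area_matrix{
  0, 1, 0,   // CAR may become TRUCK
  0, 0, 0,   // TRUCK keeps its label
  0, 0, 0};  // BUS keeps its label
const std::vector<double> min_area_matrix{0.0, 8.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
const std::vector<double> max_area_matrix{0.0, 40.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};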
+ +#include + +#include +#include +#include +namespace autoware::lidar_bevfusion +{ + +void DetectionClassRemapper::setParameters( + const std::vector & allow_remapping_by_area_matrix, + const std::vector & min_area_matrix, const std::vector & max_area_matrix) +{ + assert(allow_remapping_by_area_matrix.size() == min_area_matrix.size()); + assert(allow_remapping_by_area_matrix.size() == max_area_matrix.size()); + assert( + std::pow(static_cast(std::sqrt(min_area_matrix.size())), 2) == min_area_matrix.size()); + + num_labels_ = static_cast(std::sqrt(min_area_matrix.size())); + + Eigen::Map> + allow_remapping_by_area_matrix_tmp( + allow_remapping_by_area_matrix.data(), num_labels_, num_labels_); + allow_remapping_by_area_matrix_ = allow_remapping_by_area_matrix_tmp.transpose() + .cast(); // Eigen is column major by default + + Eigen::Map min_area_matrix_tmp( + min_area_matrix.data(), num_labels_, num_labels_); + min_area_matrix_ = min_area_matrix_tmp.transpose(); // Eigen is column major by default + + Eigen::Map max_area_matrix_tmp( + max_area_matrix.data(), num_labels_, num_labels_); + max_area_matrix_ = max_area_matrix_tmp.transpose(); // Eigen is column major by default + + min_area_matrix_ = min_area_matrix_.unaryExpr( + [](double v) { return std::isfinite(v) ? v : std::numeric_limits::max(); }); + max_area_matrix_ = max_area_matrix_.unaryExpr( + [](double v) { return std::isfinite(v) ? v : std::numeric_limits::max(); }); +} + +void DetectionClassRemapper::mapClasses(autoware_perception_msgs::msg::DetectedObjects & msg) +{ + for (auto & object : msg.objects) { + const float bev_area = object.shape.dimensions.x * object.shape.dimensions.y; + + for (auto & classification : object.classification) { + auto & label = classification.label; + + for (int i = 0; i < num_labels_; ++i) { + if ( + allow_remapping_by_area_matrix_(label, i) && bev_area >= min_area_matrix_(label, i) && + bev_area <= max_area_matrix_(label, i)) { + label = i; + break; + } + } + } + } +} + +} // namespace autoware::lidar_bevfusion diff --git a/perception/autoware_lidar_bevfusion/lib/postprocess/circle_nms_kernel.cu b/perception/autoware_lidar_bevfusion/lib/postprocess/circle_nms_kernel.cu new file mode 100644 index 0000000000000..d30bc06dfbe3d --- /dev/null +++ b/perception/autoware_lidar_bevfusion/lib/postprocess/circle_nms_kernel.cu @@ -0,0 +1,147 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Modified from +// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu + +/* +3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) +Written by Shaoshuai Shi +All Rights Reserved 2019-2020. 
+*/ + +#include "autoware/lidar_bevfusion/postprocess/circle_nms_kernel.hpp" +#include "autoware/lidar_bevfusion/utils.hpp" + +#include + +#include + +#include + +namespace +{ +const std::size_t THREADS_PER_BLOCK_NMS = 16; +} // namespace + +namespace autoware::lidar_bevfusion +{ + +__device__ inline float dist2dPow(const Box3D * a, const Box3D * b) +{ + return powf(a->x - b->x, 2) + powf(a->y - b->y, 2); +} + +// cspell: ignore divup +__global__ void circleNMS_Kernel( + const Box3D * __restrict__ boxes, const std::size_t num_boxes3d, const std::size_t col_blocks, + const float dist2d_pow_threshold, std::uint64_t * __restrict__ mask) +{ + // params: boxes (N,) + // params: mask (N, divup(N/THREADS_PER_BLOCK_NMS)) + + const auto row_start = blockIdx.y; + const auto col_start = blockIdx.x; + + if (row_start > col_start) return; + + const std::size_t row_size = + fminf(num_boxes3d - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS); + const std::size_t col_size = + fminf(num_boxes3d - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS); + + __shared__ Box3D block_boxes[THREADS_PER_BLOCK_NMS]; + + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x] = boxes[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const std::size_t cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const Box3D * cur_box = boxes + cur_box_idx; + + std::uint64_t t = 0; + std::size_t start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (std::size_t i = start; i < col_size; i++) { + if (dist2dPow(cur_box, block_boxes + i) < dist2d_pow_threshold) { + t |= 1ULL << i; + } + } + mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +cudaError_t circleNMS_launch( + const thrust::device_vector & boxes3d, const std::size_t num_boxes3d, + std::size_t col_blocks, const float distance_threshold, + thrust::device_vector & mask, cudaStream_t stream) +{ + const float dist2d_pow_thres = powf(distance_threshold, 2); + + dim3 blocks(col_blocks, col_blocks); + dim3 threads(THREADS_PER_BLOCK_NMS); + circleNMS_Kernel<<>>( + thrust::raw_pointer_cast(boxes3d.data()), num_boxes3d, col_blocks, dist2d_pow_thres, + thrust::raw_pointer_cast(mask.data())); + + return cudaGetLastError(); +} + +std::size_t circleNMS( + thrust::device_vector & boxes3d, const float distance_threshold, + thrust::device_vector & keep_mask, cudaStream_t stream) +{ + const auto num_boxes3d = boxes3d.size(); + const auto col_blocks = divup(num_boxes3d, THREADS_PER_BLOCK_NMS); + thrust::device_vector mask_d(num_boxes3d * col_blocks); + + CHECK_CUDA_ERROR( + circleNMS_launch(boxes3d, num_boxes3d, col_blocks, distance_threshold, mask_d, stream)); + + // memcpy device to host + thrust::host_vector mask_h(mask_d.size()); + thrust::copy(mask_d.begin(), mask_d.end(), mask_h.begin()); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream)); + + // generate keep_mask + std::vector remv_h(col_blocks); + thrust::host_vector keep_mask_h(keep_mask.size()); + std::size_t num_to_keep = 0; + for (std::size_t i = 0; i < num_boxes3d; i++) { + auto nblock = i / THREADS_PER_BLOCK_NMS; + auto inblock = i % THREADS_PER_BLOCK_NMS; + + if (!(remv_h[nblock] & (1ULL << inblock))) { + keep_mask_h[i] = true; + num_to_keep++; + std::uint64_t * p = &mask_h[0] + i * col_blocks; + for (std::size_t j = nblock; j < col_blocks; j++) { + remv_h[j] |= p[j]; + } + } else { + keep_mask_h[i] = false; + } + } + + // memcpy host to device + keep_mask = keep_mask_h; + + return num_to_keep; +} + +} // 
namespace autoware::lidar_bevfusion diff --git a/perception/autoware_lidar_bevfusion/lib/postprocess/non_maximum_suppression.cpp b/perception/autoware_lidar_bevfusion/lib/postprocess/non_maximum_suppression.cpp new file mode 100644 index 0000000000000..dd64a73890247 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/lib/postprocess/non_maximum_suppression.cpp @@ -0,0 +1,114 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/lidar_bevfusion/postprocess/non_maximum_suppression.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ + +void NonMaximumSuppression::setParameters(const NMSParams & params) +{ + assert(params.search_distance_2d_ >= 0.0); + assert(params.iou_threshold_ >= 0.0 && params.iou_threshold_ <= 1.0); + + params_ = params; + target_class_mask_ = classNamesToBooleanMask(params.target_class_names_); +} + +bool NonMaximumSuppression::isTargetLabel(const std::uint8_t label) +{ + if (label >= target_class_mask_.size()) { + return false; + } + + assert(label < target_class_mask_.size()); + + return target_class_mask_.at(label); +} + +bool NonMaximumSuppression::isTargetPairObject( + const DetectedObject & object1, const DetectedObject & object2) +{ + const auto label1 = + autoware::object_recognition_utils::getHighestProbLabel(object1.classification); + const auto label2 = + autoware::object_recognition_utils::getHighestProbLabel(object2.classification); + + if (isTargetLabel(label1) && isTargetLabel(label2)) { + return true; + } + + const auto search_sqr_dist_2d = params_.search_distance_2d_ * params_.search_distance_2d_; + const auto sqr_dist_2d = autoware::universe_utils::calcSquaredDistance2d( + autoware::object_recognition_utils::getPose(object1), + autoware::object_recognition_utils::getPose(object2)); + return sqr_dist_2d <= search_sqr_dist_2d; +} + +Eigen::MatrixXd NonMaximumSuppression::generateIoUMatrix( + const std::vector & input_objects) +{ + // NOTE: row = target objects to be suppressed, col = source objects to be compared + Eigen::MatrixXd triangular_matrix = + Eigen::MatrixXd::Zero(input_objects.size(), input_objects.size()); + for (std::size_t target_i = 0; target_i < input_objects.size(); ++target_i) { + for (std::size_t source_i = 0; source_i < target_i; ++source_i) { + const auto & target_obj = input_objects.at(target_i); + const auto & source_obj = input_objects.at(source_i); + if (!isTargetPairObject(target_obj, source_obj)) { + continue; + } + + if (params_.nms_type_ == NMS_TYPE::IoU_BEV) { + const double iou = autoware::object_recognition_utils::get2dIoU(target_obj, source_obj); + triangular_matrix(target_i, source_i) = iou; + // NOTE: If the target object has any objects with iou > iou_threshold, it + // will be suppressed regardless of later results. 
+        if (iou > params_.iou_threshold_) {
+          break;
+        }
+      }
+    }
+  }
+
+  return triangular_matrix;
+}
+
+std::vector<DetectedObject> NonMaximumSuppression::apply(
+  const std::vector<DetectedObject> & input_objects)
+{
+  Eigen::MatrixXd iou_matrix = generateIoUMatrix(input_objects);
+
+  std::vector<DetectedObject> output_objects;
+  output_objects.reserve(input_objects.size());
+  for (std::size_t i = 0; i < input_objects.size(); ++i) {
+    const auto value = iou_matrix.row(i).maxCoeff();
+    if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
+      if (value <= params_.iou_threshold_) {
+        output_objects.emplace_back(input_objects.at(i));
+      }
+    }
+  }
+
+  return output_objects;
+}
+}  // namespace autoware::lidar_bevfusion
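A minimal configuration sketch for this class (the values mirror the defaults in bevfusion.schema.dummy.json further below; `detections` is a hypothetical vector of DetectedObject):

  // Deduplicate overlapping CAR detections within 10 m of each other at IoU > 0.1.
  autoware::lidar_bevfusion::NMSParams params;
  params.nms_type_ = autoware::lidar_bevfusion::NMS_TYPE::IoU_BEV;
  params.target_class_names_ = {"CAR"};
  params.search_distance_2d_ = 10.0;
  params.iou_threshold_ = 0.1;

  autoware::lidar_bevfusion::NonMaximumSuppression nms;
  nms.setParameters(params);
  // const auto deduplicated = nms.apply(detections);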
diff --git a/perception/autoware_lidar_bevfusion/lib/postprocess/postprocess_kernel.cu b/perception/autoware_lidar_bevfusion/lib/postprocess/postprocess_kernel.cu
new file mode 100644
index 0000000000000..9ca15ba19e46d
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/lib/postprocess/postprocess_kernel.cu
@@ -0,0 +1,141 @@
+// Copyright 2025 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "autoware/lidar_bevfusion/postprocess/circle_nms_kernel.hpp"
+#include "autoware/lidar_bevfusion/postprocess/postprocess_kernel.hpp"
+
+#include <thrust/copy.h>
+#include <thrust/count.h>
+#include <thrust/execution_policy.h>
+#include <thrust/sort.h>
+
+#include <vector>
+
+namespace autoware::lidar_bevfusion
+{
+struct is_score_greater
+{
+  is_score_greater(float t) : t_(t) {}
+
+  __device__ bool operator()(const Box3D & b) { return b.score > t_; }
+
+private:
+  float t_{0.0};
+};
+
+struct is_kept
+{
+  __device__ bool operator()(const bool keep) { return keep; }
+};
+
+struct score_greater
+{
+  __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; }
+};
+
+__device__ inline float sigmoid(float x)
+{
+  return 1.0f / (1.0f + expf(-x));
+}
+
+__global__ void generateBoxes3D_kernel(
+  const std::int64_t * __restrict__ label_pred_output, const float * __restrict__ bbox_pred_output,
+  const float * __restrict__ score_output, const float voxel_size_x, const float voxel_size_y,
+  const float min_x_range, const float min_y_range, const int num_proposals,
+  const float out_size_factor, const float * __restrict__ yaw_norm_thresholds,
+  Box3D * __restrict__ det_boxes3d)
+{
+  int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (point_idx >= num_proposals) {
+    return;
+  }
+
+  const float yaw_sin = bbox_pred_output[6 * num_proposals + point_idx];
+  const float yaw_cos = bbox_pred_output[7 * num_proposals + point_idx];
+  const float yaw_norm = sqrtf(yaw_sin * yaw_sin + yaw_cos * yaw_cos);
+  const int label = static_cast<int>(label_pred_output[point_idx]);
+
+  det_boxes3d[point_idx].label = label;
+  det_boxes3d[point_idx].score =
+    yaw_norm >= yaw_norm_thresholds[label] ? score_output[point_idx] : 0.f;
+  det_boxes3d[point_idx].x =
+    bbox_pred_output[0 * num_proposals + point_idx] * out_size_factor * voxel_size_x + min_x_range;
+  det_boxes3d[point_idx].y =
+    bbox_pred_output[1 * num_proposals + point_idx] * out_size_factor * voxel_size_y + min_y_range;
+  det_boxes3d[point_idx].z = bbox_pred_output[2 * num_proposals + point_idx];
+  det_boxes3d[point_idx].length = expf(bbox_pred_output[3 * num_proposals + point_idx]);
+  det_boxes3d[point_idx].width = expf(bbox_pred_output[4 * num_proposals + point_idx]);
+  det_boxes3d[point_idx].height = expf(bbox_pred_output[5 * num_proposals + point_idx]);
+  det_boxes3d[point_idx].yaw = atan2f(yaw_sin, yaw_cos);
+  det_boxes3d[point_idx].vx = bbox_pred_output[8 * num_proposals + point_idx];
+  det_boxes3d[point_idx].vy = bbox_pred_output[9 * num_proposals + point_idx];
+}
+
+PostprocessCuda::PostprocessCuda(const BEVFusionConfig & config, cudaStream_t & stream)
+: config_(config), stream_(stream)
+{
+}
+
+// cspell: ignore divup
+cudaError_t PostprocessCuda::generateDetectedBoxes3D_launch(
+  const std::int64_t * label_pred_output, const float * bbox_pred_output,
+  const float * score_output, std::vector<Box3D> & det_boxes3d, cudaStream_t stream)
+{
+  dim3 threads = {config_.threads_per_block_};
+  dim3 blocks = {divup(config_.num_proposals_, threads.x)};
+
+  auto boxes3d_d = thrust::device_vector<Box3D>(config_.num_proposals_);
+  auto yaw_norm_thresholds_d = thrust::device_vector<float>(
+    config_.yaw_norm_thresholds_.begin(), config_.yaw_norm_thresholds_.end());
+
+  generateBoxes3D_kernel<<<blocks, threads, 0, stream>>>(
+    label_pred_output, bbox_pred_output, score_output, config_.voxel_x_size_,
+    config_.voxel_y_size_, config_.min_x_range_, config_.min_y_range_, config_.num_proposals_,
+    config_.out_size_factor_, thrust::raw_pointer_cast(yaw_norm_thresholds_d.data()),
+    thrust::raw_pointer_cast(boxes3d_d.data()));
+
+  // suppress by score
+  const auto num_det_boxes3d = thrust::count_if(
+    thrust::device, boxes3d_d.begin(), boxes3d_d.end(),
+    is_score_greater(config_.score_threshold_));
+
+  if (num_det_boxes3d == 0) {
+    return cudaGetLastError();
+  }
+
+  thrust::device_vector<Box3D> det_boxes3d_d(num_det_boxes3d);
+  thrust::copy_if(
+    thrust::device, boxes3d_d.begin(), boxes3d_d.end(), det_boxes3d_d.begin(),
+    is_score_greater(config_.score_threshold_));
+
+  // sort by score
+  thrust::sort(det_boxes3d_d.begin(), det_boxes3d_d.end(), score_greater());
+
+  // suppress by NMS
+  thrust::device_vector<bool> final_keep_mask_d(num_det_boxes3d);
+  const auto num_final_det_boxes3d =
+    circleNMS(det_boxes3d_d, config_.circle_nms_dist_threshold_, final_keep_mask_d, stream);
+
+  thrust::device_vector<Box3D> final_det_boxes3d_d(num_final_det_boxes3d);
+  thrust::copy_if(
+    thrust::device, det_boxes3d_d.begin(), det_boxes3d_d.end(), final_keep_mask_d.begin(),
+    final_det_boxes3d_d.begin(), is_kept());
+
+  // memcpy device to host
+  det_boxes3d.resize(num_final_det_boxes3d);
+  thrust::copy(final_det_boxes3d_d.begin(), final_det_boxes3d_d.end(), det_boxes3d.begin());
+
+  return cudaGetLastError();
+}
+
+}  // namespace autoware::lidar_bevfusion
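For reference, the decoding above maps head outputs from feature-grid units back into metric coordinates as x = bbox_pred[0] * out_size_factor * voxel_size_x + min_x_range. With illustrative values in the style of this package's configs (voxel_size_x = 0.3 m, out_size_factor = 8, min_x_range = -76.8 m), a raw prediction of 32.0 decodes to 32.0 * 8 * 0.3 - 76.8 = 0.0 m, the center of the grid; length, width and height are passed through expf, so the head predicts them in log space.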
diff --git a/perception/autoware_lidar_bevfusion/lib/preprocess/pointcloud_densification.cpp b/perception/autoware_lidar_bevfusion/lib/preprocess/pointcloud_densification.cpp
new file mode 100644
index 0000000000000..1b25ac1ce3577
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/lib/preprocess/pointcloud_densification.cpp
@@ -0,0 +1,111 @@
+// Copyright 2025 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "autoware/lidar_bevfusion/preprocess/pointcloud_densification.hpp"
+
+#include <pcl_ros/transforms.hpp>
+#include <rclcpp/rclcpp.hpp>
+
+#include <boost/optional.hpp>
+
+#include <tf2_eigen/tf2_eigen.hpp>
+
+#include <string>
+#include <utility>
+
+namespace
+{
+
+boost::optional<geometry_msgs::msg::Transform> getTransform(
+  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
+  const std::string & source_frame_id, const rclcpp::Time & time)
+{
+  try {
+    geometry_msgs::msg::TransformStamped transform_stamped;
+    transform_stamped = tf_buffer.lookupTransform(
+      target_frame_id, source_frame_id, time, rclcpp::Duration::from_seconds(0.5));
+    return transform_stamped.transform;
+  } catch (tf2::TransformException & ex) {
+    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_bevfusion"), ex.what());
+    return boost::none;
+  }
+}
+
+Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
+{
+  Eigen::Affine3f a;
+  a.matrix() = tf2::transformToEigen(t).matrix().cast<float>();
+  return a;
+}
+
+}  // namespace
+
+namespace autoware::lidar_bevfusion
+{
+
+PointCloudDensification::PointCloudDensification(
+  const DensificationParam & param, cudaStream_t & stream)
+: param_(param), stream_(stream)
+{
+}
+
+bool PointCloudDensification::enqueuePointCloud(
+  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
+{
+  const auto header = pointcloud_msg.header;
+
+  if (param_.pointcloud_cache_size() > 1) {
+    auto transform_world2current =
+      getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
+    if (!transform_world2current) {
+      return false;
+    }
+    auto affine_world2current = transformToEigen(transform_world2current.get());
+
+    enqueue(pointcloud_msg, affine_world2current);
+  } else {
+    enqueue(pointcloud_msg, Eigen::Affine3f::Identity());
+  }
+
+  dequeue();
+
+  return true;
+}
+
+void PointCloudDensification::enqueue(
+  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
+{
+  affine_world2current_ = affine_world2current;
+  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
+
+  auto data_d = autoware::cuda_utils::make_unique<InputPointType[]>(msg.width * msg.height);
+
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    data_d.get(), msg.data.data(), sizeof(std::uint8_t) * msg.width * msg.height * msg.point_step,
+    cudaMemcpyHostToDevice, stream_));
+
+  PointCloudWithTransform pointcloud = {
+    std::move(data_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
+
+  pointcloud_cache_.push_front(std::move(pointcloud));
+}
+
+void PointCloudDensification::dequeue()
+{
+  if (pointcloud_cache_.size() > param_.pointcloud_cache_size()) {
+    pointcloud_cache_.pop_back();
+  }
+}
+
+}  // namespace autoware::lidar_bevfusion
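A minimal construction sketch (the frame id and frame count mirror the defaults in bevfusion.schema.dummy.json further below; `stream` and `tf_buffer` are assumed to already exist):

  // Keep the current frame plus one past frame, re-projected into the current
  // sensor frame via the "map" frame.
  autoware::lidar_bevfusion::DensificationParam param("map", /*num_past_frames=*/1);
  autoware::lidar_bevfusion::PointCloudDensification densification(param, stream);
  // For every incoming cloud: densification.enqueuePointCloud(msg, tf_buffer);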
diff --git a/perception/autoware_lidar_bevfusion/lib/preprocess/precomputed_features.cpp b/perception/autoware_lidar_bevfusion/lib/preprocess/precomputed_features.cpp
new file mode 100644
index 0000000000000..044f032cfab23
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/lib/preprocess/precomputed_features.cpp
@@ -0,0 +1,376 @@
+// Copyright 2025 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "autoware/lidar_bevfusion/preprocess/precomputed_features.hpp"
+
+#include <Eigen/Dense>
+#include <unsupported/Eigen/CXX11/Tensor>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+namespace autoware::lidar_bevfusion
+{
+
+Tensor4D create_frustum(const BEVFusionConfig & config)
+{
+  const float dbound_start = config.dbound_[0];
+  const float dbound_end = config.dbound_[1];
+  const float dbound_step = config.dbound_[2];
+  const int roi_height = static_cast<int>(config.roi_height_);            // Image height
+  const int roi_width = static_cast<int>(config.roi_width_);              // Image width
+  const int features_height = static_cast<int>(config.features_height_);  // Feature height
+  const int features_width = static_cast<int>(config.features_width_);    // Feature width
+
+  // Compute D (number of depth layers)
+  int D = static_cast<int>(std::floor((dbound_end - dbound_start) / dbound_step));
+  assert(static_cast<std::int64_t>(D) == config.num_depth_features_);
+
+  Tensor1D ds_vec(D);
+  for (Eigen::Index i = 0; i < D; ++i) {
+    ds_vec(i) = dbound_start + i * dbound_step;
+  }
+
+  // Create xs_lin and ys_lin tensors
+  Tensor1D xs_lin(features_width);
+  for (Eigen::Index i = 0; i < features_width; ++i) {
+    xs_lin(i) = static_cast<float>(i) * (roi_width - 1) / (features_width - 1);
+  }
+
+  Tensor1D ys_lin(features_height);
+  for (Eigen::Index i = 0; i < features_height; ++i) {
+    ys_lin(i) = static_cast<float>(i) * (roi_height - 1) / (features_height - 1);
+  }
+
+  // Reshape and broadcast ds_vec to (D, features_height, features_width)
+  Tensor3D ds_tensor = ds_vec.reshape(Eigen::array<Eigen::Index, 3>{D, 1, 1});
+  Tensor3D ds_broadcast =
+    ds_tensor.broadcast(Eigen::array<Eigen::Index, 3>{1, features_height, features_width});
+
+  // Reshape and broadcast xs_lin to (1, 1, features_width)
+  Tensor3D xs_tensor = xs_lin.reshape(Eigen::array<Eigen::Index, 3>{1, 1, features_width});
+  Tensor3D xs_broadcast =
+    xs_tensor.broadcast(Eigen::array<Eigen::Index, 3>{D, features_height, 1});
+
+  // Reshape and broadcast ys_lin to (1, features_height, 1)
+  Tensor3D ys_tensor = ys_lin.reshape(Eigen::array<Eigen::Index, 3>{1, features_height, 1});
+  Tensor3D ys_broadcast =
+    ys_tensor.broadcast(Eigen::array<Eigen::Index, 3>{D, 1, features_width});
+
+  // Stack xs, ys, ds along the last dimension to form the frustum
+  Tensor4D frustum(D, features_height, features_width, 3);
+  frustum.chip(0, 3) = xs_broadcast;
+  frustum.chip(1, 3) = ys_broadcast;
+  frustum.chip(2, 3) = ds_broadcast;
+
+  return frustum;
+}
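+
+// Worked example of the shape above (values illustrative, in the style of
+// this package's ml configs): dbound = [1.0, 60.0, 0.5] gives
+// D = floor((60.0 - 1.0) / 0.5) = 118 depth bins, so with a 32x88 feature
+// map the frustum tensor is [118, 32, 88, 3], each entry holding
+// (pixel x, pixel y, depth) in the ROI image frame.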
+
+Tensor5D get_geometry(
+  const Tensor4D & frustum,             // [D, H, W, 3]
+  const Tensor3D & camera2lidar_rots,   // [N, 3, 3]
+  const Tensor2D & camera2lidar_trans,  // [N, 3]
+  const Tensor3D & intrins_inverse,     // [N, 3, 3]
+  const Tensor3D & post_rots_inverse,   // [N, 3, 3]
+  const Tensor2D & post_trans           // [N, 3]
+)
+{
+  // Get dimensions
+  int N = camera2lidar_trans.dimension(0);
+  int D = frustum.dimension(0);
+  int H = frustum.dimension(1);
+  int W = frustum.dimension(2);
+
+  // Reshape and broadcast frustum to [N, D, H, W, 3]
+  Tensor5D frustum_broadcast = frustum.reshape(Eigen::array<Eigen::Index, 5>{1, D, H, W, 3})
+                                 .broadcast(Eigen::array<Eigen::Index, 5>{N, 1, 1, 1, 1});
+
+  // Reshape post_trans to [N, 1, 1, 1, 3]
+  Tensor5D post_trans_broadcast =
+    post_trans.reshape(Eigen::array<Eigen::Index, 5>{N, 1, 1, 1, 3})
+      .broadcast(Eigen::array<Eigen::Index, 5>{1, D, H, W, 1});
+
+  // Subtract post_trans from the frustum
+  Tensor5D points = frustum_broadcast - post_trans_broadcast;
+
+  // Unsqueeze points to [N, D, H, W, 3, 1]
+  Tensor6D points_unsqueezed = points.reshape(Eigen::array<Eigen::Index, 6>{N, D, H, W, 3, 1});
+
+  Tensor5D points_rotated(N, D, H, W, 3);
+
+  for (Eigen::Index camera_index = 0; camera_index < N; camera_index++) {
+    Tensor2D post_rot_inverse = post_rots_inverse.chip(camera_index, 0);
+    Tensor4D points_sliced = points.chip(camera_index, 0);
+
+    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = {
+      Eigen::IndexPair<Eigen::Index>(3, 0)};
+    Eigen::array<int, 2> shuffling({1, 0});
+    Tensor2D post_rot_inverse_transposed = post_rot_inverse.shuffle(shuffling);
+
+    Tensor4D points_sliced_rotated =
+      points_sliced.contract(post_rot_inverse_transposed, contract_dims);
+    points_rotated.chip(camera_index, 0) = points_sliced_rotated;
+  }
+
+  // Remove the last dimension (size 1)
+  Tensor5D points_squeezed = points_rotated;
+
+  Tensor5D points_xy = points_squeezed.slice(
+    Eigen::array<Eigen::Index, 5>{0, 0, 0, 0, 0}, Eigen::array<Eigen::Index, 5>{N, D, H, W, 2});
+  Tensor5D points_z = points_squeezed.slice(
+    Eigen::array<Eigen::Index, 5>{0, 0, 0, 0, 2}, Eigen::array<Eigen::Index, 5>{N, D, H, W, 1});
+  Tensor5D points_xy_scaled =
+    points_xy * points_z.broadcast(Eigen::array<Eigen::Index, 5>{1, 1, 1, 1, 2});
+
+  // Concatenate points_xy_scaled and points_z along the last dimension
+  Tensor5D points_cat(points_squeezed.dimensions());
+  points_cat.slice(
+    Eigen::array<Eigen::Index, 5>{0, 0, 0, 0, 0},
+    Eigen::array<Eigen::Index, 5>{N, D, H, W, 2}) = points_xy_scaled;
+  points_cat.slice(
+    Eigen::array<Eigen::Index, 5>{0, 0, 0, 0, 2},
+    Eigen::array<Eigen::Index, 5>{N, D, H, W, 1}) = points_z;
+
+  Tensor3D combine(intrins_inverse.dimensions());
+
+  for (Eigen::Index camera_index = 0; camera_index < N; camera_index++) {
+    Tensor2D camera2lidar_rot = camera2lidar_rots.chip(camera_index, 0);
+    Tensor2D intrins_inv = intrins_inverse.chip(camera_index, 0);
+
+    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = {
+      Eigen::IndexPair<Eigen::Index>(1, 0)};
+
+    Tensor2D combine_sliced = camera2lidar_rot.contract(intrins_inv, contract_dims);
+    combine.chip(camera_index, 0) = combine_sliced;
+  }
+
+  // Reshape combine to [N, 1, 1, 1, 3, 3]
+  Tensor6D combine_reshaped = combine.reshape(Eigen::array<Eigen::Index, 6>{N, 1, 1, 1, 3, 3});
+
+  Tensor5D points_transformed(points_cat.dimensions());
+
+  for (Eigen::Index camera_index = 0; camera_index < N; camera_index++) {
+    Tensor2D combine_sliced = combine.chip(camera_index, 0);
+    Tensor4D points_sliced = points_cat.chip(camera_index, 0);
+
+    Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> contract_dims = {
+      Eigen::IndexPair<Eigen::Index>(3, 0)};
+    Eigen::array<int, 2> shuffling({1, 0});
+    Tensor2D combined_transposed = combine_sliced.shuffle(shuffling);
+
+    Tensor4D points_transformed_sliced =
+      points_sliced.contract(combined_transposed, contract_dims);
+    points_transformed.chip(camera_index, 0) = points_transformed_sliced;
+  }
+
+  // Squeeze the last dimension
+  Tensor5D points_squeezed_final =
+    points_transformed.reshape(Eigen::array<Eigen::Index, 5>{N, D, H, W, 3});
+
+  // Add camera2lidar_trans
+  Tensor5D camera2lidar_trans_broadcast =
+    camera2lidar_trans.reshape(Eigen::array<Eigen::Index, 5>{N, 1, 1, 1, 3})
+      .broadcast(Eigen::array<Eigen::Index, 5>{1, D, H, W, 1});
+  Tensor5D points_final = points_squeezed_final + camera2lidar_trans_broadcast;
+
+  return points_final;
+}
+
+// Define the function
+std::tuple<
+  Eigen::Matrix<std::int32_t, Eigen::Dynamic, 4, Eigen::RowMajor>,  // geom_feats
+  Eigen::Matrix<bool, Eigen::Dynamic, 1>,                           // kept
+  Eigen::Matrix<std::int64_t, Eigen::Dynamic, 1>,                   // ranks
+  Eigen::Matrix<std::int64_t, Eigen::Dynamic, 1>                    // indices
+  >
+bev_pool_aux(const Tensor5D & geom_feats_input, const BEVFusionConfig & config)
+{
+  Eigen::Vector3f dx(config.xbound_[2], config.ybound_[2], config.zbound_[2]);
+  Eigen::Vector3f bx(
+    config.xbound_[0] + config.xbound_[2] / 2.0, config.ybound_[0] + config.ybound_[2] / 2.0,
+    config.zbound_[0] + config.zbound_[2] / 2.0);
+  Eigen::Vector3i nx(
+    static_cast<int>((config.xbound_[1] - config.xbound_[0]) / config.xbound_[2]),
+    static_cast<int>((config.ybound_[1] - config.ybound_[0]) / config.ybound_[2]),
+    static_cast<int>((config.zbound_[1] - config.zbound_[0]) / config.zbound_[2]));
+
+  // Get dimensions
+  Eigen::Index N = geom_feats_input.dimension(0);
+  Eigen::Index D = geom_feats_input.dimension(1);
+  Eigen::Index H = geom_feats_input.dimension(2);
+  Eigen::Index W = geom_feats_input.dimension(3);
+  Eigen::Index C = geom_feats_input.dimension(4);
+  assert(C == 3);
+
+  Eigen::Index Nprime = N * D * H * W;
+
+  TensorMap1D dx_map(dx.data(), 3);
+  TensorMap1D bx_map(bx.data(), 3);
+
+  Tensor5D dx_broadcast = dx_map.reshape(Eigen::array<Eigen::Index, 5>{1, 1, 1, 1, C})
+                            .broadcast(Eigen::array<Eigen::Index, 5>{N, D, H, W, 1});
+
+  Tensor5D bx_broadcast = bx_map.reshape(Eigen::array<Eigen::Index, 5>{1, 1, 1, 1, 3})
+                            .broadcast(Eigen::array<Eigen::Index, 5>{N, D, H, W, 1});
+
+  Tensor5D geom_feats_aux =
+    (geom_feats_input - (bx_broadcast - 0.5f * dx_broadcast)) / dx_broadcast;
+
+  Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
+    geom_feats_map(geom_feats_aux.data(), Nprime, C);
+  Eigen::Matrix<std::int32_t, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> geom_feats_int =
+    geom_feats_map.cast<std::int32_t>();
+
+  // Concatenate geom_feats_int and batch_ix along columns
+  Eigen::Matrix<std::int32_t, Eigen::Dynamic, 4, Eigen::RowMajor> geom_feats_cat(Nprime, 4);
+  geom_feats_cat.block(0, 0, Nprime, C) = geom_feats_int;
+  geom_feats_cat.col(3).setZero();
+
+  // Filter out points that are outside the box
+  Eigen::Matrix<bool, Eigen::Dynamic, 1> kept(Nprime);
+  for (Eigen::Index i = 0; i < Nprime; ++i) {
+    std::int32_t x = geom_feats_cat(i, 0);
+    std::int32_t y = geom_feats_cat(i, 1);
+    std::int32_t z = geom_feats_cat(i, 2);
+
+    bool keep = (x >= 0) && (x < nx(0)) && (y >= 0) && (y < nx(1)) && (z >= 0) && (z < nx(2));
+    kept(i) = keep;
+  }
+
+  // Collect indices where kept is true
+  std::vector<Eigen::Index> kept_indices;
+  for (Eigen::Index i = 0; i < Nprime; ++i) {
+    if (kept(i)) {
+      kept_indices.push_back(i);
+    }
+  }
+
+  // Create geom_feats_kept
+  Eigen::Index N_kept = kept_indices.size();
+  Eigen::Matrix<std::int32_t, Eigen::Dynamic, 4, Eigen::RowMajor> geom_feats_kept(N_kept, 4);
+  for (Eigen::Index i = 0; i < N_kept; ++i) {
+    std::int64_t idx = kept_indices[i];
+    geom_feats_kept.row(i) = geom_feats_cat.row(idx);
+  }
+
+  // Compute ranks
+  Eigen::Matrix<std::int64_t, Eigen::Dynamic, 1> ranks(N_kept);
+  std::int64_t factor0 = static_cast<std::int64_t>(W) * D;
+  std::int64_t factor1 = static_cast<std::int64_t>(D);
+  std::int64_t factor2 = 1;
+
+  for (Eigen::Index i = 0; i < N_kept; ++i) {
+    std::int32_t x = geom_feats_kept(i, 0);
+    std::int32_t y = geom_feats_kept(i, 1);
+    std::int32_t z = geom_feats_kept(i, 2);
+    std::int64_t batch = geom_feats_kept(i, 3);
+
+    std::int64_t rank = x * factor0 + y * factor1 + z * factor2 + batch;
+    ranks(i) = rank;
+  }
+
+  // Sort ranks and keep the permutation indices
+  std::vector<std::pair<std::int64_t, std::int64_t>> rank_idx_pairs(N_kept);
+  for (Eigen::Index i = 0; i < N_kept; ++i) {
+    rank_idx_pairs[i] = std::make_pair(ranks(i), i);
+  }
+  std::sort(rank_idx_pairs.begin(), rank_idx_pairs.end());
+
+  // Create sorted ranks and indices
+  Eigen::Matrix<std::int64_t, Eigen::Dynamic, 1> ranks_sorted(N_kept);
+  Eigen::Matrix<std::int64_t, Eigen::Dynamic, 1> indices(N_kept);
+  Eigen::Matrix<std::int32_t, Eigen::Dynamic, 4, Eigen::RowMajor> geom_feats_sorted(N_kept, 4);
+  for (Eigen::Index i = 0; i < N_kept; ++i) {
+    std::int64_t rank = rank_idx_pairs[i].first;
+    std::int64_t idx = rank_idx_pairs[i].second;
+
+    ranks_sorted(i) = rank;
+    indices(i) = idx;
+    geom_feats_sorted.row(i) = geom_feats_kept.row(idx);
+  }
+
+  // Return geom_feats_sorted, kept, ranks_sorted, indices
+  return std::make_tuple(geom_feats_sorted, kept, ranks_sorted, indices);
+}
+
+std::tuple<
+  Eigen::VectorXf, Eigen::Matrix<std::int32_t, Eigen::Dynamic, 4, Eigen::RowMajor>,
+  Eigen::Matrix<bool, Eigen::Dynamic, 1>, Eigen::Matrix<std::int64_t, Eigen::Dynamic, 1>,
+  Eigen::Matrix<std::int64_t, Eigen::Dynamic, 1>>
+precompute_features(
+  const std::vector<Matrix4fRowM> & lidar2camera_transforms,
+  const std::vector<Matrix4fRowM> & camera_aug_matrices,
+  const std::vector<sensor_msgs::msg::CameraInfo> & camera_info_vector,
+  const BEVFusionConfig & config)
+{
+  Eigen::VectorXf lidar2images_flat(config.num_cameras_ * 4 * 4);
+
+  Tensor4D frustum = autoware::lidar_bevfusion::create_frustum(config);
+
+  Tensor3D camera2lidar_rotations(config.num_cameras_, 3, 3);
+  Tensor2D camera2lidar_translations(config.num_cameras_, 3);
+  Tensor3D intrins_inverse(config.num_cameras_, 3, 3);
+  Tensor3D post_rots_inverse(config.num_cameras_, 3, 3);
+  Tensor2D post_trans(config.num_cameras_, 3);
+
+  for (std::int64_t camera_id = 0; camera_id < config.num_cameras_; camera_id++) {
+    Matrix4fRowM cam2image = Matrix4fRowM::Identity();
+    cam2image(0, 0) = camera_info_vector[camera_id].p[0];
+    cam2image(1, 1) = camera_info_vector[camera_id].p[5];
+    cam2image(0, 2) = camera_info_vector[camera_id].p[2];
+    cam2image(1, 2) = camera_info_vector[camera_id].p[6];
+
+    Matrix4fRowM img_aug_matrix = camera_aug_matrices[camera_id];
+    Matrix4fRowM img_aug_matrix_inverse = img_aug_matrix.inverse().eval();
+
+    Matrix4fRowM lidar2camera = lidar2camera_transforms[camera_id];
+    Matrix4fRowM camera2lidar = lidar2camera.inverse().eval();
+
+    Matrix4fRowM lidar2image = cam2image * lidar2camera;
+    Eigen::VectorXf lidar2image_flat(Eigen::Map<Eigen::VectorXf>(lidar2image.data(), 16));
+    lidar2images_flat.segment(16 * camera_id, 16) = lidar2image_flat;
+
+    Matrix3fRowM camera2lidar_rot = camera2lidar.block<3, 3>(0, 0);
+    Vector3fRowM camera2lidar_trans = camera2lidar.block<3, 1>(0, 3);
+    Matrix3fRowM intrins_inv = cam2image.inverse().eval().block<3, 3>(0, 0);
+    Matrix3fRowM post_rots_inv = img_aug_matrix_inverse.block<3, 3>(0, 0);
+    Vector3fRowM post_trans_vec = img_aug_matrix.block<3, 1>(0, 3);
+
+    TensorMap2D camera2lidar_rot_tensor(camera2lidar_rot.data(), 3, 3);
+    TensorMap1D camera2lidar_trans_tensor(camera2lidar_trans.data(), 3);
+    TensorMap2D intrins_inv_tensor(intrins_inv.data(), 3, 3);
+    TensorMap2D post_rots_inv_tensor(post_rots_inv.data(), 3, 3);
+    TensorMap1D post_trans_tensor(post_trans_vec.data(), 3);
+
+    camera2lidar_rotations.chip(camera_id, 0) = camera2lidar_rot_tensor;
+    camera2lidar_translations.chip(camera_id, 0) = camera2lidar_trans_tensor;
+    intrins_inverse.chip(camera_id, 0) = intrins_inv_tensor;
+    post_rots_inverse.chip(camera_id, 0) = post_rots_inv_tensor;
+    post_trans.chip(camera_id, 0) = post_trans_tensor;
+  }
+
+  Tensor5D geometry = get_geometry(
+    frustum,                    // [D, H, W, 3]
+    camera2lidar_rotations,     // [N, 3, 3]
+    camera2lidar_translations,  // [N, 3]
+    intrins_inverse,            // [N, 3, 3]
+    post_rots_inverse,          // [N, 3, 3]
+    post_trans                  // [N, 3]
+  );
+
+  auto [geom_feats, kept, ranks, indices] = bev_pool_aux(geometry, config);
+
+  return std::make_tuple(lidar2images_flat, geom_feats, kept, ranks, indices);
+}
+
+}  // namespace autoware::lidar_bevfusion
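The ranks computed above give every frustum point a scalar key such that points landing in the same BEV cell become contiguous after sorting; the pooling kernel later sums each contiguous run. A host-side sketch of that grouping step for orientation only (the actual interval extraction happens inside the QuickCumsumCuda TensorRT plugin added by this patch; the struct and function names here are illustrative):

  #include <cstdint>
  #include <vector>

  struct Interval { std::int32_t start; std::int32_t length; };

  // Derive [start, start + length) intervals from sorted ranks so that each
  // interval covers all points that fall into the same BEV cell.
  std::vector<Interval> make_intervals(const std::vector<std::int64_t> & sorted_ranks)
  {
    std::vector<Interval> intervals;
    for (std::int32_t i = 0; i < static_cast<std::int32_t>(sorted_ranks.size()); ++i) {
      if (i == 0 || sorted_ranks[i] != sorted_ranks[i - 1]) {
        intervals.push_back({i, 1});  // a new BEV cell starts here
      } else {
        intervals.back().length++;    // same cell: extend the current run
      }
    }
    return intervals;
  }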
diff --git a/perception/autoware_lidar_bevfusion/lib/preprocess/preprocess_kernel.cu b/perception/autoware_lidar_bevfusion/lib/preprocess/preprocess_kernel.cu
new file mode 100644
index 0000000000000..27baf7cd45b8f
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/lib/preprocess/preprocess_kernel.cu
@@ -0,0 +1,228 @@
+// Copyright 2025 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+ * All rights reserved. SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "autoware/lidar_bevfusion/preprocess/point_type.hpp"
+#include "autoware/lidar_bevfusion/preprocess/preprocess_kernel.hpp"
+
+#include <spconvlib/spconv/csrc/sparse/all/ops3d/Point2Voxel.h>
+
+#include <tensorview/tensor.h>
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+
+namespace autoware::lidar_bevfusion
+{
+
+PreprocessCuda::PreprocessCuda(
+  const BEVFusionConfig & config, cudaStream_t & stream, bool allocate_buffers)
+: stream_(stream), config_(config)
+{
+  if (allocate_buffers) {
+    hash_key_value_ = tv::empty({config.cloud_capacity_ * 2}, tv::custom128, 0);
+    point_indice_data_ = tv::empty({config.cloud_capacity_}, tv::int64, 0);
+    points_voxel_id_ = tv::empty({config.cloud_capacity_}, tv::int64, 0);
+  }
+}
+
+__global__ void generateSweepPoints_kernel(
+  const InputPointType * __restrict__ input_points, std::size_t points_size, float time_lag,
+  const float * transform_array, int num_features, float * __restrict__ output_points)
+{
+  int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (point_idx >= points_size) return;
+
+  const InputPointType * input_point = &input_points[point_idx];
+  float input_x = input_point->x;
+  float input_y = input_point->y;
+  float input_z = input_point->z;
+  float input_intensity = static_cast<float>(input_point->intensity);
+
+  output_points[point_idx * num_features] = transform_array[0] * input_x +
+                                            transform_array[4] * input_y +
+                                            transform_array[8] * input_z + transform_array[12];
+  output_points[point_idx * num_features + 1] = transform_array[1] * input_x +
+                                                transform_array[5] * input_y +
+                                                transform_array[9] * input_z + transform_array[13];
+  output_points[point_idx * num_features + 2] = transform_array[2] * input_x +
+                                                transform_array[6] * input_y +
+                                                transform_array[10] * input_z +
+                                                transform_array[14];
+  output_points[point_idx * num_features + 3] = input_intensity;
+  output_points[point_idx * num_features + 4] = time_lag;
+}
+
+cudaError_t PreprocessCuda::generateSweepPoints_launch(
+  const InputPointType * input_data, std::size_t points_size, float time_lag,
+  const float * transform_array, float * output_points)
+{
+  dim3 blocks(divup(points_size, config_.threads_per_block_));
+  dim3 threads(config_.threads_per_block_);
+
+  generateSweepPoints_kernel<<<blocks, threads, 0, stream_>>>(
+    input_data, points_size, time_lag, transform_array, config_.num_point_feature_size_,
+    output_points);
+
+  cudaError_t err = cudaGetLastError();
+  return err;
+}
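+
+// Note on the indexing above: transform_array is a 4x4 matrix stored
+// column-major (Eigen's default; see the static_assert in voxel_generator.cpp),
+// so element (row r, col c) lives at transform_array[4 * c + r]. As a worked
+// check, the output x is m(0,0)*x + m(0,1)*y + m(0,2)*z + m(0,3)
+// = transform_array[0]*x + transform_array[4]*y + transform_array[8]*z
+//   + transform_array[12].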
+
+std::size_t PreprocessCuda::generateVoxels(
+  const float * points, unsigned int points_size, float * voxel_features,
+  std::int32_t * voxel_coords, std::int32_t * num_points_per_voxel)
+{
+  using Point2VoxelGPU3D = spconvlib::spconv::csrc::sparse::all::ops3d::Point2Voxel;
+
+  std::array<float, 3> vsize_xyz{
+    config_.voxel_z_size_, config_.voxel_y_size_, config_.voxel_x_size_};
+
+  std::array<std::int32_t, 3> grid_size{
+    static_cast<std::int32_t>(config_.grid_z_size_),
+    static_cast<std::int32_t>(config_.grid_y_size_),
+    static_cast<std::int32_t>(config_.grid_x_size_)};
+
+  std::array<std::int64_t, 3> grid_stride{
+    static_cast<std::int64_t>(config_.grid_y_size_ * config_.grid_x_size_),
+    static_cast<std::int64_t>(config_.grid_x_size_), 1};
+
+  std::array<float, 6> coors_range{config_.min_z_range_, config_.min_y_range_,
+                                   config_.min_x_range_, config_.max_z_range_,
+                                   config_.max_y_range_, config_.max_x_range_};
+
+  tv::Tensor pc = tv::from_blob(
+    points, {static_cast<std::int64_t>(points_size), config_.num_point_feature_size_},
+    tv::float32, 0);
+
+  auto point_limit = pc.dim(0);
+
+  tv::Tensor voxels_padded = tv::from_blob(
+    voxel_features, {config_.max_num_voxels_, config_.max_points_per_voxel_, pc.dim(1)},
+    tv::float32, 0);
+
+  tv::Tensor indices_padded_no_batch =
+    tv::from_blob(voxel_coords, {config_.max_num_voxels_, 3}, tv::int32, 0);
+
+  tv::Tensor num_points_per_voxel_tensor =
+    tv::from_blob(num_points_per_voxel, {config_.max_num_voxels_}, tv::int32, 0);
+
+  auto p2v_res = Point2VoxelGPU3D::point_to_voxel_hash_static(
+    pc, voxels_padded, indices_padded_no_batch, num_points_per_voxel_tensor, hash_key_value_,
+    point_indice_data_, points_voxel_id_, vsize_xyz, grid_size, grid_stride, coors_range, true,
+    false, reinterpret_cast<std::uintptr_t>(stream_));
+
+  std::size_t real_num_voxels = static_cast<std::size_t>(std::get<0>(p2v_res).dim(0));
+
+  return real_num_voxels;
+}
+
+__global__ void resize_and_extract_roi(
+  const std::uint8_t * __restrict__ input_img, std::uint8_t * __restrict__ output_img, int H,
+  int W,                     // Original image dimensions (Height, Width)
+  int H2, int W2,            // Resized image dimensions (Height, Width)
+  int H3, int W3,            // ROI dimensions (Height, Width)
+  int y_start, int x_start)  // ROI top-left coordinates in the resized image
+{
+  // Calculate the global thread indices
+  int i = blockIdx.x * blockDim.x + threadIdx.x;  // Width index in the output (ROI) image
+  int j = blockIdx.y * blockDim.y + threadIdx.y;  // Height index in the output (ROI) image
+
+  // Check if the thread corresponds to a valid pixel in the ROI
+  if (i >= W3 || j >= H3) return;
+
+  // Compute the scaling factors from the original to the resized image
+  float scale_y = static_cast<float>(H) / H2;
+  float scale_x = static_cast<float>(W) / W2;
+
+  // Map the output pixel (i, j) in the ROI to a position in the resized image
+  int i_resized = i + x_start;
+  int j_resized = j + y_start;
+
+  // Map the position in the resized image back to a position in the original image
+  float i_original = (i_resized + 0.5f) * scale_x - 0.5f;
+  float j_original = (j_resized + 0.5f) * scale_y - 0.5f;
+
+  // Compute the coordinates used for bilinear interpolation
+  int i0 = static_cast<int>(floorf(i_original));
+  int j0 = static_cast<int>(floorf(j_original));
+  int i1 = i0 + 1;
+  int j1 = j0 + 1;
+
+  // Compute the interpolation weights
+  float di = i_original - i0;
+  float dj = j_original - j0;
+
+  float w00 = (1.0f - di) * (1.0f - dj);
+  float w01 = (1.0f - di) * dj;
+  float w10 = di * (1.0f - dj);
+  float w11 = di * dj;
+
+  // Loop over the three color channels
+  for (int c = 0; c < 3; ++c) {
+    float v00 = 0.0f, v01 = 0.0f, v10 = 0.0f, v11 = 0.0f;
+
+    // Boundary checks for each neighboring pixel
+    if (i0 >= 0 && i0 < W && j0 >= 0 && j0 < H)
+      v00 = static_cast<float>(input_img[(j0 * W + i0) * 3 + c]);
+    if (i0 >= 0 && i0 < W && j1 >= 0 && j1 < H)
+      v01 = static_cast<float>(input_img[(j1 * W + i0) * 3 + c]);
+    if (i1 >= 0 && i1 < W && j0 >= 0 && j0 < H)
+      v10 = static_cast<float>(input_img[(j0 * W + i1) * 3 + c]);
+    if (i1 >= 0 && i1 < W && j1 >= 0 && j1 < H)
+      v11 = static_cast<float>(input_img[(j1 * W + i1) * 3 + c]);
+
+    // Compute the interpolated pixel value
+    float value = w00 * v00 + w01 * v01 + w10 * v10 + w11 * v11;
+
+    // Store the result in the output ROI image
+    output_img[(j * W3 + i) * 3 + c] = static_cast<std::uint8_t>(value);
+  }
+}
+
+cudaError_t PreprocessCuda::resize_and_extract_roi_launch(
+  const std::uint8_t * input_img, std::uint8_t * output_img, int H,
+  int W,                     // Original image dimensions
+  int H2, int W2,            // Resized image dimensions
+  int H3, int W3,            // ROI dimensions
+  int y_start, int x_start,  // ROI top-left coordinates in the resized image
+  cudaStream_t stream)
+{
+  // Define the block and grid dimensions
+  dim3 threads(16, 16);
+  dim3 blocks(divup(W3, threads.x), divup(H3, threads.y));
+
+  // Launch the kernel
+  resize_and_extract_roi<<<blocks, threads, 0, stream>>>(
+    input_img, output_img, H, W, H2, W2, H3, W3, y_start, x_start);
+
+  // Check for errors
+  return cudaGetLastError();
+}
+
+}  // namespace autoware::lidar_bevfusion
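A usage sketch for the launcher (dimensions and the device buffer names `input_rgb_d` / `roi_rgb_d` are illustrative; both point to interleaved RGB data, 3 bytes per pixel, and `preprocess` is a PreprocessCuda instance):

  // Resize a 1920x1080 camera image to 960x540, then crop a 704x256 ROI whose
  // top-left corner in the resized image is (x = 128, y = 284).
  cudaError_t err = preprocess.resize_and_extract_roi_launch(
    input_rgb_d, roi_rgb_d,
    /*H=*/1080, /*W=*/1920,  // original image
    /*H2=*/540, /*W2=*/960,  // resized image
    /*H3=*/256, /*W3=*/704,  // ROI
    /*y_start=*/284, /*x_start=*/128, stream);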
Used " + << pd_ptr_->getIdx(pc_cache_iter) << " out of " + << pd_ptr_->getCacheSize() << " sweep(s)"); + break; + } + + auto affine_past2current = + pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world; + static_assert(std::is_same::value); + static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major."); + + float time_lag = static_cast( + pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds()); + + CHECK_CUDA_ERROR(cudaMemcpyAsync( + affine_past2current_d_.get(), affine_past2current.data(), + Eigen::Affine3f::MatrixType::SizeAtCompileTime * sizeof(float), cudaMemcpyHostToDevice, + stream_)); + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + pre_ptr_->generateSweepPoints_launch( + pc_cache_iter->data_d.get(), sweep_num_points, time_lag, affine_past2current_d_.get(), + points_d.get() + output_offset); + point_counter += sweep_num_points; + } + + return point_counter; +} + +} // namespace autoware::lidar_bevfusion diff --git a/perception/autoware_lidar_bevfusion/lib/ros_utils.cpp b/perception/autoware_lidar_bevfusion/lib/ros_utils.cpp new file mode 100644 index 0000000000000..22a396a067d0a --- /dev/null +++ b/perception/autoware_lidar_bevfusion/lib/ros_utils.cpp @@ -0,0 +1,126 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "autoware/lidar_bevfusion/ros_utils.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace autoware::lidar_bevfusion +{ + +using Label = autoware_perception_msgs::msg::ObjectClassification; + +bool is_data_layout_compatible_with_point_xyzirc(const sensor_msgs::msg::PointCloud2 & input) +{ + using PointIndex = autoware::point_types::PointXYZIRCIndex; + using autoware::point_types::PointXYZIRC; + if (input.fields.size() < 6) { + return false; + } + bool same_layout = true; + const auto & field_x = input.fields.at(static_cast(PointIndex::X)); + same_layout &= field_x.name == "x"; + same_layout &= field_x.offset == offsetof(PointXYZIRC, x); + same_layout &= field_x.datatype == sensor_msgs::msg::PointField::FLOAT32; + same_layout &= field_x.count == 1; + const auto & field_y = input.fields.at(static_cast(PointIndex::Y)); + same_layout &= field_y.name == "y"; + same_layout &= field_y.offset == offsetof(PointXYZIRC, y); + same_layout &= field_y.datatype == sensor_msgs::msg::PointField::FLOAT32; + same_layout &= field_y.count == 1; + const auto & field_z = input.fields.at(static_cast(PointIndex::Z)); + same_layout &= field_z.name == "z"; + same_layout &= field_z.offset == offsetof(PointXYZIRC, z); + same_layout &= field_z.datatype == sensor_msgs::msg::PointField::FLOAT32; + same_layout &= field_z.count == 1; + const auto & field_intensity = input.fields.at(static_cast(PointIndex::Intensity)); + same_layout &= field_intensity.name == "intensity"; + same_layout &= field_intensity.offset == offsetof(PointXYZIRC, intensity); + same_layout &= field_intensity.datatype == sensor_msgs::msg::PointField::UINT8; + same_layout &= field_intensity.count == 1; + const auto & field_return_type = input.fields.at(static_cast(PointIndex::ReturnType)); + same_layout &= field_return_type.name == "return_type"; + same_layout &= field_return_type.offset == offsetof(PointXYZIRC, return_type); + same_layout &= field_return_type.datatype == sensor_msgs::msg::PointField::UINT8; + same_layout &= field_return_type.count == 1; + const auto & field_ring = input.fields.at(static_cast(PointIndex::Channel)); + same_layout &= field_ring.name == "channel"; + same_layout &= field_ring.offset == offsetof(PointXYZIRC, channel); + same_layout &= field_ring.datatype == sensor_msgs::msg::PointField::UINT16; + same_layout &= field_ring.count == 1; + + return same_layout; +} + +void box3DToDetectedObject( + const Box3D & box3d, const std::vector & class_names, + autoware_perception_msgs::msg::DetectedObject & obj) +{ + obj.existence_probability = box3d.score; + + // classification + autoware_perception_msgs::msg::ObjectClassification classification; + classification.probability = 1.0f; + if (box3d.label >= 0 && static_cast(box3d.label) < class_names.size()) { + classification.label = getSemanticType(class_names[box3d.label]); + } else { + classification.label = Label::UNKNOWN; + RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_bevfusion"), "Unexpected label: UNKNOWN is set."); + } + + if (autoware::object_recognition_utils::isCarLikeVehicle(classification.label)) { + obj.kinematics.orientation_availability = + autoware_perception_msgs::msg::DetectedObjectKinematics::SIGN_UNKNOWN; + } + + obj.classification.emplace_back(classification); + + // pose and shape + obj.kinematics.pose_with_covariance.pose.position = + autoware::universe_utils::createPoint(box3d.x, box3d.y, box3d.z); + obj.kinematics.pose_with_covariance.pose.orientation = + autoware::universe_utils::createQuaternionFromYaw(box3d.yaw); + 
diff --git a/perception/autoware_lidar_bevfusion/package.xml b/perception/autoware_lidar_bevfusion/package.xml
new file mode 100644
index 0000000000000..eb38aa9a07f99
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/package.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<package format="3">
+  <name>autoware_lidar_bevfusion</name>
+  <version>1.0.0</version>
+  <description>The autoware_lidar_bevfusion package</description>
+  <maintainer email="kenzo.lobos@tier4.jp">Kenzo Lobos-Tsunekawa</maintainer>
+  <license>Apache License 2.0</license>
+
+  <buildtool_depend>ament_cmake_auto</buildtool_depend>
+  <buildtool_depend>autoware_cmake</buildtool_depend>
+
+  <depend>autoware_cuda_utils</depend>
+  <depend>autoware_object_recognition_utils</depend>
+  <depend>autoware_perception_msgs</depend>
+  <depend>autoware_point_types</depend>
+  <depend>autoware_tensorrt_common</depend>
+  <depend>autoware_universe_utils</depend>
+  <depend>launch_ros</depend>
+  <depend>pcl_ros</depend>
+  <depend>rclcpp</depend>
+  <depend>rclcpp_components</depend>
+  <depend>tf2_eigen</depend>
+  <depend>tf2_geometry_msgs</depend>
+  <depend>tf2_sensor_msgs</depend>
+
+  <exec_depend>autoware_image_transport_decompressor</exec_depend>
+
+  <test_depend>ament_cmake_ros</test_depend>
+  <test_depend>ament_lint_auto</test_depend>
+  <test_depend>autoware_lint_common</test_depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
diff --git a/perception/autoware_lidar_bevfusion/schema/bevfusion.schema.dummy.json b/perception/autoware_lidar_bevfusion/schema/bevfusion.schema.dummy.json
new file mode 100644
index 0000000000000..a3af7a6da7e82
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/schema/bevfusion.schema.dummy.json
@@ -0,0 +1,121 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Parameters for LiDAR BEVFusion Node",
+  "$comment": "This schema is not considered in CI workflow. See https://github.com/autowarefoundation/autoware.universe/pull/8205#issuecomment-2255074224.",
+  "type": "object",
+  "definitions": {
+    "bevfusion": {
+      "type": "object",
+      "properties": {
+        "trt_precision": {
+          "type": "string",
+          "description": "The precision used to build the TensorRT engine.",
+          "default": "fp16",
+          "enum": ["fp16", "fp32"]
+        },
+        "cloud_capacity": {
+          "type": "integer",
+          "description": "Capacity of the point cloud buffer (should be set to at least the maximum theoretical number of points).",
+          "default": 2000000,
+          "minimum": 1
+        },
+        "onnx_path": {
+          "type": "string",
+          "description": "The path to the ONNX model file.",
+          "default": "$(var model_path)/bevfusion.onnx",
+          "pattern": "\\.onnx$"
+        },
+        "engine_path": {
+          "type": "string",
+          "description": "The path to the TensorRT engine file.",
+          "default": "$(var model_path)/bevfusion.engine",
+          "pattern": "\\.engine$"
+        },
+        "densification_num_past_frames": {
+          "type": "integer",
+          "description": "The number of past frames to be concatenated into the current input frame.",
+          "default": 1,
+          "minimum": 0
+        },
+        "densification_world_frame_id": {
+          "type": "string",
+          "description": "The frame id of the world coordinate system used for densification.",
+          "default": "map"
+        },
+        "circle_nms_dist_threshold": {
+          "type": "number",
+          "description": "The distance threshold between detection centers used in circle NMS.",
+          "default": 0.5,
+          "minimum": 0.0
+        },
+        "iou_nms_target_class_names": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "description": "An array of class names targeted by IoU-based NMS.",
+          "default": ["CAR"],
+          "uniqueItems": true
+        },
+        "iou_nms_search_distance_2d": {
+          "type": "number",
+          "description": "The maximum 2D distance within which object pairs are compared during NMS.",
+          "default": 10.0,
+          "minimum": 0.0
+        },
+        "iou_nms_threshold": {
+          "type": "number",
+          "description": "The IoU threshold above which an object is suppressed.",
+          "default": 0.1,
+          "minimum": 0.0,
+          "maximum": 1.0
+        },
+        "yaw_norm_thresholds": {
+          "type": "array",
+          "items": {
+            "type": "number",
+            "minimum": 0.0,
+            "maximum": 1.0
+          },
+          "description": "Per-class thresholds on the norm of the predicted direction vector; objects whose norm is below the threshold are ignored.",
+          "default": [0.3, 0.3, 0.3, 0.3, 0.0]
+        },
+        "score_threshold": {
+          "type": "number",
+          "description": "The confidence score threshold; objects with a score below it are ignored.",
+          "default": 0.2,
+          "minimum": 0.0
+        }
+      },
+      "required": [
+        "trt_precision",
+        "cloud_capacity",
+        "onnx_path",
+        "engine_path",
+        "densification_num_past_frames",
+        "densification_world_frame_id",
+        "circle_nms_dist_threshold",
+        "iou_nms_target_class_names",
+        "iou_nms_search_distance_2d",
+        "iou_nms_threshold",
+        "yaw_norm_thresholds",
+        "score_threshold"
+      ],
+      "additionalProperties": false
+    }
+  },
+  "properties": {
+    "/**": {
+      "type": "object",
+      "properties": {
+        "ros__parameters": {
+          "$ref": "#/definitions/bevfusion"
+        }
+      },
+      "required": ["ros__parameters"],
+      "additionalProperties": false
+    }
+  },
+  "required": ["/**"],
+  "additionalProperties": false
+}
"definitions": { + "bevfusion": { + "type": "object", + "properties": { + "class_names": { + "type": "array", + "items": { + "type": "string" + }, + "description": "An array of class names will be predicted.", + "default": ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"], + "uniqueItems": true + }, + "voxels_num": { + "type": "array", + "items": { + "type": "integer" + }, + "description": "A maximum number of voxels [min, opt, max].", + "default": [5000, 30000, 60000] + }, + "point_cloud_range": { + "type": "array", + "items": { + "type": "number" + }, + "description": "An array of distance ranges of each class.", + "default": [-76.8, -76.8, -3.0, 76.8, 76.8, 5.0], + "minItems": 6, + "maxItems": 6 + }, + "voxel_size": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Voxels size [x, y, z].", + "default": [0.3, 0.3, 8.0], + "minItems": 3, + "maxItems": 3 + }, + "num_proposals": { + "type": "integer", + "description": "Number of proposals at TransHead.", + "default": 500, + "minimum": 1 + } + }, + "required": ["class_names", "voxels_num", "point_cloud_range", "voxel_size", "num_proposals"], + "additionalProperties": false + } + }, + "properties": { + "/**": { + "type": "object", + "properties": { + "ros__parameters": { + "$ref": "#/definitions/bevfusion" + } + }, + "required": ["ros__parameters"], + "additionalProperties": false + } + }, + "required": ["/**"], + "additionalProperties": false +} diff --git a/perception/autoware_lidar_bevfusion/schema/detection_class_remapper.schema.json b/perception/autoware_lidar_bevfusion/schema/detection_class_remapper.schema.json new file mode 100644 index 0000000000000..8aaefa295fcc0 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/schema/detection_class_remapper.schema.json @@ -0,0 +1,72 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Parameters for Detection Class Remapper", + "type": "object", + "definitions": { + "detection_class_remapper": { + "type": "object", + "properties": { + "allow_remapping_by_area_matrix": { + "type": "array", + "items": { + "type": "integer" + }, + "description": "Whether to allow remapping of classes. 
diff --git a/perception/autoware_lidar_bevfusion/schema/detection_class_remapper.schema.json b/perception/autoware_lidar_bevfusion/schema/detection_class_remapper.schema.json
new file mode 100644
index 0000000000000..8aaefa295fcc0
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/schema/detection_class_remapper.schema.json
@@ -0,0 +1,72 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Parameters for Detection Class Remapper",
+  "type": "object",
+  "definitions": {
+    "detection_class_remapper": {
+      "type": "object",
+      "properties": {
+        "allow_remapping_by_area_matrix": {
+          "type": "array",
+          "items": {
+            "type": "integer"
+          },
+          "description": "Whether to allow remapping of classes. The order of the 8x8 matrix classes comes from the ObjectClassification msg.",
+          "default": [
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 1, 0, 1, 0, 0, 0,
+            0, 0, 0, 0, 1, 0, 0, 0,
+            0, 0, 0, 0, 1, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0
+          ],
+          "minItems": 64,
+          "maxItems": 64
+        },
+        "min_area_matrix": {
+          "type": "array",
+          "items": {
+            "type": "number"
+          },
+          "description": "Minimum area for a specific class to be considered for class remapping.",
+          "default": [
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 12.1, 0.0, 36.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 36.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 36.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+          ],
+          "minItems": 64,
+          "maxItems": 64
+        },
+        "max_area_matrix": {
+          "type": "array",
+          "items": {
+            "type": "number"
+          },
+          "description": "Maximum area for a specific class to be considered for class remapping.",
+          "default": [
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 36.0, 0.0, 999.999, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 999.999, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 999.999, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+          ],
+          "minItems": 64,
+          "maxItems": 64
+        }
+      },
+      "required": ["allow_remapping_by_area_matrix", "min_area_matrix", "max_area_matrix"],
+      "additionalProperties": false
+    }
+  },
+  "properties": {
+    "/**": {
+      "type": "object",
+      "properties": {
+        "ros__parameters": {
+          "$ref": "#/definitions/detection_class_remapper"
+        }
+      },
+      "required": ["ros__parameters"],
+      "additionalProperties": false
+    }
+  },
+  "required": ["/**"],
+  "additionalProperties": false
+}
diff --git a/perception/autoware_lidar_bevfusion/src/bev_ops/bev_pool_cuda.cu b/perception/autoware_lidar_bevfusion/src/bev_ops/bev_pool_cuda.cu
new file mode 100644
index 0000000000000..2f65d1aee3e9b
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/src/bev_ops/bev_pool_cuda.cu
@@ -0,0 +1,55 @@
+// Taken from
+// https://github.com/mit-han-lab/bevfusion/blob/main/mmdet3d/ops/bev_pool/src/bev_pool_cuda.cu
+// Available under the Apache-2.0 license
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+  Function: pillar pooling
+  Args:
+    b                : batch size
+    d                : depth of the feature map
+    h                : height of the pooled feature map
+    w                : width of the pooled feature map
+    n                : number of input points
+    c                : number of channels
+    n_intervals      : number of unique points
+    x                : input features, FloatTensor[n, c]
+    geom_feats       : input coordinates, IntTensor[n, 4]
+    interval_starts  : starting position of each pooled point, IntTensor[n_intervals]
+    interval_lengths : how many points each pooled point aggregates, IntTensor[n_intervals]
+    out              : output features, FloatTensor[b, d, h, w, c]
+*/
+__global__ void bev_pool_kernel(
+  int b, int d, int h, int w, int n, int c, int n_intervals, const float * __restrict__ x,
+  const int * __restrict__ geom_feats, const int * __restrict__ interval_starts,
+  const int * __restrict__ interval_lengths, float * __restrict__ out)
+{
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  int index = idx / c;
+  int cur_c = idx % c;
+  if (index >= n_intervals) return;
+  int interval_start = interval_starts[index];
+  int interval_length = interval_lengths[index];
+
+  const int * cur_geom_feats = geom_feats + interval_start * 4;
+  const float * cur_x = x + interval_start * c + cur_c;
+  float * cur_out = out + cur_geom_feats[3] * d * h * w * c + cur_geom_feats[2] * h * w * c +
+                    cur_geom_feats[0] * w * c + cur_geom_feats[1] * c + cur_c;
+  float psum = 0;
+  for (int i = 0; i < interval_length; i++) {
+    psum += cur_x[i * c];
+  }
+
+  *cur_out = psum;
+}
+
+void bev_pool(
+  int b, int d, int h, int w, int n, int c, int n_intervals, const float * x,
+  const int * geom_feats, const int * interval_starts, const int * interval_lengths, float * out,
+  cudaStream_t & stream)
+{
+  bev_pool_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256, 0, stream>>>(
+    b, d, h, w, n, c, n_intervals, x, geom_feats, interval_starts, interval_lengths, out);
+}
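A worked check of the kernel's output indexing (sizes illustrative): with b = 1, d = 1, h = w = 360 and c = 80, a pooled point whose geom_feats row is (x = 2, y = 3, z = 0, batch = 0) writes channel ch to out[2 * 360 * 80 + 3 * 80 + ch] = out[57840 + ch]. One thread handles one (interval, channel) pair, which is why the launch uses ceil(n_intervals * c / 256) blocks of 256 threads.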
diff --git a/perception/autoware_lidar_bevfusion/src/lidar_bevfusion_node.cpp b/perception/autoware_lidar_bevfusion/src/lidar_bevfusion_node.cpp
new file mode 100644
index 0000000000000..413912e0c3292
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/src/lidar_bevfusion_node.cpp
@@ -0,0 +1,342 @@
+// Copyright 2025 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "autoware/lidar_bevfusion/lidar_bevfusion_node.hpp"
+
+#include "autoware/lidar_bevfusion/utils.hpp"
+
+#include <tf2_eigen/tf2_eigen.hpp>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace autoware::lidar_bevfusion
+{
+
+LidarBEVFusionNode::LidarBEVFusionNode(const rclcpp::NodeOptions & options)
+: Node("lidar_bevfusion", options), tf_buffer_(this->get_clock())
+{
+  auto descriptor = rcl_interfaces::msg::ParameterDescriptor{}.set__read_only(true);
+
+  // Modality
+  sensor_fusion_ = this->declare_parameter<bool>("sensor_fusion", descriptor);
+
+  // Non network parameters
+  max_camera_lidar_delay_ =
+    this->declare_parameter<double>("max_camera_lidar_delay", descriptor);
+
+  // TensorRT parameters
+  const std::string plugins_path =
+    this->declare_parameter<std::string>("plugins_path", descriptor);
+
+  // Network parameters
+  const std::string onnx_path = this->declare_parameter<std::string>("onnx_path", descriptor);
+  const std::string engine_path = this->declare_parameter<std::string>("engine_path", descriptor);
+  const std::string trt_precision =
+    this->declare_parameter<std::string>("trt_precision", descriptor);
+
+  // Common parameters
+  const auto out_size_factor =
+    this->declare_parameter<std::int64_t>("out_size_factor", descriptor);
+
+  auto to_float_vector = [](const auto & v) -> std::vector<float> {
+    return std::vector<float>(v.begin(), v.end());
+  };
+
+  // Lidar branch parameters
+  const auto cloud_capacity = this->declare_parameter<std::int64_t>("cloud_capacity", descriptor);
+  const auto max_points_per_voxel =
+    this->declare_parameter<std::int64_t>("max_points_per_voxel", descriptor);
+  const auto voxels_num =
+    this->declare_parameter<std::vector<std::int64_t>>("voxels_num", descriptor);
+  const auto point_cloud_range = to_float_vector(
+    this->declare_parameter<std::vector<double>>("point_cloud_range", descriptor));
+  const auto voxel_size =
+    to_float_vector(this->declare_parameter<std::vector<double>>("voxel_size", descriptor));
+
+  // Camera branch parameters
+  // cSpell:ignore dbound xbound ybound zbound
+  const auto dbound =
+    to_float_vector(this->declare_parameter<std::vector<double>>("dbound", descriptor));
+  const auto xbound =
+    to_float_vector(this->declare_parameter<std::vector<double>>("xbound", descriptor));
+  const auto ybound =
+    to_float_vector(this->declare_parameter<std::vector<double>>("ybound", descriptor));
+  const auto zbound =
+    to_float_vector(this->declare_parameter<std::vector<double>>("zbound", descriptor));
+  const auto num_cameras = this->declare_parameter<std::int64_t>("num_cameras", descriptor);
+  const auto raw_image_height =
+    this->declare_parameter<std::int64_t>("raw_image_height", descriptor);
+  const auto raw_image_width =
+    this->declare_parameter<std::int64_t>("raw_image_width", descriptor);
+  const auto img_aug_scale_x = this->declare_parameter<double>("img_aug_scale_x", descriptor);
+  const auto img_aug_scale_y = this->declare_parameter<double>("img_aug_scale_y", descriptor);
+  const auto roi_height = this->declare_parameter<std::int64_t>("roi_height", descriptor);
+  const auto roi_width = this->declare_parameter<std::int64_t>("roi_width", descriptor);
+  const auto features_height =
+    this->declare_parameter<std::int64_t>("features_height", descriptor);
+  const auto features_width =
+    this->declare_parameter<std::int64_t>("features_width", descriptor);
+  const auto num_depth_features =
+    this->declare_parameter<std::int64_t>("num_depth_features", descriptor);
+
+  // Head parameters
+  const auto num_proposals = this->declare_parameter<std::int64_t>("num_proposals", descriptor);
+  class_names_ = this->declare_parameter<std::vector<std::string>>("class_names", descriptor);
+
+  if (point_cloud_range.size() != 6) {
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_bevfusion"),
+      "The size of point_cloud_range != 6: using the default parameters.");
+  }
+  if (voxel_size.size() != 3) {
+    RCLCPP_WARN_STREAM(
+      rclcpp::get_logger("lidar_bevfusion"),
+      "The size of voxel_size != 3: using the default parameters.");
+  }
+
+  // pre-process
+  const std::string densification_world_frame_id =
+    this->declare_parameter<std::string>("densification_world_frame_id", descriptor);
+  const int densification_num_past_frames =
+    this->declare_parameter<int>("densification_num_past_frames", descriptor);
+
+  // post-process
+  const float circle_nms_dist_threshold =
+    static_cast<float>(this->declare_parameter<double>("circle_nms_dist_threshold", descriptor));
+  {  // IoU NMS
+    NMSParams p;
+    p.nms_type_ = NMS_TYPE::IoU_BEV;
+    p.target_class_names_ =
+      this->declare_parameter<std::vector<std::string>>("iou_nms_target_class_names", descriptor);
+    p.search_distance_2d_ =
+      this->declare_parameter<double>("iou_nms_search_distance_2d", descriptor);
+    p.iou_threshold_ = this->declare_parameter<double>("iou_nms_threshold", descriptor);
+    iou_bev_nms_.setParameters(p);
+  }
+  const auto yaw_norm_thresholds =
+    this->declare_parameter<std::vector<double>>("yaw_norm_thresholds", descriptor);
+  const float score_threshold =
+    static_cast<float>(this->declare_parameter<double>("score_threshold", descriptor));
+
+  DensificationParam densification_param(
+    densification_world_frame_id, densification_num_past_frames);
+
+  BEVFusionConfig config(
+    sensor_fusion_, plugins_path, out_size_factor, cloud_capacity, max_points_per_voxel,
+    voxels_num, point_cloud_range, voxel_size, dbound, xbound, ybound, zbound, num_cameras,
+    raw_image_height, raw_image_width, img_aug_scale_x, img_aug_scale_y, roi_height, roi_width,
+    features_height, features_width, num_depth_features, num_proposals,
+    circle_nms_dist_threshold, yaw_norm_thresholds, score_threshold);
+
+  const auto allow_remapping_by_area_matrix =
+    this->declare_parameter<std::vector<std::int64_t>>(
+      "allow_remapping_by_area_matrix", descriptor);
+  const auto min_area_matrix =
+    this->declare_parameter<std::vector<double>>("min_area_matrix", descriptor);
+  const auto max_area_matrix =
+    this->declare_parameter<std::vector<double>>("max_area_matrix", descriptor);
+  detection_class_remapper_.setParameters(
+    allow_remapping_by_area_matrix, min_area_matrix, max_area_matrix);
trt_config =
+    tensorrt_common::TrtCommonConfig(onnx_path, trt_precision, engine_path, 1ULL << 32U);
+  detector_ptr_ = std::make_unique<BEVFusionTRT>(trt_config, densification_param, config);
+
+  cloud_sub_ = this->create_subscription<sensor_msgs::msg::PointCloud2>(
+    "~/input/pointcloud", rclcpp::SensorDataQoS{}.keep_last(1),
+    std::bind(&LidarBEVFusionNode::cloudCallback, this, std::placeholders::_1));
+
+  objects_pub_ = this->create_publisher<autoware_perception_msgs::msg::DetectedObjects>(
+    "~/output/objects", rclcpp::QoS(1));
+
+  if (sensor_fusion_) {
+    image_subs_.resize(num_cameras);
+    camera_info_subs_.resize(num_cameras);
+    image_msgs_.resize(num_cameras);
+    camera_info_msgs_.resize(num_cameras);
+    lidar2camera_extrinsics_.resize(num_cameras);
+
+    for (std::int64_t camera_id = 0; camera_id < num_cameras; ++camera_id) {
+      image_subs_[camera_id] = this->create_subscription<sensor_msgs::msg::Image>(
+        "~/input/image" + std::to_string(camera_id), rclcpp::SensorDataQoS{}.keep_last(1),
+        [this, camera_id](const sensor_msgs::msg::Image::ConstSharedPtr msg) {
+          this->imageCallback(msg, camera_id);
+        });
+
+      camera_info_subs_[camera_id] = this->create_subscription<sensor_msgs::msg::CameraInfo>(
+        "~/input/camera_info" + std::to_string(camera_id), rclcpp::SensorDataQoS{}.keep_last(1),
+        [this, camera_id](const sensor_msgs::msg::CameraInfo & msg) {
+          this->cameraInfoCallback(msg, camera_id);
+        });
+    }
+  }
+
+  published_time_pub_ = std::make_unique<autoware::universe_utils::PublishedTimePublisher>(this);
+
+  // initialize debug tool
+  {
+    using autoware::universe_utils::DebugPublisher;
+    using autoware::universe_utils::StopWatch;
+    stop_watch_ptr_ = std::make_unique<StopWatch<std::chrono::milliseconds>>();
+    debug_publisher_ptr_ = std::make_unique<DebugPublisher>(this, this->get_name());
+    stop_watch_ptr_->tic("cyclic");
+    stop_watch_ptr_->tic("processing/total");
+  }
+
+  if (this->declare_parameter("build_only", false, descriptor)) {
+    RCLCPP_INFO(this->get_logger(), "TensorRT engine was built. Shutting down the node.");
+    rclcpp::shutdown();
+  }
+}
+
+void LidarBEVFusionNode::cloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr pc_msg)
+{
+  lidar_frame_ = pc_msg->header.frame_id;
+
+  if (sensor_fusion_ && (!extrinsics_available_ || !images_available_ || !intrinsics_available_)) {
+    return;
+  }
+
+  if (sensor_fusion_ && !intrinsics_extrinsics_precomputed_) {
+    std::vector<sensor_msgs::msg::CameraInfo> camera_info_msgs;
+    std::vector<Matrix4f> lidar2camera_extrinsics;
+
+    std::transform(
+      camera_info_msgs_.begin(), camera_info_msgs_.end(), std::back_inserter(camera_info_msgs),
+      [](const auto & opt) { return *opt; });
+
+    std::transform(
+      lidar2camera_extrinsics_.begin(), lidar2camera_extrinsics_.end(),
+      std::back_inserter(lidar2camera_extrinsics), [](const auto & opt) { return *opt; });
+
+    detector_ptr_->setIntrinsicsExtrinsics(camera_info_msgs, lidar2camera_extrinsics);
+    intrinsics_extrinsics_precomputed_ = true;
+  }
+
+  const auto objects_sub_count =
+    objects_pub_->get_subscription_count() + objects_pub_->get_intra_process_subscription_count();
+  if (objects_sub_count < 1) {
+    return;
+  }
+
+  if (stop_watch_ptr_) {
+    stop_watch_ptr_->toc("processing/total", true);
+  }
+
+  double lidar_stamp = rclcpp::Time(pc_msg->header.stamp).seconds();
+  camera_masks_.resize(camera_info_msgs_.size());
+  for (std::size_t i = 0; i < camera_masks_.size(); ++i) {
+    camera_masks_[i] =
+      (lidar_stamp - rclcpp::Time(image_msgs_[i]->header.stamp).seconds()) <
+          max_camera_lidar_delay_
+        ? 1.f
+        : 0.f;
+  }
+
+  std::vector<Box3D> det_boxes3d;
+  std::unordered_map<std::string, double> proc_timing;
+  bool is_success =
+    detector_ptr_->detect(pc_msg, image_msgs_, camera_masks_, tf_buffer_, det_boxes3d, proc_timing);
+  if (!is_success) {
+    return;
+  }
+
+  std::vector<autoware_perception_msgs::msg::DetectedObject> raw_objects;
+  raw_objects.reserve(det_boxes3d.size());
+  for (const auto & box3d : det_boxes3d) {
+    autoware_perception_msgs::msg::DetectedObject obj;
+    box3DToDetectedObject(box3d, class_names_, obj);
+    raw_objects.emplace_back(obj);
+  }
+
+  autoware_perception_msgs::msg::DetectedObjects output_msg;
+  output_msg.header = pc_msg->header;
+  output_msg.objects = iou_bev_nms_.apply(raw_objects);
+
+  detection_class_remapper_.mapClasses(output_msg);
+
+  if (objects_sub_count > 0) {
+    objects_pub_->publish(output_msg);
+    published_time_pub_->publish_if_subscribed(objects_pub_, output_msg.header.stamp);
+  }
+
+  // add processing time for debug
+  if (debug_publisher_ptr_ && stop_watch_ptr_) {
+    const double cyclic_time_ms = stop_watch_ptr_->toc("cyclic", true);
+    const double processing_time_ms = stop_watch_ptr_->toc("processing/total", true);
+    const double pipeline_latency_ms =
+      std::chrono::duration<double, std::milli>(
+        std::chrono::nanoseconds(
+          (this->get_clock()->now() - output_msg.header.stamp).nanoseconds()))
+        .count();
+    debug_publisher_ptr_->publish(
+      "debug/cyclic_time_ms", cyclic_time_ms);
+    debug_publisher_ptr_->publish(
+      "debug/pipeline_latency_ms", pipeline_latency_ms);
+    debug_publisher_ptr_->publish(
+      "debug/processing_time/total_ms", processing_time_ms);
+    for (const auto & [topic, time_ms] : proc_timing) {
+      debug_publisher_ptr_->publish(topic, time_ms);
+    }
+  }
+}
+
+void LidarBEVFusionNode::imageCallback(
+  const sensor_msgs::msg::Image::ConstSharedPtr msg, std::size_t camera_id)
+{
+  image_msgs_[camera_id] = msg;
+
+  std::size_t num_valid_images = std::count_if(
+    image_msgs_.begin(), image_msgs_.end(),
+    [](const auto & image_msg) { return image_msg != nullptr; });
+
+  images_available_ = num_valid_images == image_msgs_.size();
+}
+
+void LidarBEVFusionNode::cameraInfoCallback(
+  const sensor_msgs::msg::CameraInfo & msg, std::size_t camera_id)
+{
+  camera_info_msgs_[camera_id] = msg;
+
+  std::size_t num_valid_intrinsics = std::count_if(
+    camera_info_msgs_.begin(), camera_info_msgs_.end(),
+    [](const auto & opt) { return opt.has_value(); });
+
+  intrinsics_available_ = num_valid_intrinsics == camera_info_msgs_.size();
+
+  if (
+    lidar2camera_extrinsics_[camera_id].has_value() || !lidar_frame_.has_value() ||
+    extrinsics_available_) {
+    return;
+  }
+
+  try {
+    geometry_msgs::msg::TransformStamped transform_stamped;
+    transform_stamped =
+      tf_buffer_.lookupTransform(msg.header.frame_id, *lidar_frame_, msg.header.stamp);
+
+    Eigen::Matrix4f lidar2camera_transform =
+      tf2::transformToEigen(transform_stamped.transform).matrix().cast<float>();
+
+    Matrix4f lidar2camera_rowmajor_transform = lidar2camera_transform.eval();
+    lidar2camera_extrinsics_[camera_id] = lidar2camera_rowmajor_transform;
+  } catch (tf2::TransformException & ex) {
+    RCLCPP_WARN(this->get_logger(), "%s", ex.what());
+    return;
+  }
+
+  std::size_t num_valid_extrinsics = std::count_if(
+    lidar2camera_extrinsics_.begin(), lidar2camera_extrinsics_.end(),
+    [](const auto & opt) { return opt.has_value(); });
+
+  extrinsics_available_ = num_valid_extrinsics == lidar2camera_extrinsics_.size();
+}
+
+}  // namespace autoware::lidar_bevfusion
+
+#include "rclcpp_components/register_node_macro.hpp"
+
+RCLCPP_COMPONENTS_REGISTER_NODE(autoware::lidar_bevfusion::LidarBEVFusionNode) diff --git a/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.cpp b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.cpp new file mode 100644 index 0000000000000..b69cb4e9710a0 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.cpp @@ -0,0 +1,421 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin.hpp" + +#include "autoware/tensorrt_plugins/plugin_utils.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// GetIndicePairsImplicitGemm + +namespace nvinfer1 +{ +namespace plugin +{ + +GetIndicePairsImplicitGemmPlugin::GetIndicePairsImplicitGemmPlugin( + const std::string & name, GetIndicePairsImplicitGemmParameters const & params) +: layer_name_{name}, params_{params} +{ + initFieldsToSerialize(); +} + +void GetIndicePairsImplicitGemmPlugin::initFieldsToSerialize() +{ + data_to_serialize_.clear(); + data_to_serialize_.emplace_back("batch_size", ¶ms_.batch_size, PluginFieldType::kINT32, 1); + data_to_serialize_.emplace_back("algo", ¶ms_.algo, PluginFieldType::kINT32, 1); + data_to_serialize_.emplace_back("is_train", ¶ms_.is_train, PluginFieldType::kINT32, 1); + data_to_serialize_.emplace_back( + "dilation_dims", ¶ms_.dilation_dims, PluginFieldType::kDIMS, 1); + data_to_serialize_.emplace_back("ksize_dims", ¶ms_.ksize_dims, PluginFieldType::kDIMS, 1); + data_to_serialize_.emplace_back( + "out_padding_dims", ¶ms_.out_padding_dims, PluginFieldType::kDIMS, 1); + data_to_serialize_.emplace_back("padding_dims", ¶ms_.padding_dims, PluginFieldType::kDIMS, 1); + data_to_serialize_.emplace_back( + "spatial_shape_dims", ¶ms_.spatial_shape_dims, PluginFieldType::kDIMS, 1); + data_to_serialize_.emplace_back("stride_dims", ¶ms_.stride_dims, PluginFieldType::kDIMS, 1); + data_to_serialize_.emplace_back("subm", ¶ms_.subm, PluginFieldType::kINT32, 1); + data_to_serialize_.emplace_back("transpose", ¶ms_.transpose, PluginFieldType::kINT32, 1); + + fc_to_serialize_.nbFields = data_to_serialize_.size(); + fc_to_serialize_.fields = data_to_serialize_.data(); +} + +IPluginCapability * GetIndicePairsImplicitGemmPlugin::getCapabilityInterface( + PluginCapabilityType type) noexcept +{ + try { + if (type == PluginCapabilityType::kBUILD) { + return static_cast(this); + } + if (type == PluginCapabilityType::kRUNTIME) { + return static_cast(this); + } + PLUGIN_ASSERT(type == PluginCapabilityType::kCORE); + return static_cast(this); + } catch (std::exception const & e) { + caughtError(e); + } + return nullptr; +} + +IPluginV3 * GetIndicePairsImplicitGemmPlugin::clone() noexcept +{ + try { + IPluginV3 * const plugin{new 
GetIndicePairsImplicitGemmPlugin{layer_name_, params_}}; + return plugin; + } catch (std::exception const & e) { + caughtError(e); + } + return nullptr; +} + +char const * GetIndicePairsImplicitGemmPlugin::getPluginName() const noexcept +{ + return kGET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_NAME; +} + +char const * GetIndicePairsImplicitGemmPlugin::getPluginVersion() const noexcept +{ + return kGET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_VERSION; +} + +char const * GetIndicePairsImplicitGemmPlugin::getPluginNamespace() const noexcept +{ + return kGET_INDICE_PAIRS_IMPLICIT_GEMM_PLUGIN_NAMESPACE; +} + +std::int32_t GetIndicePairsImplicitGemmPlugin::getNbOutputs() const noexcept +{ + return 5; +} + +std::int32_t GetIndicePairsImplicitGemmPlugin::configurePlugin( + DynamicPluginTensorDesc const * in, std::int32_t num_inputs, DynamicPluginTensorDesc const * out, + std::int32_t num_outputs) noexcept +{ + // Validate input arguments. + PLUGIN_ASSERT(num_inputs == 1); + PLUGIN_ASSERT(num_outputs == 5); + PLUGIN_ASSERT(in[0].desc.dims.nbDims == 2); + PLUGIN_ASSERT(out[0].desc.dims.nbDims == 2); + PLUGIN_ASSERT(out[1].desc.dims.nbDims == 2); + PLUGIN_ASSERT(out[2].desc.dims.nbDims == 2); + PLUGIN_ASSERT(out[3].desc.dims.nbDims == 1); + PLUGIN_ASSERT(out[4].desc.dims.nbDims == 0); + + std::int64_t kernel_volume = 1; + for (const std::int64_t ksize : params_.ksize) { + kernel_volume *= ksize; + } + + PLUGIN_ASSERT(in[0].desc.dims.d[1] == 4); // coords + 1 + + PLUGIN_ASSERT(out[0].desc.dims.d[1] == 4); // coords + 1 + PLUGIN_ASSERT(out[0].desc.type == in[0].desc.type); + + PLUGIN_ASSERT(out[1].desc.dims.d[0] == kernel_volume); + PLUGIN_ASSERT(out[1].desc.type == in[0].desc.type); + + PLUGIN_ASSERT(out[2].desc.dims.d[1] == 1); + PLUGIN_ASSERT(out[2].desc.type == in[0].desc.type); + + PLUGIN_ASSERT(out[3].desc.type == in[0].desc.type); + + PLUGIN_ASSERT(out[4].desc.type == in[0].desc.type); + + return 0; +} + +bool GetIndicePairsImplicitGemmPlugin::supportsFormatCombination( + std::int32_t pos, DynamicPluginTensorDesc const * in_out, std::int32_t num_inputs, + std::int32_t num_outputs) noexcept +{ + PLUGIN_ASSERT(num_inputs == 1); + PLUGIN_ASSERT(num_outputs == 5); + + return ( + in_out[pos].desc.format == nvinfer1::TensorFormat::kLINEAR && + in_out[pos].desc.type == nvinfer1::DataType::kINT32); +} + +std::int32_t GetIndicePairsImplicitGemmPlugin::getOutputDataTypes( + DataType * output_types, std::int32_t num_outputs, DataType const * input_types, + std::int32_t num_inputs) const noexcept +{ + PLUGIN_ASSERT(num_inputs == 1); + PLUGIN_ASSERT(num_outputs == 5); + + output_types[0] = input_types[0]; + output_types[1] = input_types[0]; + output_types[2] = input_types[0]; + output_types[3] = input_types[0]; + output_types[4] = input_types[0]; + + return 0; +} + +std::int32_t GetIndicePairsImplicitGemmPlugin::getOutputShapes( + DimsExprs const * inputs, std::int32_t num_inputs, + [[maybe_unused]] DimsExprs const * shape_inputs, [[maybe_unused]] std::int32_t num_shape_inputs, + DimsExprs * outputs, std::int32_t num_outputs, IExprBuilder & expr_builder) noexcept +{ + PLUGIN_ASSERT(num_inputs == 1); + PLUGIN_ASSERT(num_outputs == 5); + PLUGIN_ASSERT(inputs[0].nbDims == 2); + + std::int64_t kernel_volume = 1; + + for (std::size_t i = 0; i < params_.ksize.size(); ++i) { + kernel_volume *= params_.ksize[i]; + } + + if (params_.subm) { + outputs[0] = inputs[0]; + outputs[0].d[1] = inputs[0].d[1]; + + outputs[1].nbDims = 2; + outputs[1].d[0] = expr_builder.constant(kernel_volume); + outputs[1].d[1] = inputs[0].d[0]; + + 
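+    // Submanifold branch: the output indices coincide with the input indices,
+    // so outputs 0-3 are expressed directly in terms of inputs[0].d[0] and
+    // stay data-independent. The non-subm branch below instead declares
+    // data-dependent extents via declareSizeTensor(), with output 4
+    // (num_activate_out) acting as the size tensor and out_inds_num_limit_ as
+    // its build-time upper bound.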
outputs[2].nbDims = 2; + outputs[2].d[0] = inputs[0].d[0]; + outputs[2].d[1] = expr_builder.constant(1); + + outputs[3].nbDims = 1; + outputs[3].d[0] = inputs[0].d[0]; + + } else { + auto opt_value = expr_builder.operation( + DimensionOperation::kCEIL_DIV, *inputs[0].d[0], *expr_builder.constant(2)); + + outputs[0].nbDims = 2; + outputs[0].d[0] = + expr_builder.declareSizeTensor(4, *opt_value, *expr_builder.constant(out_inds_num_limit_)); + outputs[0].d[1] = inputs[0].d[1]; + + outputs[1].nbDims = 2; + outputs[1].d[0] = expr_builder.constant(kernel_volume); + outputs[1].d[1] = + expr_builder.declareSizeTensor(4, *opt_value, *expr_builder.constant(out_inds_num_limit_)); + + outputs[2].nbDims = 2; + outputs[2].d[0] = + expr_builder.declareSizeTensor(4, *opt_value, *expr_builder.constant(out_inds_num_limit_)); + outputs[2].d[1] = expr_builder.constant(1); + + outputs[3].nbDims = 1; + outputs[3].d[0] = + expr_builder.declareSizeTensor(4, *opt_value, *expr_builder.constant(out_inds_num_limit_)); + } + + // num_activate_out + outputs[4].nbDims = 0; + + return 0; +} + +std::int32_t GetIndicePairsImplicitGemmPlugin::enqueue( + PluginTensorDesc const * input_desc, [[maybe_unused]] PluginTensorDesc const * output_desc, + void const * const * inputs, void * const * outputs, [[maybe_unused]] void * workspace, + cudaStream_t stream) noexcept +{ + using SpconvOps = spconvlib::spconv::csrc::sparse::all::SpconvOps; + using StaticAllocator = spconvlib::spconv::csrc::sparse::alloc::StaticAllocator; + + const bool is_subm = params_.subm; + const bool direct_table = true; + const bool use_direct_table = direct_table && !is_subm; + const int static_num_act_in = out_inds_num_limit_; + + std::vector ksize(params_.ksize.begin(), params_.ksize.end()); + std::vector stride(params_.stride.begin(), params_.stride.end()); + std::vector padding(params_.padding.begin(), params_.padding.end()); + std::vector dilation(params_.dilation.begin(), params_.dilation.end()); + std::vector input_dims(params_.spatial_shape.begin(), params_.spatial_shape.end()); + + auto out_dims = SpconvOps::get_conv_output_size(input_dims, ksize, stride, padding, dilation); + std::vector output_dims_i64(out_dims.begin(), out_dims.end()); + std::int64_t out_spatial_volume = std::accumulate( + output_dims_i64.begin(), output_dims_i64.end(), static_cast(1), + std::multiplies()); + + bool use_int64_hash_k = + out_spatial_volume >= static_cast(std::numeric_limits::max()); + + int kernel_volume = 1; + for (const auto & ksize : params_.ksize) { + kernel_volume *= ksize; + } + + auto max_act_out_theory = SpconvOps::get_handcrafted_max_act_out( + input_desc[0].dims.d[0], ksize, stride, padding, dilation); + + auto ws_tensors = SpconvOps::get_indice_gen_tensors_from_workspace( + reinterpret_cast(workspace), kernel_volume, out_inds_num_limit_, + is_subm ? out_inds_num_limit_ : out_inds_num_limit_, max_act_out_theory, is_subm, + use_int64_hash_k, use_direct_table); + + int pair_fwd_size_padded = is_subm ? input_desc[0].dims.d[0] : out_inds_num_limit_; + tv::Tensor pair_fwd_padded = + tv::from_blob(outputs[1], {kernel_volume, pair_fwd_size_padded}, tv::int32, 0); + + bool is_split_mask = + params_.algo == static_cast(tv::gemm::SparseConvAlgo::kMaskSplitImplicitGemm); + int mask_count = is_split_mask ? 
2 : 1; + + tv::Tensor pair_mask_fwd_padded = + tv::from_blob(outputs[2], {mask_count, pair_fwd_size_padded}, tv::int32, 0); + tv::Tensor mask_argsort_fwd_padded = + tv::from_blob(outputs[3], {mask_count, pair_fwd_size_padded}, tv::int32, 0); + tv::Tensor out_inds = tv::from_blob( + outputs[0], {is_subm ? input_desc[0].dims.d[0] : out_inds_num_limit_, 4}, tv::int32, 0); + tv::Tensor indices_kernel_num = tv::zeros({kernel_volume}, tv::int32, 0); + + tv::Tensor input_indices = tv::from_blob(inputs[0], {input_desc[0].dims.d[0], 4}, tv::int32, 0); + + std::tuple pair_res; + + tv::Context ctx; + ctx.set_cuda_stream_int(reinterpret_cast(stream)); + + if (is_subm) { + out_inds.copy_(input_indices, ctx); + + ws_tensors.insert({SPCONV_ALLOC_PAIR_FWD, pair_fwd_padded}); + ws_tensors.insert({SPCONV_ALLOC_PAIR_MASK, pair_mask_fwd_padded}); + ws_tensors.insert({SPCONV_ALLOC_MASK_ARG_SORT, mask_argsort_fwd_padded}); + ws_tensors.insert({SPCONV_ALLOC_OUT_INDICES, out_inds}); + ws_tensors.insert({SPCONV_ALLOC_INDICE_NUM_PER_LOC, indices_kernel_num}); + StaticAllocator alloc(ws_tensors); + + pair_res = SpconvOps::get_indice_pairs_implicit_gemm( + alloc, input_indices, params_.batch_size, input_dims, static_cast(params_.algo), ksize, + stride, padding, dilation, {0, 0, 0}, params_.subm, params_.transpose, false /*is_train*/, + reinterpret_cast(stream), out_inds_num_limit_, tv::CUDAKernelTimer(false), + use_direct_table); + + } else { + tv::Tensor pair_bwd_padded = tv::empty({kernel_volume, static_num_act_in}, tv::int32, 0); + tv::Tensor pair_mask_bwd_padded = tv::empty({mask_count, static_num_act_in}, tv::int32, 0); + tv::Tensor mask_argsort_bwd_padded = tv::empty({mask_count, static_num_act_in}, tv::int32, 0); + + ws_tensors.insert({SPCONV_ALLOC_PAIR_FWD, pair_fwd_padded}); + ws_tensors.insert({SPCONV_ALLOC_PAIR_BWD, pair_bwd_padded}); + + ws_tensors.insert({SPCONV_ALLOC_PAIR_MASK, pair_mask_fwd_padded}); + ws_tensors.insert({SPCONV_ALLOC_PAIR_MASK_BWD, pair_mask_bwd_padded}); + + ws_tensors.insert({SPCONV_ALLOC_MASK_ARG_SORT, mask_argsort_fwd_padded}); + ws_tensors.insert({SPCONV_ALLOC_MASK_ARG_SORT_BWD, mask_argsort_bwd_padded}); + + ws_tensors.insert({SPCONV_ALLOC_OUT_INDICES, out_inds}); + ws_tensors.insert({SPCONV_ALLOC_INDICE_NUM_PER_LOC, indices_kernel_num}); + + StaticAllocator alloc(ws_tensors); + + pair_res = SpconvOps::get_indice_pairs_implicit_gemm( + alloc, input_indices, params_.batch_size, input_dims, static_cast(params_.algo), ksize, + stride, padding, dilation, {0, 0, 0}, params_.subm, params_.transpose, false /*is_train*/, + reinterpret_cast(stream), out_inds_num_limit_, tv::CUDAKernelTimer(false), + use_direct_table); + } + + std::int32_t num_act_out_real = std::get<1>(pair_res); + std::int32_t * num_act_out_data = static_cast(outputs[4]); + + cudaError_t const status = cudaMemcpyAsync( + num_act_out_data, &num_act_out_real, sizeof(std::int32_t), cudaMemcpyHostToDevice, stream); + + return status; +} + +std::int32_t GetIndicePairsImplicitGemmPlugin::onShapeChange( + [[maybe_unused]] PluginTensorDesc const * in, [[maybe_unused]] std::int32_t num_inputs, + [[maybe_unused]] PluginTensorDesc const * out, [[maybe_unused]] std::int32_t num_outputs) noexcept +{ + return 0; +} + +IPluginV3 * GetIndicePairsImplicitGemmPlugin::attachToContext( + [[maybe_unused]] IPluginResourceContext * context) noexcept +{ + return clone(); +} + +PluginFieldCollection const * GetIndicePairsImplicitGemmPlugin::getFieldsToSerialize() noexcept +{ + return &fc_to_serialize_; +} + +std::size_t 
GetIndicePairsImplicitGemmPlugin::getWorkspaceSize( + [[maybe_unused]] DynamicPluginTensorDesc const * inputs, [[maybe_unused]] std::int32_t num_inputs, + [[maybe_unused]] DynamicPluginTensorDesc const * outputs, + [[maybe_unused]] std::int32_t num_outputs) const noexcept +{ + using SpconvOps = spconvlib::spconv::csrc::sparse::all::SpconvOps; + + bool is_subm = params_.subm; + const bool direct_table = true; + const bool use_direct_table = direct_table && !is_subm; + + std::vector ksize(params_.ksize.begin(), params_.ksize.end()); + std::vector stride(params_.stride.begin(), params_.stride.end()); + std::vector padding(params_.padding.begin(), params_.padding.end()); + std::vector dilation(params_.dilation.begin(), params_.dilation.end()); + std::vector input_dims(params_.spatial_shape.begin(), params_.spatial_shape.end()); + + auto out_dims = SpconvOps::get_conv_output_size(input_dims, ksize, stride, padding, dilation); + std::vector output_dims_i64(out_dims.begin(), out_dims.end()); + std::int64_t out_spatial_volume = std::accumulate( + output_dims_i64.begin(), output_dims_i64.end(), static_cast(1), + std::multiplies()); + bool use_int64_hash_k = + out_spatial_volume >= static_cast(std::numeric_limits::max()); + + int kernel_volume = 1; + for (const auto & ksize : params_.ksize) { + kernel_volume *= ksize; + } + + // query workspace size. + int workspace_size = SpconvOps::get_indice_gen_workspace_size( + kernel_volume, out_inds_num_limit_, out_inds_num_limit_, out_inds_num_limit_, is_subm, + use_int64_hash_k, use_direct_table); + + return static_cast(workspace_size); +} + +} // namespace plugin +} // namespace nvinfer1 diff --git a/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin_creator.cpp b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin_creator.cpp new file mode 100644 index 0000000000000..2ddb213cbd265 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin_creator.cpp @@ -0,0 +1,324 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
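getWorkspaceSize above runs at engine-build time, so the spconv scratch estimate may depend only on build-time constants such as out_inds_num_limit_, never on the runtime voxel count. A minimal sketch of that contract (bound and names illustrative, not this package's API):

#include <cstddef>
#include <cstdint>

// Worst-case scratch for one int32 pair table of kernel_volume x max_indices
// entries; a fixed bound lets TensorRT allocate the workspace once up front.
std::size_t worst_case_workspace_bytes(std::int64_t kernel_volume, std::int64_t max_indices)
{
  return sizeof(std::int32_t) * static_cast<std::size_t>(kernel_volume) *
         static_cast<std::size_t>(max_indices);
}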
+ +#include "autoware/tensorrt_plugins//get_indice_pairs_implicit_gemm_plugin_creator.hpp" + +#include "autoware/tensorrt_plugins//get_indice_pairs_implicit_gemm_plugin.hpp" +#include "autoware/tensorrt_plugins/plugin_utils.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace nvinfer1 +{ +namespace plugin +{ + +REGISTER_TENSORRT_PLUGIN(GetIndicePairsImplicitGemmPluginCreator); + +GetIndicePairsImplicitGemmPluginCreator::GetIndicePairsImplicitGemmPluginCreator() +{ + std::cout << "GetIndicePairsImplicitGemmPluginCreator::GetIndicePairsImplicitGemmPluginCreator" + << std::endl + << std::flush; + + plugin_attributes_.clear(); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("algo", nullptr, PluginFieldType::kINT32, 1)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("batch_size", nullptr, PluginFieldType::kINT32, 1)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("dilation", nullptr, PluginFieldType::kINT32, 3)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("is_train", nullptr, PluginFieldType::kINT32, 1)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("ksize", nullptr, PluginFieldType::kINT32, 3)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("out_padding", nullptr, PluginFieldType::kINT32, 3)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("padding", nullptr, PluginFieldType::kINT32, 3)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("spatial_shape", nullptr, PluginFieldType::kINT32, 3)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("stride", nullptr, PluginFieldType::kINT32, 3)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("subm", nullptr, PluginFieldType::kINT32, 1)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("transpose", nullptr, PluginFieldType::kINT32, 1)); + + fc_.nbFields = plugin_attributes_.size(); + fc_.fields = plugin_attributes_.data(); +} + +nvinfer1::PluginFieldCollection const * +GetIndicePairsImplicitGemmPluginCreator::getFieldNames() noexcept +{ + // This is only used in the build phase. + return &fc_; +} + +IPluginV3 * GetIndicePairsImplicitGemmPluginCreator::createPlugin( + char const * name, PluginFieldCollection const * fc, TensorRTPhase phase) noexcept +{ + // The build phase and the deserialization phase are handled differently. + if (phase == TensorRTPhase::kBUILD || phase == TensorRTPhase::kRUNTIME) { + // The attributes from the ONNX node will be parsed and passed via fc. 
+ try { + nvinfer1::PluginField const * fields{fc->fields}; + std::int32_t num_fields{fc->nbFields}; + + PLUGIN_VALIDATE(num_fields == 11); + + GetIndicePairsImplicitGemmParameters parameters; + + for (std::int32_t i{0}; i < num_fields; ++i) { + const std::string attr_name = fields[i].name; + const nvinfer1::PluginFieldType type = fields[i].type; + + if (attr_name == "batch_size") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + parameters.batch_size = static_cast(fields[i].data)[0]; + } + if (attr_name == "algo") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + parameters.algo = static_cast(fields[i].data)[0]; + } + if (attr_name == "is_train") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + parameters.is_train = static_cast(fields[i].data)[0]; + } + if (attr_name == "dilation") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + std::int32_t const * const dilation_data{ + static_cast(fields[i].data)}; + + parameters.dilation_dims.nbDims = fields[i].length; + for (std::int32_t j{0}; j < fields[i].length; ++j) { + parameters.dilation.push_back(dilation_data[j]); + parameters.dilation_dims.d[j] = static_cast(dilation_data[j]); + } + } + if (attr_name == "dilation_dims") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kDIMS); + parameters.dilation_dims = static_cast(fields[i].data)[0]; + for (std::int32_t j{0}; j < parameters.dilation_dims.nbDims; ++j) { + parameters.dilation.push_back(static_cast(parameters.dilation_dims.d[j])); + } + } + if (attr_name == "ksize") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + std::int32_t const * const ksize_data{static_cast(fields[i].data)}; + parameters.ksize_dims.nbDims = fields[i].length; + for (std::int32_t j{0}; j < fields[i].length; ++j) { + parameters.ksize.push_back(ksize_data[j]); + parameters.ksize_dims.d[j] = static_cast(ksize_data[j]); + } + } + if (attr_name == "ksize_dims") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kDIMS); + parameters.ksize_dims = static_cast(fields[i].data)[0]; + for (std::int32_t j{0}; j < parameters.ksize_dims.nbDims; ++j) { + parameters.ksize.push_back(static_cast(parameters.ksize_dims.d[j])); + } + } + if (attr_name == "out_padding") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + std::int32_t const * const out_padding_data{ + static_cast(fields[i].data)}; + parameters.out_padding_dims.nbDims = fields[i].length; + for (std::int32_t j{0}; j < fields[i].length; ++j) { + parameters.out_padding.push_back(out_padding_data[j]); + parameters.out_padding_dims.d[j] = static_cast(out_padding_data[j]); + } + } + if (attr_name == "out_padding_dims") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kDIMS); + parameters.out_padding_dims = static_cast(fields[i].data)[0]; + for (std::int32_t j{0}; j < parameters.out_padding_dims.nbDims; ++j) { + parameters.out_padding.push_back( + static_cast(parameters.out_padding_dims.d[j])); + } + } + if (attr_name == "padding") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + std::int32_t const * const padding_data{ + static_cast(fields[i].data)}; + parameters.padding_dims.nbDims = fields[i].length; + for (std::int32_t j{0}; j < fields[i].length; ++j) { + parameters.padding.push_back(padding_data[j]); + parameters.padding_dims.d[j] = static_cast(padding_data[j]); + } + } + if (attr_name == "padding_dims") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kDIMS); + parameters.padding_dims = static_cast(fields[i].data)[0]; + for (std::int32_t j{0}; j 
< parameters.padding_dims.nbDims; ++j) { + parameters.padding.push_back(static_cast(parameters.padding_dims.d[j])); + } + } + if (attr_name == "spatial_shape") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + std::int32_t const * const spatial_shape_data{ + static_cast(fields[i].data)}; + parameters.spatial_shape_dims.nbDims = fields[i].length; + for (std::int32_t j{0}; j < fields[i].length; ++j) { + parameters.spatial_shape.push_back(spatial_shape_data[j]); + parameters.spatial_shape_dims.d[j] = static_cast(spatial_shape_data[j]); + } + } + if (attr_name == "spatial_shape_dims") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kDIMS); + parameters.spatial_shape_dims = static_cast(fields[i].data)[0]; + for (std::int32_t j{0}; j < parameters.spatial_shape_dims.nbDims; ++j) { + parameters.spatial_shape.push_back( + static_cast(parameters.spatial_shape_dims.d[j])); + } + } + if (attr_name == "stride") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + std::int32_t const * const stride_data{static_cast(fields[i].data)}; + parameters.stride_dims.nbDims = fields[i].length; + for (std::int32_t j{0}; j < fields[i].length; ++j) { + parameters.stride.push_back(stride_data[j]); + parameters.stride_dims.d[j] = static_cast(stride_data[j]); + } + } + if (attr_name == "stride_dims") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kDIMS); + parameters.stride_dims = static_cast(fields[i].data)[0]; + for (std::int32_t j{0}; j < parameters.stride_dims.nbDims; ++j) { + parameters.stride.push_back(static_cast(parameters.stride_dims.d[j])); + } + } + if (attr_name == "subm") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + parameters.subm = static_cast(fields[i].data)[0]; + } + if (attr_name == "transpose") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + parameters.transpose = static_cast(fields[i].data)[0]; + } + } + + // Log the attributes parsed from ONNX node. 
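+      // Both the "ksize"-style and "ksize_dims"-style attributes are handled
+      // above because the two phases encode the same data differently: the
+      // ONNX importer emits raw kINT32 arrays, while getFieldsToSerialize()
+      // stores the kDIMS mirrors, and either form repopulates both
+      // representations.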
+      std::stringstream ss;
+      ss << name << " plugin Attributes:";
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "batch_size: " << parameters.batch_size;
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "algo: " << parameters.algo;
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "is_train: " << parameters.is_train;
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "dilation: ";
+      for (auto const & val : parameters.dilation) {
+        ss << val << " ";
+      }
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "ksize: ";
+      for (auto const & val : parameters.ksize) {
+        ss << val << " ";
+      }
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "out_padding: ";
+      for (auto const & val : parameters.out_padding) {
+        ss << val << " ";
+      }
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "padding: ";
+      for (auto const & val : parameters.padding) {
+        ss << val << " ";
+      }
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "spatial_shape: ";
+      for (auto const & val : parameters.spatial_shape) {
+        ss << val << " ";
+      }
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "stride: ";
+      for (auto const & val : parameters.stride) {
+        ss << val << " ";
+      }
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "subm: " << parameters.subm;
+      logDebug(ss.str().c_str());
+
+      ss.str("");
+      ss << "transpose: " << parameters.transpose;
+      logDebug(ss.str().c_str());
+
+      GetIndicePairsImplicitGemmPlugin * const plugin{
+        new GetIndicePairsImplicitGemmPlugin{std::string(name), parameters}};
+      return plugin;
+    } catch (std::exception const & e) {
+      caughtError(e);
+    }
+    return nullptr;
+  } else if (phase == TensorRTPhase::kRUNTIME) {
+    // The attributes from the serialized plugin will be passed via fc.
+    try {
+      nvinfer1::PluginField const * fields{fc->fields};
+      std::int32_t num_fields{fc->nbFields};
+      PLUGIN_VALIDATE(num_fields == 1);
+
+      char const * attr_name = fields[0].name;
+      PLUGIN_VALIDATE(!strcmp(attr_name, "parameters"));
+      PLUGIN_VALIDATE(fields[0].type == nvinfer1::PluginFieldType::kUNKNOWN);
+      PLUGIN_VALIDATE(fields[0].length == sizeof(GetIndicePairsImplicitGemmParameters));
+      GetIndicePairsImplicitGemmParameters params{
+        *(static_cast<GetIndicePairsImplicitGemmParameters const *>(fields[0].data))};
+
+      GetIndicePairsImplicitGemmPlugin * const plugin{
+        new GetIndicePairsImplicitGemmPlugin{std::string(name), params}};
+      return plugin;
+    } catch (std::exception const & e) {
+      caughtError(e);
+    }
+    return nullptr;
+  } else {
+    return nullptr;
+  }
+}
+
+}  // namespace plugin
+}  // namespace nvinfer1
diff --git a/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/implicit_gemm_plugin.cpp b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/implicit_gemm_plugin.cpp
new file mode 100644
index 0000000000000..fc535fe3534b8
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/implicit_gemm_plugin.cpp
@@ -0,0 +1,314 @@
+// Copyright 2025 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
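Because the first branch of createPlugin above accepts both kBUILD and kRUNTIME, the later `else if (phase == TensorRTPhase::kRUNTIME)` blob path is unreachable; deserialization goes through the same named fields that getFieldsToSerialize() emits. A small lookup helper in that spirit, a sketch rather than this package's API:

#include <NvInferRuntime.h>

#include <cstdint>
#include <cstring>

// Finds a named attribute in a PluginFieldCollection; returning nullptr for
// absent fields lets one parser serve both the build and runtime phases.
nvinfer1::PluginField const * find_field(
  nvinfer1::PluginFieldCollection const * fc, char const * name)
{
  for (std::int32_t i = 0; i < fc->nbFields; ++i) {
    if (std::strcmp(fc->fields[i].name, name) == 0) {
      return &fc->fields[i];
    }
  }
  return nullptr;
}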
+ +#include "autoware/tensorrt_plugins/implicit_gemm_plugin.hpp" + +#include "autoware/tensorrt_plugins/plugin_utils.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ImplicitGemm + +namespace nvinfer1 +{ +namespace plugin +{ + +ImplicitGemmPlugin::ImplicitGemmPlugin( + const std::string & name, ImplicitGemmParameters const & params) +: layer_name_{name}, params_{params} +{ + using ConvGemmOps = spconvlib::spconv::csrc::sparse::convops::spops::ConvGemmOps; + using ConvMain = spconvlib::cumm::conv::main::ConvMainUnitTest; + + initFieldsToSerialize(); + + arch_ = ConvGemmOps::get_compute_capability(); + tunner_ptr_ = std::make_unique(ConvMain::get_all_conv_algo_desp()); +} + +void ImplicitGemmPlugin::initFieldsToSerialize() +{ + data_to_serialize_.clear(); + data_to_serialize_.emplace_back("act_alpha", ¶ms_.act_alpha, PluginFieldType::kFLOAT32, 1); + data_to_serialize_.emplace_back("act_alpha", ¶ms_.act_beta, PluginFieldType::kFLOAT32, 1); + + data_to_serialize_.emplace_back("is_subm", ¶ms_.is_subm, PluginFieldType::kINT32, 1); + data_to_serialize_.emplace_back("is_train", ¶ms_.is_train, PluginFieldType::kINT32, 1); + + data_to_serialize_.emplace_back( + "output_add_scale", ¶ms_.output_add_scale, PluginFieldType::kFLOAT32, 1); + data_to_serialize_.emplace_back( + "output_scale", ¶ms_.output_scale, PluginFieldType::kFLOAT32, 1); + + fc_to_serialize_.nbFields = data_to_serialize_.size(); + fc_to_serialize_.fields = data_to_serialize_.data(); +} + +IPluginCapability * ImplicitGemmPlugin::getCapabilityInterface(PluginCapabilityType type) noexcept +{ + try { + if (type == PluginCapabilityType::kBUILD) { + return static_cast(this); + } + if (type == PluginCapabilityType::kRUNTIME) { + return static_cast(this); + } + PLUGIN_ASSERT(type == PluginCapabilityType::kCORE); + return static_cast(this); + } catch (std::exception const & e) { + caughtError(e); + } + return nullptr; +} + +IPluginV3 * ImplicitGemmPlugin::clone() noexcept +{ + try { + IPluginV3 * const plugin{new ImplicitGemmPlugin{layer_name_, params_}}; + return plugin; + } catch (std::exception const & e) { + caughtError(e); + } + return nullptr; +} + +char const * ImplicitGemmPlugin::getPluginName() const noexcept +{ + return kIMPLICIT_GEMM_PLUGIN_NAME; +} + +char const * ImplicitGemmPlugin::getPluginVersion() const noexcept +{ + return kIMPLICIT_GEMM_PLUGIN_VERSION; +} + +char const * ImplicitGemmPlugin::getPluginNamespace() const noexcept +{ + return kIMPLICIT_GEMM_PLUGIN_NAMESPACE; +} + +std::int32_t ImplicitGemmPlugin::getNbOutputs() const noexcept +{ + return 1; +} + +std::int32_t ImplicitGemmPlugin::configurePlugin( + DynamicPluginTensorDesc const * in, std::int32_t num_inputs, DynamicPluginTensorDesc const * out, + std::int32_t num_outputs) noexcept +{ + // Validate input arguments. 
+ PLUGIN_ASSERT(num_inputs == 5); + PLUGIN_ASSERT(num_outputs == 1); + PLUGIN_ASSERT(in[INOUT_IN_FEATURES_INDEX].desc.dims.nbDims == 2); + PLUGIN_ASSERT(in[INOUT_FILTERS_INDEX].desc.dims.nbDims == 5); + PLUGIN_ASSERT(in[INOUT_PAIR_FWD_INDEX].desc.dims.nbDims == 2); + PLUGIN_ASSERT(in[INOUT_PAIR_MASK_FWD_SPLITS_INDEX].desc.dims.nbDims == 2); + PLUGIN_ASSERT(in[INOUT_MASK_ARGSORT_FWD_SPLITS_INDEX].desc.dims.nbDims == 1); + PLUGIN_ASSERT(out[0].desc.dims.nbDims == 2); + PLUGIN_ASSERT( + in[INOUT_FILTERS_INDEX].desc.dims.d[4] == in[INOUT_IN_FEATURES_INDEX].desc.dims.d[1]); + PLUGIN_ASSERT( + in[INOUT_PAIR_MASK_FWD_SPLITS_INDEX].desc.dims.d[0] == in[INOUT_PAIR_FWD_INDEX].desc.dims.d[1]); + PLUGIN_ASSERT(in[INOUT_PAIR_MASK_FWD_SPLITS_INDEX].desc.dims.d[1] == 1); + PLUGIN_ASSERT( + in[INOUT_MASK_ARGSORT_FWD_SPLITS_INDEX].desc.dims.d[0] == + in[INOUT_PAIR_FWD_INDEX].desc.dims.d[1]); + PLUGIN_ASSERT(in[INOUT_IN_FEATURES_INDEX].desc.type == in[INOUT_FILTERS_INDEX].desc.type); + PLUGIN_ASSERT(in[INOUT_IN_FEATURES_INDEX].desc.type == out[0].desc.type); + PLUGIN_ASSERT( + in[INOUT_PAIR_FWD_INDEX].desc.type == in[INOUT_PAIR_MASK_FWD_SPLITS_INDEX].desc.type); + PLUGIN_ASSERT( + in[INOUT_PAIR_FWD_INDEX].desc.type == in[INOUT_MASK_ARGSORT_FWD_SPLITS_INDEX].desc.type); + + return 0; +} + +bool ImplicitGemmPlugin::supportsFormatCombination( + std::int32_t pos, DynamicPluginTensorDesc const * in_out, std::int32_t num_inputs, + std::int32_t num_outputs) noexcept +{ + PLUGIN_ASSERT(num_inputs == 5); + PLUGIN_ASSERT(num_outputs == 1); + + bool supported = in_out[pos].desc.format == nvinfer1::TensorFormat::kLINEAR; + + switch (pos) { + case INOUT_IN_FEATURES_INDEX: + case INOUT_FILTERS_INDEX: + case INOUT_OUT_FEATURES_INDEX: + supported &= in_out[pos].desc.type == nvinfer1::DataType::kFLOAT; + break; + case INOUT_PAIR_FWD_INDEX: + case INOUT_PAIR_MASK_FWD_SPLITS_INDEX: + case INOUT_MASK_ARGSORT_FWD_SPLITS_INDEX: + supported &= in_out[pos].desc.type == nvinfer1::DataType::kINT32; + break; + default: + supported = false; + break; + } + + return supported; +} + +std::int32_t ImplicitGemmPlugin::getOutputDataTypes( + DataType * output_types, std::int32_t num_outputs, DataType const * input_types, + std::int32_t num_inputs) const noexcept +{ + PLUGIN_ASSERT(num_inputs == 5); + PLUGIN_ASSERT(num_outputs == 1); + + output_types[0] = input_types[INOUT_IN_FEATURES_INDEX]; + + return 0; +} + +std::int32_t ImplicitGemmPlugin::getOutputShapes( + DimsExprs const * inputs, std::int32_t num_inputs, + [[maybe_unused]] DimsExprs const * shape_inputs, [[maybe_unused]] std::int32_t num_shape_inputs, + DimsExprs * outputs, std::int32_t num_outputs, + [[maybe_unused]] IExprBuilder & expr_builder) noexcept +{ + PLUGIN_ASSERT(num_inputs == 5); + PLUGIN_ASSERT(num_outputs == 1); + PLUGIN_ASSERT(inputs[0].nbDims == 2); + + outputs[0].nbDims = 2; + outputs[0].d[0] = inputs[3].d[0]; + outputs[0].d[1] = inputs[1].d[0]; + + return 0; +} + +std::int32_t ImplicitGemmPlugin::enqueue( + PluginTensorDesc const * input_desc, [[maybe_unused]] PluginTensorDesc const * output_desc, + void const * const * inputs, void * const * outputs, [[maybe_unused]] void * workspace, + cudaStream_t stream) noexcept +{ + using StaticAllocator = spconvlib::spconv::csrc::sparse::alloc::StaticAllocator; + using ConvGemmOps = spconvlib::spconv::csrc::sparse::convops::spops::ConvGemmOps; + + std::int64_t num_act_in = input_desc[INOUT_IN_FEATURES_INDEX].dims.d[0]; + std::int64_t num_in_features = input_desc[INOUT_IN_FEATURES_INDEX].dims.d[1]; + // std::int64_t 
kernel_volume = input_desc[INOUT_PAIR_FWD_INDEX].dims.d[0]; + std::int64_t num_act_out = input_desc[INOUT_PAIR_FWD_INDEX].dims.d[1]; + std::int64_t num_out_features = input_desc[INOUT_FILTERS_INDEX].dims.d[0]; + + tv::Tensor input_features = + tv::from_blob(inputs[INOUT_IN_FEATURES_INDEX], {num_act_in, num_in_features}, tv::float32, 0); + + tv::Tensor weights = tv::from_blob( + inputs[INOUT_FILTERS_INDEX], + {input_desc[INOUT_FILTERS_INDEX].dims.d[0], input_desc[INOUT_FILTERS_INDEX].dims.d[1], + input_desc[INOUT_FILTERS_INDEX].dims.d[2], input_desc[INOUT_FILTERS_INDEX].dims.d[3], + input_desc[INOUT_FILTERS_INDEX].dims.d[4]}, + tv::float32, 0); + + tv::Tensor pair_fwd = tv::from_blob( + inputs[INOUT_PAIR_FWD_INDEX], + {input_desc[INOUT_PAIR_FWD_INDEX].dims.d[0], input_desc[INOUT_PAIR_FWD_INDEX].dims.d[1]}, + tv::int32, 0); + + tv::Tensor pair_mask_fwd_splits = tv::from_blob( + inputs[INOUT_PAIR_MASK_FWD_SPLITS_INDEX], + {1, input_desc[INOUT_PAIR_MASK_FWD_SPLITS_INDEX].dims.d[0]}, tv::int32, 0); + + tv::Tensor mask_argsort_fwd_splits = tv::from_blob( + inputs[INOUT_MASK_ARGSORT_FWD_SPLITS_INDEX], + { + 1, + input_desc[INOUT_MASK_ARGSORT_FWD_SPLITS_INDEX].dims.d[0], + }, + tv::int32, 0); + + PLUGIN_ASSERT( + input_desc[INOUT_PAIR_FWD_INDEX].dims.d[1] == + input_desc[INOUT_MASK_ARGSORT_FWD_SPLITS_INDEX].dims.d[0]); + PLUGIN_ASSERT(input_desc[INOUT_MASK_ARGSORT_FWD_SPLITS_INDEX].dims.nbDims == 1); + + tv::Tensor out_features = + tv::from_blob(outputs[0], {num_act_out, num_out_features}, tv::float32, 0); + + tv::Tensor mask_tensor = tv::zeros({1}, tv::uint32, -1); + + auto mask_tensor_ptr = mask_tensor.data_ptr(); + mask_tensor_ptr[0] = 0xffffffff; + + std::vector pair_mask_splits; + std::vector mask_argsort_splits; + + pair_mask_splits.push_back(pair_mask_fwd_splits); + mask_argsort_splits.push_back(mask_argsort_fwd_splits); + + std::unordered_map tensor_dict{ + {SPCONV_ALLOC_FEATURES, input_features}, + {SPCONV_ALLOC_FILTERS, weights}, + {SPCONV_ALLOC_OUT_FEATURES, out_features}}; + StaticAllocator alloc2(tensor_dict); + + auto conv_run_status = ConvGemmOps::implicit_gemm( + alloc2, *tunner_ptr_, input_features, weights, pair_fwd, pair_mask_splits, mask_argsort_splits, + num_act_out, mask_tensor, arch_, false, params_.is_subm, + reinterpret_cast(stream), tv::CUDAKernelTimer(false), true, false, tv::Tensor(), + 0.0, 0.0, tv::gemm::Activation::kNone, false, 1.0, tv::Tensor(), tv::Tensor(), 0.0, 0); + + return 0; +} + +std::int32_t ImplicitGemmPlugin::onShapeChange( + [[maybe_unused]] PluginTensorDesc const * in, [[maybe_unused]] std::int32_t num_inputs, + [[maybe_unused]] PluginTensorDesc const * out, [[maybe_unused]] std::int32_t num_outputs) noexcept +{ + return 0; +} + +IPluginV3 * ImplicitGemmPlugin::attachToContext( + [[maybe_unused]] IPluginResourceContext * context) noexcept +{ + return clone(); +} + +PluginFieldCollection const * ImplicitGemmPlugin::getFieldsToSerialize() noexcept +{ + return &fc_to_serialize_; +} + +std::size_t ImplicitGemmPlugin::getWorkspaceSize( + [[maybe_unused]] DynamicPluginTensorDesc const * inputs, [[maybe_unused]] std::int32_t num_inputs, + [[maybe_unused]] DynamicPluginTensorDesc const * outputs, + [[maybe_unused]] std::int32_t num_outputs) const noexcept +{ + return 0; +} + +} // namespace plugin +} // namespace nvinfer1 diff --git a/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/implicit_gemm_plugin_creator.cpp b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/implicit_gemm_plugin_creator.cpp new file mode 100644 index 
0000000000000..9c77b04d883d2 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/implicit_gemm_plugin_creator.cpp @@ -0,0 +1,174 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/tensorrt_plugins/implicit_gemm_plugin_creator.hpp" + +#include "autoware/tensorrt_plugins/implicit_gemm_plugin.hpp" +#include "autoware/tensorrt_plugins/plugin_utils.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace nvinfer1 +{ +namespace plugin +{ + +REGISTER_TENSORRT_PLUGIN(ImplicitGemmPluginCreator); + +ImplicitGemmPluginCreator::ImplicitGemmPluginCreator() +{ + std::cout << "ImplicitGemmPluginCreator::ImplicitGemmPluginCreator" << std::endl << std::flush; + + plugin_attributes_.clear(); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("act_alpha", nullptr, PluginFieldType::kFLOAT32, 1)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("act_beta", nullptr, PluginFieldType::kFLOAT32, 1)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("is_subm", nullptr, PluginFieldType::kINT32, 1)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("is_train", nullptr, PluginFieldType::kINT32, 1)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("output_add_scale", nullptr, PluginFieldType::kFLOAT32, 1)); + plugin_attributes_.emplace_back( + nvinfer1::PluginField("output_scale", nullptr, PluginFieldType::kFLOAT32, 1)); + + fc_.nbFields = plugin_attributes_.size(); + fc_.fields = plugin_attributes_.data(); +} + +nvinfer1::PluginFieldCollection const * ImplicitGemmPluginCreator::getFieldNames() noexcept +{ + // This is only used in the build phase. + return &fc_; +} + +IPluginV3 * ImplicitGemmPluginCreator::createPlugin( + char const * name, PluginFieldCollection const * fc, TensorRTPhase phase) noexcept +{ + // The build phase and the deserialization phase are handled differently. + if (phase == TensorRTPhase::kBUILD || phase == TensorRTPhase::kRUNTIME) { + // The attributes from the ONNX node will be parsed and passed via fc. 
+ try { + nvinfer1::PluginField const * fields{fc->fields}; + std::int32_t num_fields{fc->nbFields}; + + PLUGIN_VALIDATE(num_fields == 6); + + ImplicitGemmParameters parameters; + + for (std::int32_t i{0}; i < num_fields; ++i) { + const std::string attr_name = fields[i].name; + const nvinfer1::PluginFieldType type = fields[i].type; + + if (attr_name == "act_alpha") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kFLOAT32); + parameters.act_alpha = static_cast(fields[i].data)[0]; + } + + if (attr_name == "act_beta") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kFLOAT32); + parameters.act_beta = static_cast(fields[i].data)[0]; + } + + if (attr_name == "is_subm") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + parameters.is_subm = static_cast(fields[i].data)[0]; + } + + if (attr_name == "is_train") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32); + parameters.is_train = static_cast(fields[i].data)[0]; + } + + if (attr_name == "output_add_scale") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kFLOAT32); + parameters.output_add_scale = static_cast(fields[i].data)[0]; + } + + if (attr_name == "output_scale") { + PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kFLOAT32); + parameters.output_scale = static_cast(fields[i].data)[0]; + } + } + + // Log the attributes parsed from ONNX node. + std::stringstream ss; + ss << name << " plugin Attributes:"; + logDebug(ss.str().c_str()); + + ss.str(""); + ss << "act_alpha: " << parameters.act_alpha; + logDebug(ss.str().c_str()); + + ss.str(""); + ss << "act_beta: " << parameters.act_beta; + logDebug(ss.str().c_str()); + + ss.str(""); + ss << "is_subm: " << parameters.is_subm; + logDebug(ss.str().c_str()); + + ss.str(""); + ss << "is_train: " << parameters.is_train; + logDebug(ss.str().c_str()); + + ss.str(""); + ss << "output_add_scale: " << parameters.output_add_scale; + logDebug(ss.str().c_str()); + + ss.str(""); + ss << "output_scale: " << parameters.output_scale; + logDebug(ss.str().c_str()); + + ImplicitGemmPlugin * const plugin{new ImplicitGemmPlugin{std::string(name), parameters}}; + return plugin; + } catch (std::exception const & e) { + caughtError(e); + } + return nullptr; + } else if (phase == TensorRTPhase::kRUNTIME) { + // The attributes from the serialized plugin will be passed via fc. + try { + nvinfer1::PluginField const * fields{fc->fields}; + std::int32_t num_fields{fc->nbFields}; + PLUGIN_VALIDATE(num_fields == 1); + + char const * attr_name = fields[0].name; + PLUGIN_VALIDATE(!strcmp(attr_name, "parameters")); + PLUGIN_VALIDATE(fields[0].type == nvinfer1::PluginFieldType::kUNKNOWN); + PLUGIN_VALIDATE(fields[0].length == sizeof(ImplicitGemmParameters)); + ImplicitGemmParameters params{*(static_cast(fields[0].data))}; + + ImplicitGemmPlugin * const plugin{new ImplicitGemmPlugin{std::string(name), params}}; + return plugin; + } catch (std::exception const & e) { + caughtError(e); + } + return nullptr; + } else { + return nullptr; + } +} + +} // namespace plugin +} // namespace nvinfer1 diff --git a/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/plugin_registration.cpp b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/plugin_registration.cpp new file mode 100644 index 0000000000000..1462cba3d656f --- /dev/null +++ b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/plugin_registration.cpp @@ -0,0 +1,73 @@ +// Copyright 2025 TIER IV, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/tensorrt_plugins/get_indice_pairs_implicit_gemm_plugin_creator.hpp" +#include "autoware/tensorrt_plugins/implicit_gemm_plugin_creator.hpp" +#include "autoware/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.hpp" + +#include + +#include +#include +#include + +class ThreadSafeLoggerFinder +{ +public: + ThreadSafeLoggerFinder() = default; + + // Set the logger finder. + void setLoggerFinder(nvinfer1::ILoggerFinder * finder) + { + std::lock_guard lk(mutex_); + if (logger_finder_ == nullptr && finder != nullptr) { + logger_finder_ = finder; + } + } + + // Get the logger. + nvinfer1::ILogger * getLogger() noexcept + { + std::lock_guard lk(mutex_); + if (logger_finder_ != nullptr) { + return logger_finder_->findLogger(); + } + return nullptr; + } + +private: + nvinfer1::ILoggerFinder * logger_finder_{nullptr}; + std::mutex mutex_; +}; + +ThreadSafeLoggerFinder glogger_finder; + +extern "C" void setLoggerFinder(nvinfer1::ILoggerFinder * finder) +{ + glogger_finder.setLoggerFinder(finder); +} + +extern "C" nvinfer1::IPluginCreatorInterface * const * getCreators(std::int32_t & num_creators) +{ + num_creators = 3; + static nvinfer1::plugin::QuickCumsumCudaPluginCreator quick_cumsum_cuda_plugin_creator{}; + static nvinfer1::plugin::GetIndicePairsImplicitGemmPluginCreator + get_indice_pairs_implicit_gemm_plugin_creator{}; + static nvinfer1::plugin::ImplicitGemmPluginCreator implicit_gemm_plugin_creator{}; + + static nvinfer1::IPluginCreatorInterface * const plugin_creator_list[] = { + &quick_cumsum_cuda_plugin_creator, &get_indice_pairs_implicit_gemm_plugin_creator, + &implicit_gemm_plugin_creator}; + return plugin_creator_list; +} diff --git a/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/plugin_utils.cpp b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/plugin_utils.cpp new file mode 100644 index 0000000000000..cabc121e30910 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/plugin_utils.cpp @@ -0,0 +1,57 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
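setLoggerFinder and getCreators above are the C entry points TensorRT resolves when it loads a version-3 plugin shared library (the node supplies the path through its plugins_path parameter). A sketch of exercising that contract by hand; the library name here is hypothetical:

#include <NvInferRuntime.h>

#include <dlfcn.h>

#include <cstdint>
#include <iostream>

int main()
{
  // Hypothetical library name; the real path comes from plugins_path.
  void * handle = dlopen("libautoware_tensorrt_plugins.so", RTLD_LAZY);
  if (handle == nullptr) {
    std::cerr << dlerror() << std::endl;
    return 1;
  }
  using GetCreatorsFn = nvinfer1::IPluginCreatorInterface * const * (*)(std::int32_t &);
  auto get_creators = reinterpret_cast<GetCreatorsFn>(dlsym(handle, "getCreators"));
  if (get_creators == nullptr) {
    std::cerr << "getCreators not exported" << std::endl;
    return 1;
  }
  std::int32_t num_creators{0};
  nvinfer1::IPluginCreatorInterface * const * creators = get_creators(num_creators);
  std::cout << num_creators << " plugin creators available" << std::endl;  // expected: 3
  (void)creators;
  dlclose(handle);
  return 0;
}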
+ +#include "autoware/tensorrt_plugins/plugin_utils.hpp" + +#include + +#include +#include +#include + +void caughtError(std::exception const & e) +{ + getLogger()->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, e.what()); +} + +void logDebug(char const * msg) +{ + getLogger()->log(nvinfer1::ILogger::Severity::kVERBOSE, msg); +} + +void logInfo(char const * msg) +{ + getLogger()->log(nvinfer1::ILogger::Severity::kINFO, msg); +} + +void reportAssertion(bool success, char const * msg, char const * file, std::int32_t line) +{ + if (!success) { + std::ostringstream stream; + stream << "Assertion failed: " << msg << std::endl + << file << ':' << line << std::endl + << "Aborting..." << std::endl; + getLogger()->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, stream.str().c_str()); + std::abort(); + } +} + +void reportValidation(bool success, char const * msg, char const * file, std::int32_t line) +{ + if (!success) { + std::ostringstream stream; + stream << "Validation failed: " << msg << std::endl << file << ':' << line << std::endl; + getLogger()->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, stream.str().c_str()); + } +} diff --git a/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/quick_cumsum_cuda_plugin.cpp b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/quick_cumsum_cuda_plugin.cpp new file mode 100644 index 0000000000000..781b8e3fe5539 --- /dev/null +++ b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/quick_cumsum_cuda_plugin.cpp @@ -0,0 +1,245 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "autoware/tensorrt_plugins/quick_cumsum_cuda_plugin.hpp" + +#include "autoware/bev_ops/bev_pool_cuda.h" +#include "autoware/tensorrt_plugins/plugin_utils.hpp" + +#include +#include + +#include // NOTE(knzo25): temporary +#include +#include +#include +#include +#include +#include + +namespace nvinfer1 +{ +namespace plugin +{ + +QuickCumsumCudaPlugin::QuickCumsumCudaPlugin( + const std::string & name, QuickCumsumCudaParameters const & params) +: layer_name_{name}, params_{params} +{ + initFieldsToSerialize(); +} + +void QuickCumsumCudaPlugin::initFieldsToSerialize() +{ + data_to_serialize_.clear(); + data_to_serialize_.emplace_back("batch_size", ¶ms_.batch_size, PluginFieldType::kINT32, 1); + data_to_serialize_.emplace_back("dimension", ¶ms_.dimension, PluginFieldType::kINT32, 1); + data_to_serialize_.emplace_back("height", ¶ms_.height, PluginFieldType::kINT32, 1); + data_to_serialize_.emplace_back("width", ¶ms_.width, PluginFieldType::kINT32, 1); + + fc_to_serialize_.nbFields = data_to_serialize_.size(); + fc_to_serialize_.fields = data_to_serialize_.data(); +} + +IPluginCapability * QuickCumsumCudaPlugin::getCapabilityInterface( + PluginCapabilityType type) noexcept +{ + try { + if (type == PluginCapabilityType::kBUILD) { + return static_cast(this); + } + if (type == PluginCapabilityType::kRUNTIME) { + return static_cast(this); + } + PLUGIN_ASSERT(type == PluginCapabilityType::kCORE); + return static_cast(this); + } catch (std::exception const & e) { + caughtError(e); + } + return nullptr; +} + +IPluginV3 * QuickCumsumCudaPlugin::clone() noexcept +{ + try { + IPluginV3 * const plugin{new QuickCumsumCudaPlugin{layer_name_, params_}}; + return plugin; + } catch (std::exception const & e) { + caughtError(e); + } + return nullptr; +} + +char const * QuickCumsumCudaPlugin::getPluginName() const noexcept +{ + return kQUICK_CUMSUM_CUDA_PLUGIN_NAME; +} + +char const * QuickCumsumCudaPlugin::getPluginVersion() const noexcept +{ + return kQUICK_CUMSUM_CUDA_PLUGIN_VERSION; +} + +char const * QuickCumsumCudaPlugin::getPluginNamespace() const noexcept +{ + return kQUICK_CUMSUM_CUDA_PLUGIN_NAMESPACE; +} + +std::int32_t QuickCumsumCudaPlugin::getNbOutputs() const noexcept +{ + return 1; +} + +std::int32_t QuickCumsumCudaPlugin::configurePlugin( + DynamicPluginTensorDesc const * in, std::int32_t num_inputs, DynamicPluginTensorDesc const * out, + std::int32_t num_outputs) noexcept +{ + // Validate input arguments. 
+
+std::int32_t QuickCumsumCudaPlugin::configurePlugin(
+  DynamicPluginTensorDesc const * in, std::int32_t num_inputs, DynamicPluginTensorDesc const * out,
+  std::int32_t num_outputs) noexcept
+{
+  // Validate input arguments.
+  PLUGIN_ASSERT(num_inputs == 4);
+  PLUGIN_ASSERT(num_outputs == 1);
+  PLUGIN_ASSERT(in[0].desc.dims.nbDims == 2);
+  PLUGIN_ASSERT(in[1].desc.dims.nbDims == 2);
+  PLUGIN_ASSERT(in[2].desc.dims.nbDims == 1);
+  PLUGIN_ASSERT(in[3].desc.dims.nbDims == 1);
+
+  PLUGIN_ASSERT(out[0].desc.dims.nbDims == 5);
+
+  PLUGIN_ASSERT(in[0].desc.dims.d[0] == -1);
+  PLUGIN_ASSERT(in[1].desc.dims.d[0] == -1);
+  PLUGIN_ASSERT(in[1].desc.dims.d[1] == 4);
+
+  PLUGIN_ASSERT(in[2].desc.dims.d[0] == -1);
+  PLUGIN_ASSERT(in[3].desc.dims.d[0] == -1);
+
+  PLUGIN_ASSERT(out[0].desc.type == in[0].desc.type);
+
+  return 0;
+}
+
+bool QuickCumsumCudaPlugin::supportsFormatCombination(
+  std::int32_t pos, DynamicPluginTensorDesc const * in_out, std::int32_t num_inputs,
+  std::int32_t num_outputs) noexcept
+{
+  PLUGIN_ASSERT(num_inputs == 4 && num_outputs == 1 && pos < num_inputs + num_outputs);
+  bool valid;
+
+  const std::int32_t INPUT_FEATURES_INDEX = 0;
+  const std::int32_t INPUT_GEOM_FEATURES_INDEX = 1;
+  const std::int32_t INPUT_INTERVAL_LENGTH_INDEX = 2;
+  const std::int32_t INPUT_INTERVAL_START_INDEX = 3;
+  const std::int32_t OUTPUT_INDEX = 4;
+
+  switch (pos) {
+    case INPUT_FEATURES_INDEX:
+    case OUTPUT_INDEX:
+      valid = in_out[pos].desc.format == nvinfer1::TensorFormat::kLINEAR &&
+              in_out[pos].desc.type == nvinfer1::DataType::kFLOAT;
+      break;
+    case INPUT_GEOM_FEATURES_INDEX:
+    case INPUT_INTERVAL_LENGTH_INDEX:
+    case INPUT_INTERVAL_START_INDEX:
+      valid = in_out[pos].desc.format == nvinfer1::TensorFormat::kLINEAR &&
+              in_out[pos].desc.type == nvinfer1::DataType::kINT32;
+      break;
+    default:
+      valid = false;
+      break;
+  }
+
+  return valid;
+}
+
+std::int32_t QuickCumsumCudaPlugin::getOutputDataTypes(
+  DataType * output_types, std::int32_t num_outputs, DataType const * input_types,
+  std::int32_t num_inputs) const noexcept
+{
+  PLUGIN_ASSERT(num_inputs == 4);
+  PLUGIN_ASSERT(num_outputs == 1);
+  output_types[0] = input_types[0];
+  return 0;
+}
+
+std::int32_t QuickCumsumCudaPlugin::getOutputShapes(
+  DimsExprs const * inputs, std::int32_t num_inputs,
+  [[maybe_unused]] DimsExprs const * shape_inputs, [[maybe_unused]] std::int32_t num_shape_inputs,
+  DimsExprs * outputs, std::int32_t num_outputs, IExprBuilder & expr_builder) noexcept
+{
+  PLUGIN_ASSERT(num_inputs == 4);
+  PLUGIN_ASSERT(num_outputs == 1);
+  PLUGIN_ASSERT(inputs != nullptr);
+  PLUGIN_ASSERT(inputs[0].nbDims == 2);
+
+  outputs[0].nbDims = 5;
+  outputs[0].d[0] = expr_builder.constant(static_cast<std::int64_t>(params_.batch_size));
+  outputs[0].d[1] = expr_builder.constant(static_cast<std::int64_t>(params_.dimension));
+  outputs[0].d[2] = expr_builder.constant(static_cast<std::int64_t>(params_.height));
+  outputs[0].d[3] = expr_builder.constant(static_cast<std::int64_t>(params_.width));
+  outputs[0].d[4] = inputs[0].d[1];
+
+  return 0;
+}
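enqueue below zero-fills the output tensor and then delegates the pooling to bev_pool (src/bev_ops/bev_pool_cuda.cu). For readers unfamiliar with the interval trick, this self-contained sequential sketch reproduces the intended semantics; the function name is illustrative, and the geom_feats column order is an assumption taken from the original BEVFusion kernel:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Points are pre-sorted so that all points landing in the same BEV cell form
    // one contiguous interval; each interval is reduced by summation into its cell.
    void bev_pool_reference(
      int d, int h, int w, int c,
      const std::vector<float> & feats,           // [n, c] point features
      const std::vector<std::int32_t> & geom,     // [n, 4] cell coordinates per point
      const std::vector<std::int32_t> & starts,   // [n_intervals] first point of each interval
      const std::vector<std::int32_t> & lengths,  // [n_intervals] points per interval
      std::vector<float> & out)                   // [b, d, h, w, c], zero-initialized
    {
      for (std::size_t i = 0; i < starts.size(); ++i) {
        // Every point in the interval shares one cell, so its coordinates are read once.
        const std::int32_t * g = &geom[4 * static_cast<std::size_t>(starts[i])];
        // Assumed column order (x, y, z, batch), as in the original BEVFusion bev_pool.
        const std::size_t cell =
          (((static_cast<std::size_t>(g[3]) * d + g[2]) * h + g[1]) * w + g[0]) * c;
        for (std::int32_t j = 0; j < lengths[i]; ++j) {
          for (std::int32_t k = 0; k < c; ++k) {
            out[cell + k] += feats[static_cast<std::size_t>(starts[i] + j) * c + k];
          }
        }
      }
    }

The CUDA version parallelizes this reduction rather than looping, but the arithmetic is unchanged.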
+
+std::int32_t QuickCumsumCudaPlugin::enqueue(
+  PluginTensorDesc const * input_desc, [[maybe_unused]] PluginTensorDesc const * output_desc,
+  void const * const * inputs, void * const * outputs, [[maybe_unused]] void * workspace,
+  cudaStream_t stream) noexcept
+{
+  const auto output_features_num = input_desc[0].dims.d[0];
+  const auto output_features_dim = input_desc[0].dims.d[1];
+  const auto intervals_num = input_desc[2].dims.d[0];
+
+  cudaMemsetAsync(
+    outputs[0], 0,
+    params_.batch_size * params_.dimension * params_.height * params_.width * output_features_dim *
+      sizeof(float),
+    stream);
+
+  bev_pool(
+    static_cast<int>(params_.batch_size), static_cast<int>(params_.dimension),
+    static_cast<int>(params_.height), static_cast<int>(params_.width), output_features_num,
+    output_features_dim, intervals_num, static_cast<const float *>(inputs[0]),
+    static_cast<const int *>(inputs[1]), static_cast<const int *>(inputs[3]),
+    static_cast<const int *>(inputs[2]), static_cast<float *>(outputs[0]), stream);
+
+  return cudaSuccess;
+}
+
+std::int32_t QuickCumsumCudaPlugin::onShapeChange(
+  [[maybe_unused]] PluginTensorDesc const * in, [[maybe_unused]] std::int32_t num_inputs,
+  [[maybe_unused]] PluginTensorDesc const * out, [[maybe_unused]] std::int32_t num_outputs) noexcept
+{
+  return 0;
+}
+
+IPluginV3 * QuickCumsumCudaPlugin::attachToContext(
+  [[maybe_unused]] IPluginResourceContext * context) noexcept
+{
+  return clone();
+}
+
+PluginFieldCollection const * QuickCumsumCudaPlugin::getFieldsToSerialize() noexcept
+{
+  return &fc_to_serialize_;
+}
+
+std::size_t QuickCumsumCudaPlugin::getWorkspaceSize(
+  [[maybe_unused]] DynamicPluginTensorDesc const * inputs, [[maybe_unused]] std::int32_t num_inputs,
+  [[maybe_unused]] DynamicPluginTensorDesc const * outputs,
+  [[maybe_unused]] std::int32_t num_outputs) const noexcept
+{
+  return 0;
+}
+
+}  // namespace plugin
+}  // namespace nvinfer1
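That closes the plugin itself. One point worth spelling out about the IPluginV3 lifecycle: when an engine is deserialized, TensorRT does not restore the object directly; it looks up the creator (next file) by name, version, and namespace, and replays the fields captured from getFieldsToSerialize(). A hedged sketch of that round trip; the literal name "QuickCumsumCuda", version "1", and empty namespace are assumptions, as the real values live in the kQUICK_CUMSUM_CUDA_PLUGIN_* constants:

    #include <NvInferRuntime.h>

    // Sketch of the lookup and re-creation TensorRT performs internally when
    // deserializing an engine that contains this plugin.
    nvinfer1::IPluginV3 * recreate_plugin(nvinfer1::PluginFieldCollection const & stored_fields)
    {
      auto * creator = static_cast<nvinfer1::IPluginCreatorV3One *>(
        getPluginRegistry()->getCreator("QuickCumsumCuda", "1", ""));
      return creator->createPlugin(
        "quick_cumsum_cuda", &stored_fields, nvinfer1::TensorRTPhase::kRUNTIME);
    }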
diff --git a/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.cpp b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.cpp
new file mode 100644
index 0000000000000..c74edc03b4a47
--- /dev/null
+++ b/perception/autoware_lidar_bevfusion/src/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.cpp
@@ -0,0 +1,143 @@
+// Copyright 2025 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "autoware/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.hpp"
+
+#include "autoware/tensorrt_plugins/plugin_utils.hpp"
+#include "autoware/tensorrt_plugins/quick_cumsum_cuda_plugin.hpp"
+
+#include <NvInferRuntime.h>
+
+#include <cstdint>
+#include <exception>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace nvinfer1
+{
+namespace plugin
+{
+
+REGISTER_TENSORRT_PLUGIN(QuickCumsumCudaPluginCreator);
+
+QuickCumsumCudaPluginCreator::QuickCumsumCudaPluginCreator()
+{
+  plugin_attributes_.clear();
+  plugin_attributes_.emplace_back(
+    nvinfer1::PluginField("batch_size", nullptr, PluginFieldType::kINT32, 1));
+  plugin_attributes_.emplace_back(
+    nvinfer1::PluginField("dimension", nullptr, PluginFieldType::kINT32, 1));
+  plugin_attributes_.emplace_back(
+    nvinfer1::PluginField("height", nullptr, PluginFieldType::kINT32, 1));
+  plugin_attributes_.emplace_back(
+    nvinfer1::PluginField("width", nullptr, PluginFieldType::kINT32, 1));
+
+  fc_.nbFields = plugin_attributes_.size();
+  fc_.fields = plugin_attributes_.data();
+}
+
+nvinfer1::PluginFieldCollection const * QuickCumsumCudaPluginCreator::getFieldNames() noexcept
+{
+  // This is only used in the build phase.
+  return &fc_;
+}
+
+IPluginV3 * QuickCumsumCudaPluginCreator::createPlugin(
+  char const * name, PluginFieldCollection const * fc, TensorRTPhase phase) noexcept
+{
+  // The plugin serializes the same four named fields that the ONNX node carries
+  // (see QuickCumsumCudaPlugin::initFieldsToSerialize), so the build and runtime
+  // phases share one parsing path.
+  if (phase != TensorRTPhase::kBUILD && phase != TensorRTPhase::kRUNTIME) {
+    return nullptr;
+  }
+
+  try {
+    nvinfer1::PluginField const * fields{fc->fields};
+    std::int32_t num_fields{fc->nbFields};
+
+    PLUGIN_VALIDATE(num_fields == 4);
+
+    QuickCumsumCudaParameters parameters;
+
+    for (std::int32_t i{0}; i < num_fields; ++i) {
+      const std::string attr_name = fields[i].name;
+      const nvinfer1::PluginFieldType type = fields[i].type;
+
+      if (attr_name == "batch_size") {
+        PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32);
+        parameters.batch_size = *static_cast<std::int32_t const *>(fields[i].data);
+      } else if (attr_name == "dimension") {
+        PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32);
+        parameters.dimension = *static_cast<std::int32_t const *>(fields[i].data);
+      } else if (attr_name == "height") {
+        PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32);
+        parameters.height = *static_cast<std::int32_t const *>(fields[i].data);
+      } else if (attr_name == "width") {
+        PLUGIN_VALIDATE(type == nvinfer1::PluginFieldType::kINT32);
+        parameters.width = *static_cast<std::int32_t const *>(fields[i].data);
+      }
+    }
+
+    // Log the attributes parsed from the ONNX node.
+    std::stringstream ss;
+    ss << name << " plugin Attributes:";
+    logDebug(ss.str().c_str());
+
+    ss.str("");
+    ss << "batch_size: " << parameters.batch_size;
+    logDebug(ss.str().c_str());
+
+    ss.str("");
+    ss << "dimension: " << parameters.dimension;
+    logDebug(ss.str().c_str());
+
+    ss.str("");
+    ss << "height: " << parameters.height;
+    logDebug(ss.str().c_str());
+
+    ss.str("");
+    ss << "width: " << parameters.width;
+    logDebug(ss.str().c_str());
+
+    QuickCumsumCudaPlugin * const plugin{new QuickCumsumCudaPlugin{std::string(name), parameters}};
+    return plugin;
+  } catch (std::exception const & e) {
+    caughtError(e);
+  }
+  return nullptr;
+}
+
+}  // namespace plugin
+}  // namespace nvinfer1
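Since getFieldNames() advertises the same four attributes that createPlugin consumes, building the plugin outside of ONNX parsing (for example in a unit test) only requires assembling a matching field collection. A minimal sketch, with arbitrary example values and an illustrative function name:

    #include <NvInferRuntime.h>

    #include <array>
    #include <cstdint>

    // Constructs the build-phase field collection by hand and asks the creator
    // for a plugin instance, mirroring what the ONNX parser does with the
    // node's attributes.
    nvinfer1::IPluginV3 * make_test_plugin(nvinfer1::IPluginCreatorV3One & creator)
    {
      static std::int32_t batch_size{1};
      static std::int32_t dimension{1};
      static std::int32_t height{360};
      static std::int32_t width{360};
      std::array<nvinfer1::PluginField, 4> fields{
        {{"batch_size", &batch_size, nvinfer1::PluginFieldType::kINT32, 1},
         {"dimension", &dimension, nvinfer1::PluginFieldType::kINT32, 1},
         {"height", &height, nvinfer1::PluginFieldType::kINT32, 1},
         {"width", &width, nvinfer1::PluginFieldType::kINT32, 1}}};
      nvinfer1::PluginFieldCollection fc{static_cast<std::int32_t>(fields.size()), fields.data()};
      return creator.createPlugin("quick_cumsum_cuda", &fc, nvinfer1::TensorRTPhase::kBUILD);
    }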
diff --git a/sensing/autoware_cuda_utils/include/autoware/cuda_utils/cuda_utils.hpp b/sensing/autoware_cuda_utils/include/autoware/cuda_utils/cuda_utils.hpp
new file mode 100644
index 0000000000000..9d0a0f3e3e015
--- /dev/null
+++ b/sensing/autoware_cuda_utils/include/autoware/cuda_utils/cuda_utils.hpp
@@ -0,0 +1,36 @@
+// Copyright 2025 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This code is licensed under CC0 1.0 Universal (Public Domain).
+// You can use this without any limitation.
+// https://creativecommons.org/publicdomain/zero/1.0/deed.en
+// borrowed from https://proc-cpuinfo.fixstars.com/2019/02/cuda_smart_pointer/
+
+#ifndef AUTOWARE__CUDA_UTILS__CUDA_UTILS_HPP_
+#define AUTOWARE__CUDA_UTILS__CUDA_UTILS_HPP_
+
+#include "autoware/cuda_utils/cuda_check_error.hpp"
+
+#include <cuda_runtime_api.h>
+
+namespace autoware::cuda_utils
+{
+template <typename T>
+void clear_async(T * ptr, std::size_t num_elem, cudaStream_t stream)
+{
+  CHECK_CUDA_ERROR(::cudaMemsetAsync(ptr, 0, sizeof(T) * num_elem, stream));
+}
+}  // namespace autoware::cuda_utils
+
+#endif  // AUTOWARE__CUDA_UTILS__CUDA_UTILS_HPP_
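A short usage sketch for clear_async, mirroring the raw cudaMemsetAsync call in QuickCumsumCudaPlugin::enqueue but with type-aware sizing and error checking (function and variable names are illustrative):

    #include "autoware/cuda_utils/cuda_utils.hpp"

    #include <cuda_runtime_api.h>

    #include <cstddef>

    // Zero a device buffer asynchronously on the given stream, e.g. before a
    // kernel accumulates partial sums into it.
    void reset_device_buffer(float * d_buffer, std::size_t num_elements, cudaStream_t stream)
    {
      autoware::cuda_utils::clear_async(d_buffer, num_elements, stream);
    }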