diff --git a/perception/tensorrt_mtr/config/tensorrt_mtr.param.yaml b/perception/tensorrt_mtr/config/tensorrt_mtr.param.yaml index 6e969d21c638a..3d88066f6582e 100644 --- a/perception/tensorrt_mtr/config/tensorrt_mtr.param.yaml +++ b/perception/tensorrt_mtr/config/tensorrt_mtr.param.yaml @@ -3,13 +3,12 @@ model_path: "$(var data_path)/mtr_static.onnx" precision: "fp32" target_labels: ["VEHICLE", "PEDESTRIAN", "CYCLIST"] - num_past: 10 + num_past: 11 num_mode: 6 num_future: 80 - max_num_polyline: 798 + max_num_polyline: 768 max_num_point: 20 point_break_distance: 1.0 - offset_xy: [30.0, 0.0] intention_point_filepath: "$(var data_path)/intention_point.csv" num_intention_point_cluster: 64 polyline_label_path: "$(var data_path)/polyline_label.txt" diff --git a/perception/tensorrt_mtr/include/tensorrt_mtr/agent.hpp b/perception/tensorrt_mtr/include/tensorrt_mtr/agent.hpp index cc0d7af2488ec..d789d44e36b44 100644 --- a/perception/tensorrt_mtr/include/tensorrt_mtr/agent.hpp +++ b/perception/tensorrt_mtr/include/tensorrt_mtr/agent.hpp @@ -34,9 +34,7 @@ enum AgentLabel { VEHICLE = 0, PEDESTRIAN = 1, CYCLIST = 2 }; */ struct AgentState { - /** - * @brief Construct a new instance filling all elements by `0.0f`. - */ + // Construct a new instance filling all elements by `0.0f`. AgentState() : data_({0.0f}) {} /** @@ -84,33 +82,49 @@ struct AgentState std::copy(itr, itr + dim(), data_.begin()); } - static size_t dim() { return AgentStateDim; } - - /** - * @brief Construct a new instance filling all elements by `0.0f`. - * - * @return AgentState - */ + // Construct a new instance filling all elements by `0.0f`. static AgentState empty() noexcept { return AgentState(); } - /** - * @brief Return the address pointer of data array. - * - * @return float* - */ + // Return the agent state dimensions `D`. + static size_t dim() { return AgentStateDim; } + + // Return the address pointer of data array. float * data_ptr() noexcept { return data_.data(); } + // Return the x position. float x() const { return x_; } + + // Return the y position. float y() const { return y_; } + + // Return the z position. float z() const { return z_; } + + // Return the length of object size. float length() const { return length_; } + + // Return the width of object size. float width() const { return width_; } + + // Return the height of object size. float height() const { return height_; } + + // Return the yaw angle `[rad]`. float yaw() const { return yaw_; } + + // Return the x velocity. float vx() const { return vx_; } + + // Return the y velocity. float vy() const { return vy_; } + + // Return the x acceleration. float ax() const { return ax_; } + + // Return the y acceleration. float ay() const { return ay_; } + + // Return `true` if the value is `1.0`. bool is_valid() const { return is_valid_ == 1.0f; } private: @@ -163,34 +177,19 @@ struct AgentHistory { } - static size_t state_dim() { return AgentStateDim; } - - /** - * @brief Return the history length. - * - * @return size_t History length. - */ + // Return the history time length `T`. size_t length() const { return max_time_length_; } - /** - * @brief Return the data size of history `T * D`. - * - * @return size_t - */ + // Return the number of agent state dimensions `D`. + static size_t state_dim() { return AgentStateDim; } + + // Return the data size of history `T * D`. size_t size() const { return max_time_length_ * state_dim(); } - /** - * @brief Return the shape of history matrix ordering in `(T, D)`. - * - * @return std::tuple - */ + // Return the shape of history matrix ordering in `(T, D)`. std::tuple shape() const { return {max_time_length_, state_dim()}; } - /** - * @brief Return the object id. - * - * @return const std::string& - */ + // Return the object id. const std::string & object_id() const { return object_id_; } size_t label_index() const { return label_index_; } @@ -220,10 +219,7 @@ struct AgentHistory latest_time_ = current_time; } - /** - * @brief Update history with all-zeros state, but latest time is not updated. - * - */ + // Update history with all-zeros state, but latest time is not updated. void update_empty() noexcept { // remove the state at the oldest timestamp @@ -235,11 +231,7 @@ struct AgentHistory } } - /** - * @brief Return the address pointer of data array. - * - * @return float* The pointer of data array. - */ + // Return the address pointer of data array. float * data_ptr() noexcept { return data_.data(); } /** @@ -264,11 +256,7 @@ struct AgentHistory */ bool is_valid_latest() const { return get_latest_state().is_valid(); } - /** - * @brief Get the latest agent state. - * - * @return AgentState - */ + // Get the latest agent state at `T`. AgentState get_latest_state() const { const auto & latest_itr = (data_.begin() + state_dim() * (max_time_length_ - 1)); @@ -338,49 +326,55 @@ struct AgentData } } - static size_t state_dim() { return AgentStateDim; } + // Return the number of classes `C`. static size_t num_class() { return 3; } // TODO(ktro2828): Do not use magic number. + + // Return the number of target agents `B`. size_t num_target() const { return num_target_; } + + // Return the number of agents `N`. size_t num_agent() const { return num_agent_; } + + // Return the timestamp length `T`. size_t time_length() const { return time_length_; } + + // Return the number of agent state dimensions `D`. + static size_t state_dim() { return AgentStateDim; } + + // Return the index of ego. int sdc_index() const { return sdc_index_; } + + // Return the vector of indices of target agents, in shape `[B]`. const std::vector & target_index() const { return target_index_; } + + // Return the vector of label indices of all agents, in shape `[N]`. const std::vector & label_index() const { return label_index_; } + + // Return the vector of label indices of target agents, in shape `[B]`. const std::vector & target_label_index() const { return target_label_index_; } + + // Return the vector of timestamps in shape `[T]`. const std::vector & timestamps() const { return timestamps_; } + // Return the number of all elements `N*T*D`. size_t size() const { return num_agent_ * time_length_ * state_dim(); } - size_t input_dim() const { return num_agent_ + time_length_ + num_class() + 3; } - /** - * @brief Return the data shape ordering in (N, T, D). - * - * @return std::tuple - */ + // Return the number of state dimensions of MTR input `T+C+D+3`. + size_t input_dim() const { return time_length_ + state_dim() + num_class() + 3; } + + // Return the data shape ordering in (N, T, D). std::tuple shape() const { return {num_agent_, time_length_, state_dim()}; } - /** - * @brief Return the address pointer of data array. - * - * @return float* The pointer of data array. - */ + // Return the address pointer of data array. float * data_ptr() noexcept { return data_.data(); } - /** - * @brief Return the address pointer of data array for target agents. - * - * @return float* The pointer of data array for target agents. - */ + // Return the address pointer of data array for target agents. float * target_data_ptr() noexcept { return target_data_.data(); } - /** - * @brief Return the address pointer of data array for ego vehicle. - * - * @return float* The pointer of data array for ego vehicle. - */ + // Return the address pointer of data array for ego vehicle. float * ego_data_ptr() noexcept { return ego_data_.data(); } private: @@ -397,6 +391,7 @@ struct AgentData std::vector ego_data_; }; +// Get label names from label indices. std::vector getLabelNames(const std::vector & label_index) { std::vector label_names; diff --git a/perception/tensorrt_mtr/include/tensorrt_mtr/intention_point.hpp b/perception/tensorrt_mtr/include/tensorrt_mtr/intention_point.hpp index 0e957bb0d4e4b..4e40a7acab22c 100644 --- a/perception/tensorrt_mtr/include/tensorrt_mtr/intention_point.hpp +++ b/perception/tensorrt_mtr/include/tensorrt_mtr/intention_point.hpp @@ -107,18 +107,10 @@ struct IntentionPoint return points; } - /** - * @brief Return the size of intension point K * D. - * - * @return size_t - */ + // Return the size of intension point `K*D`. size_t size() const { return num_cluster_ * state_dim(); } - /** - * @brief Return the number of clusters contained in intention points. - * - * @return size_t - */ + // Return the number of clusters contained in intention points `K`. size_t num_cluster() const { return num_cluster_; } private: diff --git a/perception/tensorrt_mtr/include/tensorrt_mtr/node.hpp b/perception/tensorrt_mtr/include/tensorrt_mtr/node.hpp index a23652ca57606..b095002e5d42a 100644 --- a/perception/tensorrt_mtr/include/tensorrt_mtr/node.hpp +++ b/perception/tensorrt_mtr/include/tensorrt_mtr/node.hpp @@ -88,13 +88,8 @@ class PolylineTypeMap } } - /** - * @brief Return the ID of the corresponding label type. - * If specified type is not contained in map, return -1. - * - * @param type - * @return int - */ + // Return the ID of the corresponding label type. If specified type is not contained in map, + // return `-1`. int getTypeID(const std::string & type) const { return label_map_.count(type) == 0 ? -1 : label_map_.at(type); @@ -109,83 +104,42 @@ class MTRNode : public rclcpp::Node public: explicit MTRNode(const rclcpp::NodeOptions & node_options); - // Object ID for ego vehicle + // Object ID of the ego vehicle const std::string EGO_ID{"EGO"}; private: - /** - * @brief Main callback being invoked when the tracked objects topic is subscribed. - * - * @param object_msg - */ + // Main callback being invoked when the tracked objects topic is subscribed. void callback(const TrackedObjects::ConstSharedPtr object_msg); - /** - * @brief Callback being invoked when the HD map topic is subscribed. - * - * @param map_msg - */ + // Callback being invoked when the HD map topic is subscribed. void onMap(const HADMapBin::ConstSharedPtr map_msg); - /** - * @brief Callback being invoked when the Ego's odometry topic is subscribed. - * - * @param ego_msg - */ + // Callback being invoked when the Ego's odometry topic is subscribed. void onEgo(const Odometry::ConstSharedPtr ego_msg); - /** - * @brief Converts lanelet2 to polylines. - * - * @return true - */ + // Convert Lanelet to `PolylineData`. bool convertLaneletToPolyline(); - /** - * @brief Remove ancient agent histories. - * - * @param current_time - * @param objects_msg - */ + // Remove ancient agent histories. void removeAncientAgentHistory( const float current_time, const TrackedObjects::ConstSharedPtr objects_msg); - /** - * @brief Appends new states to history. - * - * @param current_time - * @param objects_msg - */ + // Appends new states to history. void updateAgentHistory( const float current_time, const TrackedObjects::ConstSharedPtr objects_msg); + // Extract ego state stored in the buffer which has the nearest timestamp from current timestamp. AgentState extractNearestEgo(const float current_time) const; - /** - * @brief Extract target agents and return corresponding indices. - * - * NOTE: Extract targets in order of proximity, closest first. - * - * @param histories - * @return std::vector - */ + // Extract target agents and return corresponding indices. + // NOTE: Extract targets in order of proximity, closest first. std::vector extractTargetAgent(const std::vector & histories); - /** - * @brief Return the timestamps relative from the first element.Return the timestamps relative - * from the first element. - * - * @return std::vector - */ + // Return the timestamps relative from the first element.Return the timestamps relative from the + // first element. std::vector getRelativeTimestamps() const; - /** - * @brief Generate `PredictedObject` from `PredictedTrajectory`. - * - * @param object - * @param trajectory - * @return PredictedObject - */ + // Generate `PredictedObject` from `PredictedTrajectory`. PredictedObject generatePredictedObject( const TrackedObject & object, const PredictedTrajectory & trajectory); diff --git a/perception/tensorrt_mtr/include/tensorrt_mtr/polyline.hpp b/perception/tensorrt_mtr/include/tensorrt_mtr/polyline.hpp index c7695deb0bb3d..660db35fecfa3 100644 --- a/perception/tensorrt_mtr/include/tensorrt_mtr/polyline.hpp +++ b/perception/tensorrt_mtr/include/tensorrt_mtr/polyline.hpp @@ -32,9 +32,7 @@ enum PolylineLabel { LANE = 0, ROAD_LINE = 1, ROAD_EDGE = 2, CROSSWALK = 3 }; struct LanePoint { - /** - * @brief Construct a new instance filling all elements by `0.0f`. - */ + // Construct a new instance filling all elements by `0.0f`. LanePoint() : data_({0.0f}) {} /** @@ -55,34 +53,22 @@ struct LanePoint { } + // Construct a new instance filling all elements by `0.0f`. + static LanePoint empty() noexcept { return LanePoint(); } + + // Return the point state dimensions `D`. static size_t dim() { return PointStateDim; } - /** - * @brief Return the x position of the point. - * - * @return float The x position. - */ + // Return the x position of the point. float x() const { return x_; } - /** - * @brief Return the y position of the point. - * - * @return float The y position. - */ + // Return the y position of the point. float y() const { return y_; } - /** - * @brief Return the z position of the point. - * - * @return float The z position. - */ + // Return the z position of the point. float z() const { return z_; } - /** - * @brief Return the label of the point. - * - * @return float The label. - */ + // Return the label of the point. float label() const { return label_; } /** @@ -96,18 +82,7 @@ struct LanePoint return std::hypot(x_ - other.x(), y_ - other.y(), z_ - other.z()); } - /** - * @brief Construct a new instance filling all elements by `0.0f`. - * - * @return LanePoint - */ - static LanePoint empty() noexcept { return LanePoint(); } - - /** - * @brief Return the address pointer of data array. - * - * @return float* - */ + // Return the address pointer of data array. float * data_ptr() noexcept { return data_.data(); } private: @@ -165,27 +140,28 @@ struct PolylineData } } - static size_t state_dim() { return PointStateDim; } + // Return the number of polylines `K`. size_t num_polyline() const { return num_polyline_; } + + // Return the number of points contained in each polyline `P`. size_t num_point() const { return num_point_; } + // Return the number of point dimensions `D`. + static size_t state_dim() { return PointStateDim; } + + // Return the number of all elements `K*P*D`. size_t size() const { return num_polyline_ * num_point_ * state_dim(); } - /** - * @brief Return the data shape ordering in (L, P, D). - * - * @return std::tuple - */ + // Return the number of state dimensions of MTR input `D+2`. + size_t input_dim() const { return state_dim() + 2; } + + // Return the data shape ordering in `(K, P, D)`. std::tuple shape() const { return {num_polyline_, num_point_, state_dim()}; } - /** - * @brief Return the address pointer of data array. - * - * @return float* The pointer of data array. - */ + // Return the address pointer of data array. float * data_ptr() noexcept { return data_.data(); } private: diff --git a/perception/tensorrt_mtr/include/tensorrt_mtr/trajectory.hpp b/perception/tensorrt_mtr/include/tensorrt_mtr/trajectory.hpp index 08ee1a789293f..6bade709cbebc 100644 --- a/perception/tensorrt_mtr/include/tensorrt_mtr/trajectory.hpp +++ b/perception/tensorrt_mtr/include/tensorrt_mtr/trajectory.hpp @@ -35,11 +35,19 @@ struct PredictedState { } + // Return the predicted state dimensions `D`. static size_t dim() { return PredictedStateDim; } + // Return the predicted x position. float x() const { return x_; } + + // Return the predicted y position. float y() const { return y_; } + + // Return the predicted x velocity. float vx() const { return vx_; } + + // Return the predicted y velocity. float vy() const { return vy_; } private: @@ -47,7 +55,7 @@ struct PredictedState }; // struct PredictedState /** - * @brief A class to represent waypoints for a single mode. + * @brief A class to represent waypoints of a single mode. */ struct PredictedMode { @@ -61,15 +69,16 @@ struct PredictedMode } } + // Return the number of predicted future timestamps `T`. + size_t num_future() const { return num_future_; } + + // Return the number of predicted state dimensions `D`. static size_t state_dim() { return PredictedStateDim; } + + // Return the predicted score. float score() const { return score_; } - size_t num_future() const { return num_future_; } - /** - * @brief Get the waypoints. - * - * @return const std::vector& - */ + // Return the vector of waypoints. const std::vector & get_waypoints() const { return waypoints_; } private: @@ -83,6 +92,16 @@ struct PredictedMode */ struct PredictedTrajectory { + /** + * @brief Construct a new instance. + * + * @note Predicted trajectories are sorted with the smallest scores. + * + * @param scores Predicted cores for each target, in shape `[B*M]`. + * @param trajectories Predicted trajectories for each target. `[B*M*T*D]`. + * @param num_mode The number of predicted modes. + * @param num_future The number of predicted timestamps. + */ PredictedTrajectory( const float * scores, const float * trajectories, const size_t num_mode, const size_t num_future) @@ -94,26 +113,24 @@ struct PredictedTrajectory std::vector waypoints(start_ptr, start_ptr + num_future_ * state_dim()); modes_.emplace_back(score, waypoints.data(), num_future_); } - // sort by score sort_by_score(); } - static size_t state_dim() { return PredictedStateDim; } + // Return the number of predicted modes `M`. size_t num_mode() const { return num_mode_; } + + // Return the number of predicted future timestamps `T`. size_t num_future() const { return num_future_; } - /** - * @brief Return predicted modes. Modes are sorted in descending order based on their scores. - * - * @return const std::vector& - */ + // Return the number of predicted state dimensions `D`. + static size_t state_dim() { return PredictedStateDim; } + + // Return predicted modes. Modes are sorted in descending order based on their scores. const std::vector & get_modes() const { return modes_; } private: - /** - * @brief Sort modes in descending order based on their scores. - */ + // Sort modes in descending order based on their scores. void sort_by_score() { std::sort(modes_.begin(), modes_.end(), [](const auto & mode1, const auto & mode2) { diff --git a/perception/tensorrt_mtr/include/tensorrt_mtr/trt_mtr.hpp b/perception/tensorrt_mtr/include/tensorrt_mtr/trt_mtr.hpp index 3a27cf6e48c56..5438adf0fb21c 100644 --- a/perception/tensorrt_mtr/include/tensorrt_mtr/trt_mtr.hpp +++ b/perception/tensorrt_mtr/include/tensorrt_mtr/trt_mtr.hpp @@ -42,18 +42,19 @@ struct MTRConfig * @brief Construct a new instance. * * @param target_labels An array of target label names. + * @param num_past The number of past timestamps. * @param num_mode The number of modes. * @param num_future The number of future time step length predicted by MTR. * @param max_num_polyline The max number of polylines which can be contained in a single input. - * @param offset_xy The offset value used in input pre-process. + * @param max_num_point The max number of points contained in each polyline. * @param intention_point_filepath The path to intention points file. * @param num_intention_point_cluster The number of clusters for intension points. */ MTRConfig( const std::vector & target_labels = {"VEHICLE", "PEDESTRIAN", "CYCLIST"}, - const size_t num_past = 10, const size_t num_mode = 6, const size_t num_future = 80, + const size_t num_past = 11, const size_t num_mode = 6, const size_t num_future = 80, const size_t max_num_polyline = 768, const size_t max_num_point = 20, - const float point_break_distance = 1.0f, const std::array & offset_xy = {30.0f, 0.0f}, + const float point_break_distance = 1.0f, const std::string & intention_point_filepath = "./data/waymo64.csv", const size_t num_intention_point_cluster = 64) : target_labels(target_labels), @@ -64,7 +65,6 @@ struct MTRConfig max_num_polyline(max_num_polyline), max_num_point(max_num_point), point_break_distance(point_break_distance), - offset_xy(offset_xy), intention_point_filepath(intention_point_filepath), num_intention_point_cluster(num_intention_point_cluster) { @@ -78,7 +78,6 @@ struct MTRConfig size_t max_num_polyline; size_t max_num_point; float point_break_distance; - std::array offset_xy; std::string intention_point_filepath; size_t num_intention_point_cluster; }; // struct MTRConfig @@ -159,6 +158,15 @@ class TrtMTR IntentionPoint intention_point_; + // data size + // load from input data + size_t num_target_, num_agent_, time_length_; + size_t agent_input_dim_; + size_t num_polyline_, num_point_; + size_t polyline_input_dim_; + // load from config + size_t max_num_polyline_, num_mode_, num_future_; + // source data cuda::unique_ptr d_target_index_{nullptr}; cuda::unique_ptr d_label_index_{nullptr}; @@ -167,7 +175,6 @@ class TrtMTR cuda::unique_ptr d_target_state_{nullptr}; cuda::unique_ptr d_intention_points_{nullptr}; cuda::unique_ptr d_polyline_{nullptr}; - cuda::unique_ptr d_topk_index_{nullptr}; // preprocessed inputs cuda::unique_ptr d_in_trajectory_{nullptr}; @@ -176,6 +183,10 @@ class TrtMTR cuda::unique_ptr d_in_polyline_{nullptr}; cuda::unique_ptr d_in_polyline_mask_{nullptr}; cuda::unique_ptr d_in_polyline_center_{nullptr}; + // only used for topk extraction + cuda::unique_ptr d_tmp_polyline_{nullptr}; + cuda::unique_ptr d_tmp_polyline_mask_{nullptr}; + cuda::unique_ptr d_tmp_distance_{nullptr}; // outputs cuda::unique_ptr d_out_score_{nullptr}; diff --git a/perception/tensorrt_mtr/lib/include/postprocess/postprocess_kernel.cuh b/perception/tensorrt_mtr/lib/include/postprocess/postprocess_kernel.cuh index 2b3014d1fb39c..949eacc6abeca 100644 --- a/perception/tensorrt_mtr/lib/include/postprocess/postprocess_kernel.cuh +++ b/perception/tensorrt_mtr/lib/include/postprocess/postprocess_kernel.cuh @@ -18,21 +18,20 @@ #include /** - * @brief A kernel to transform predicted trajectory from each target coords system to world coords - * system. + * @brief Transform predicted trajectory from target agent coords to world coords. * * @param B The number of target agents. * @param M The number of modes. * @param T The number of future timestamps. - * @param inDim The number of input agent state dimensions. - * @param targetState Source target agent states at latest timestamp, in shape [B*inDim]. - * @param outDim The number of output state dimensions. - * @param trajectory Output predicted trajectory, in shape [B*M*T*outDim]. - * @return __global__ + * @param AgentDim The number of target agent state dimensions, expecting (x, y, z, length, width, + * height, yaw, vx, vy, ax, ay). + * @param targetState Source target agent state, in shape [B*AgentDim]. + * @param PredDim The number of predicted state dimension, expecting (x, y, ?, ?, ?, vx, vy). + * @param predTrajectory Predicted trajectory, in shape [B*M*T*PredDim]. */ __global__ void transformTrajectoryKernel( - const int B, const int M, const int T, const int inDim, const float * targetState, - const int outDim, float * trajectory); + const int B, const int M, const int T, const int AgentDim, const float * targetState, + const int PredDim, float * predTrajectory); /** * @brief Execute postprocess to predicted score and trajectory. @@ -40,16 +39,16 @@ __global__ void transformTrajectoryKernel( * @param B The number of target agents. * @param M The number of modes. * @param T The number of future timestamps. - * @param inDim The number of input agent state dimensions. - * @param target_state Target agent states at the latest timestamp, in shape [B * inDim]. - * @param outDim The number predicted agent state dimensions - * @param pred_scores Predicted scores, in shape [B*M]. - * @param pred_trajectory Predicted trajectories, in shape [B*M*T*D]. + * @param AgentDim The number of target agent state dimensions, expecting (x, y, z, length, width, + * height, yaw, vx, vy, ax, ay). + * @param targetState Target agent states at the latest timestamp, in shape [B*inDim]. + * @param PredDim The number predicted state dimensions, expecting (x, y, ?, ?, ?, vx, vy). + * @param predTrajectory Predicted trajectory, in shape [B*M*T*PredDim]. * @param stream CUDA stream. - * @return cudaError_t + * @return cudaError_t CUDA error type. */ cudaError_t postprocessLauncher( - const int B, const int M, const int T, const int inDim, const float * target_state, - const int outDim, float * pred_score, float * pred_trajectory, cudaStream_t stream); + const int B, const int M, const int T, const int AgentDim, const float * targetState, + const int PredDim, float * predTrajectory, cudaStream_t stream); #endif // POSTPROCESS__POSTPROCESS_KERNEL_CUH__ diff --git a/perception/tensorrt_mtr/lib/include/preprocess/polyline_preprocess_kernel.cuh b/perception/tensorrt_mtr/lib/include/preprocess/polyline_preprocess_kernel.cuh index d655f50820282..0ffec0e01daf6 100644 --- a/perception/tensorrt_mtr/lib/include/preprocess/polyline_preprocess_kernel.cuh +++ b/perception/tensorrt_mtr/lib/include/preprocess/polyline_preprocess_kernel.cuh @@ -16,96 +16,125 @@ #define PREPROCESS__POLYLINE_PREPROCESS_KERNEL_CUH_ /** - * @brief Transform to target agent coordinates system. + * @brief Transform polylines to target agent coordinate system and extend its feature with previous + * x, y`(D->D+2)`. + * + * Points which all elements are 0.0 except of typeID are filled by 0.0 and the corresponding + * mask value is 0.0 too. * * @param K The number of polylines. * @param P The number of points contained in each polyline. - * @param PointDim The number of point state dimensions. - * @param in_polyline Source polylines, in shape [K*P*PointDim]. + * @param PointDim The number of point dimensions, expecting (x, y, z, dx, dy, dz, typeID). + * @param inPolyline Source polyline, in shape [K*P*D]. * @param B The number of target agents. - * @param AgentDim The number of agent state dimensions. - * @param target_state Source target state at the latest timestamp, in shape [B*AgentDim]. - * @param out_polyline Output polylines, in shape [B*K*P*(PointDim+2)]. - * @param out_polyline_mask Output polyline mask, in shape [B*K*P]. + * @param AgentDim The number of agent state dimensions, expecting (x, y, z, length, width, height, + * yaw, vx, vy, ax, ay). + * @param targetState Source target agent states, in shape [B*AgentDim]. + * @param outPolyline Output polyline, in shape [B*K*P*(PointDim+2)]. + * @param outPolylineMask Output polyline mask, in shape [B*K*P]. */ __global__ void transformPolylineKernel( - const int K, const int P, const int PointDim, const float * in_polyline, const int B, - const int AgentDim, const float * target_state, float * out_polyline, bool * out_polyline_mask); + const int K, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * outPolyline, bool * outPolylineMask); /** - * @brief Set the previous xy position at the end of element. + * @brief Set the Previous Position Kernel object * * @param B The number of target agents. * @param K The number of polylines. * @param P The number of points contained in each polyline. - * @param D The number of point dimensions. - * @param mask The polyline mask, in shape [B*K*P]. - * @param polyline The container of polylines, in shape [B*K*P*D] + * @param D The number of point dimensions, expecting (x, y, ..., preX, preY). + * @param polyline Source polyline, in shape [B*K*P*D]. */ __global__ void setPreviousPositionKernel( - const int B, const int K, const int P, const int D, const bool * mask, float * polyline); + const int B, const int K, const int P, const int D, float * polyline); /** - * @brief Extract TopK elements. + * @brief Calculate center distance from target agent to each polyline. + * + * @note Polyline must have been transformed to target agent coordinates system. * - * @param K The number of K. - * @param L The number of source polylines. + * @param B The number of target agents. + * @param K The number of polylines. * @param P The number of points contained in each polyline. + * @param D The number of point dimensions, expecting [x, y, ...]. + * @param polyline Source polyline, in shape [B*K*P*D]. + * @param polylineMask Source polyline mask, in shape [B*K*P]. + * @param distance Output calculated distances, in shape [B*K]. + */ +__global__ void calculateCenterDistanceKernel( + const int B, const int K, const int P, const int D, const float * polyline, + const bool * polylineMask, float * distance); + +/** + * @brief Extract K polylines with the smallest distances. + * + * @note Because this kernel supposes to allocate shared memory dynamically it is necessary to + * specify `sizeof(float) * L` in the kernel execution configuration. + * + * @param K The number of polylines to be extracted. * @param B The number of target agents. - * @param offsetX X offset position. - * @param offsetY Y offset position. - * @param AgentDim The number of agent state dimensions. - * @param targetState Source state of target agents, in shape [B*AgentDim]. - * @param PointDim The number of point state dimensions. - * @param inPolyline Source polylines, in shape [L*P*PointDim]. - * @param outPolyline Output polylines, in shape [K*P*PointDim]. + * @param L The number of original polylines. + * @param P The number of points contained in each polyline. + * @param D The number of point dimensions. + * @param inPolyline Source polyline, in shape [B*L*P*D]. + * @param inPolylineMask Source polyline mask, in shape [B*L*P]. + * @param inDistance Source distances from target agents to the centers of each polyline, in shape + * [B*L]. + * @param outPolyline Output polyline, in shape [B*K*P*D]. + * @param outPolylineMask Output polyline mask, in shape [B*K*P]. */ -__global__ void extractTopkKernel( - const int K, const int L, const int P, const int B, const float offsetX, const float offsetY, - const int AgentDim, const float * targetState, const int PointDim, const float * inPolyline, - float * outPolyline); +__global__ void extractTopKPolylineKernel( + const int K, const int B, const int L, const int P, const int D, const float * inPolyline, + const bool * inPolylineMask, const float * inDistance, float * outPolyline, + bool * outPolylineMask); /** - * @brief Calculate the magnitudes of polylines. + * @brief Calculate center positions of each polyline with respect to target agent coordinates + * system. + * + * @note Polyline must have been transformed to target agent coordinates system. * * @param B The number of target agents. * @param K The number of polylines. * @param P The number of points contained in each polyline. - * @param D The number of point dimensions. - * @param polyline Source polylines, in shape [B*K*P*(D + 2)]. - * @param mask Source polyline masks, in shape [B*K*P]. - * @param center Output magnitudes of polylines, in shape [B*K*3]. + * @param D The number of point dimensions, expecting (x, y, z, ...). + * @param polyline Source polyline, in shape [B*K*P*D]. + * @param polylineMask Source polyline mask, in shape [B*K*P]. + * @param center Output centers, in shape [B*K*3]. */ __global__ void calculatePolylineCenterKernel( - const int B, const int K, const int P, const int PointDim, const float * polyline, - const bool * mask, float * center); + const int B, const int K, const int P, const int D, const float * polyline, + const bool * polylineMask, float * center); /** * @brief In cases of the number of batch polylines (L) is greater than K, * extracts the topK elements. * - * @param L The number of source polylines. - * @param K The number of polylines expected as the model input. + * @param K The number of polylines to be extracted. + * @param L The number of original polylines. * @param P The number of points contained in each polyline. * @param PointDim The number of point state dimensions. - * @param AgentDim The number of agent state dimensions. - * @param in_polyline Source polylines, in shape [L*P*PointDim]. + * @param inPolyline Source polylines, in shape [L*P*PointDim]. * @param B The number of target agents. - * @param target_state Target agent state at the latest timestamp, in shape [B, AgentDim]. - * @param offset_x The x offset. - * @param offset_y The y offset. - * @param topk_index A container to store topK indices, in shape [K]. - * @param out_polyline Output polylines, in shape [B*K*P*(PointDim+2)]. - * @param out_polyline_mask Output polyline masks, in shape [B*K*P]. - * @param out_polyline_center Output magnitudes of each polyline with respect to target coords, + * @param AgentDim The number of agent state dimensions. + * @param targetState Target agent state at the latest timestamp, in shape [B*AgentDim]. + * @param tmpPolyline A container to store transformed polyline temporary, in shape + * [B*L*P*(PointDim+2)]. + * @param tmpPolylineMask A container to store transformed polyline mask temporary, in shape + * [B*L*P]. + * @param tmpDistance A container to store distances temporary, in shape [B*L]. + * @param outPolyline Output polylines, in shape [B*K*P*(PointDim+2)]. + * @param outPolylineMask Output polyline masks, in shape [B*K*P]. + * @param outPolylineCenter Output magnitudes of each polyline with respect to target coords, * in shape [B*K*3]. * @param stream CUDA stream. * @return cudaError_t */ cudaError_t polylinePreprocessWithTopkLauncher( - const int L, const int K, const int P, const int PointDim, const float * in_polyline, const int B, - const int AgentDim, const float * target_state, const float offsetX, const float offsetY, - int * topk_index, float * out_polyline, bool * out_polyline_mask, float * out_polyline_center, + const int K, const int L, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * tmpPolyline, bool * tmpPolylineMask, + float * tmpDistance, float * outPolyline, bool * outPolylineMask, float * outPolylineCenter, cudaStream_t stream); /** @@ -114,20 +143,20 @@ cudaError_t polylinePreprocessWithTopkLauncher( * @param K The number of polylines. * @param P The number of points contained in each polyline. * @param PointDim The number of point state dimensions. - * @param in_polyline Source polylines, in shape [K*P*PointDim]. + * @param inPolyline Source polylines, in shape [K*P*PointDim]. * @param B The number of target agents. * @param AgentDim The number of agent state dimensions. - * @param target_state Target agent state at the latest timestamp, in shape [B, AgentDim]. - * @param out_polyline Output polylines, in shape [B*K*P*(PointDim + 2)]. - * @param out_polyline_mask Output polyline masks, in shape [B*K*P]. - * @param out_polyline_center Output magnitudes of each polyline with respect to target coords, + * @param targetState Target agent state at the latest timestamp, in shape [B*AgentDim]. + * @param outPolyline Output polylines, in shape [B*K*P*(PointDim+2)]. + * @param outPolylineMask Output polyline masks, in shape [B*K*P]. + * @param outPolylineCenter Output magnitudes of each polyline with respect to target coords, * in shape [B*K*3]. * @param stream CUDA stream. * @return cudaError_t */ cudaError_t polylinePreprocessLauncher( - const int K, const int P, const int PointDim, const float * in_polyline, const int B, - const int AgentDim, const float * target_state, float * out_polyline, bool * out_polyline_mask, - float * out_polyline_center, cudaStream_t stream); + const int K, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * outPolyline, bool * outPolylineMask, + float * outPolylineCenter, cudaStream_t stream); #endif // PREPROCESS__POLYLINE_PREPROCESS_KERNEL_CUH_ diff --git a/perception/tensorrt_mtr/lib/src/postprocess/postprocess_kernel.cu b/perception/tensorrt_mtr/lib/src/postprocess/postprocess_kernel.cu index 2149fb8aac41f..381ef00343764 100644 --- a/perception/tensorrt_mtr/lib/src/postprocess/postprocess_kernel.cu +++ b/perception/tensorrt_mtr/lib/src/postprocess/postprocess_kernel.cu @@ -12,11 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "postprocess/postprocess_kernel.cuh" - __global__ void transformTrajectoryKernel( - const int B, const int M, const int T, const int inDim, const float * targetState, - const int outDim, float * trajectory) + const int B, const int M, const int T, const int AgentDim, const float * targetState, + const int PredDim, float * predTrajectory) { int b = blockIdx.x * blockDim.x + threadIdx.x; int m = blockIdx.y * blockDim.y + threadIdx.y; @@ -26,31 +24,31 @@ __global__ void transformTrajectoryKernel( return; } - const int pred_idx = (b * M * T + m * T + t) * outDim; - const float pred_x = trajectory[pred_idx]; - const float pred_y = trajectory[pred_idx + 1]; + const int predIdx = (b * M * T + m * T + t) * PredDim; + const float predX = predTrajectory[predIdx]; + const float predY = predTrajectory[predIdx + 1]; - const int target_idx = b * inDim; - const float target_x = targetState[target_idx]; - const float target_y = targetState[target_idx + 1]; - const float target_yaw = targetState[target_idx + 6]; - const float target_cos = cos(target_yaw); - const float target_sin = sin(target_yaw); + const int targetIdx = b * AgentDim; + const float targetX = targetState[targetIdx]; + const float targetY = targetState[targetIdx + 1]; + const float targetYaw = targetState[targetIdx + 6]; + const float targetCos = cosf(targetYaw); + const float targetSin = sinf(targetYaw); - trajectory[pred_idx] = target_cos * pred_x + target_sin * pred_y + target_x; - trajectory[pred_idx + 1] = -target_sin * pred_x + target_cos * pred_y + target_y; + predTrajectory[predIdx] = targetCos * predX + targetSin * predY + targetX; + predTrajectory[predIdx + 1] = -targetSin * predX + targetCos * predY + targetY; } cudaError_t postprocessLauncher( - const int B, const int M, const int T, const int inDim, const float * target_state, - const int outDim, float * pred_score, float * pred_trajectory, cudaStream_t stream) + const int B, const int M, const int T, const int AgentDim, const float * targetState, + const int PredDim, float * predTrajectory, cudaStream_t stream) { // TODO: update the number of blocks and threads to guard from `cudaErrorIllegalAccess` constexpr int threadsPerBlock = 256; dim3 blocks(B, M, T); transformTrajectoryKernel<<>>( - B, M, T, inDim, target_state, outDim, pred_trajectory); + B, M, T, AgentDim, targetState, PredDim, predTrajectory); return cudaGetLastError(); } diff --git a/perception/tensorrt_mtr/lib/src/preprocess/polyline_preprocess_kernel.cu b/perception/tensorrt_mtr/lib/src/preprocess/polyline_preprocess_kernel.cu index 227f57e300cbe..f04985d394118 100644 --- a/perception/tensorrt_mtr/lib/src/preprocess/polyline_preprocess_kernel.cu +++ b/perception/tensorrt_mtr/lib/src/preprocess/polyline_preprocess_kernel.cu @@ -14,11 +14,13 @@ #include "preprocess/polyline_preprocess_kernel.cuh" +#include + #include __global__ void transformPolylineKernel( - const int K, const int P, const int PointDim, const float * in_polyline, const int B, - const int AgentDim, const float * target_state, float * out_polyline, bool * out_polyline_mask) + const int K, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * outPolyline, bool * outPolylineMask) { int b = blockIdx.x * blockDim.x + threadIdx.x; int k = blockIdx.y * blockDim.y + threadIdx.y; @@ -28,50 +30,59 @@ __global__ void transformPolylineKernel( return; } - const int src_polyline_idx = (k * P + p) * PointDim; - const float x = in_polyline[src_polyline_idx]; - const float y = in_polyline[src_polyline_idx + 1]; - const float z = in_polyline[src_polyline_idx + 2]; - const float dx = in_polyline[src_polyline_idx + 3]; - const float dy = in_polyline[src_polyline_idx + 4]; - const float dz = in_polyline[src_polyline_idx + 5]; - const float type_id = in_polyline[src_polyline_idx + 6]; - - const int center_idx = b * AgentDim; - const float center_x = target_state[center_idx]; - const float center_y = target_state[center_idx + 1]; - const float center_z = target_state[center_idx + 2]; - const float center_yaw = target_state[center_idx + 6]; - const float center_cos = cos(center_yaw); - const float center_sin = sin(center_yaw); - - // do transform - const float trans_x = center_cos * (x - center_x) - center_sin * (y - center_y); - const float trans_y = center_sin * (x - center_x) + center_cos * (y - center_y); - const float trans_z = z - center_z; - const float trans_dx = center_cos * dx - center_sin * dy; - const float trans_dy = center_sin * dx + center_cos * dy; - const float trans_dz = dz; - - const int out_idx = (b * K * P + k * P + p) * (PointDim + 2); - out_polyline[out_idx] = trans_x; - out_polyline[out_idx + 1] = trans_y; - out_polyline[out_idx + 2] = trans_z; - out_polyline[out_idx + 3] = trans_dx; - out_polyline[out_idx + 4] = trans_dy; - out_polyline[out_idx + 5] = trans_dz; - out_polyline[out_idx + 6] = type_id; - - const int out_mask_idx = b * K * P + k * P + p; - bool is_valid = false; - for (size_t i = 0; i < 6; ++i) { - is_valid += out_polyline[out_idx + i] != 0.0f; + const int inIdx = (k * P + p) * PointDim; + const int outIdx = b * K * P + k * P + p; + bool isValid = false; + for (int d = 0; d < PointDim - 1; ++d) { + if (inPolyline[inIdx + d] != 0.0f) { + isValid = true; + } + } + outPolylineMask[outIdx] = isValid; + + // initialize output polyline with 0.0 + for (int d = 0; d < PointDim + 2; ++d) { + outPolyline[outIdx * (PointDim + 2) + d] = 0.0f; + } + + // set transformed values if valid, otherwise all 0.0. + if (isValid) { + const float x = inPolyline[inIdx]; + const float y = inPolyline[inIdx + 1]; + const float z = inPolyline[inIdx + 2]; + const float dx = inPolyline[inIdx + 3]; + const float dy = inPolyline[inIdx + 4]; + const float dz = inPolyline[inIdx + 5]; + const float typeID = inPolyline[inIdx + 6]; + + const int centerIdx = b * AgentDim; + const float centerX = targetState[centerIdx]; + const float centerY = targetState[centerIdx + 1]; + const float centerZ = targetState[centerIdx + 2]; + const float centerYaw = targetState[centerIdx + 6]; + const float centerCos = cosf(centerYaw); + const float centerSin = sinf(centerYaw); + + // do transform + const float transX = centerCos * (x - centerX) - centerSin * (y - centerY); + const float transY = centerSin * (x - centerX) + centerCos * (y - centerY); + const float transZ = z - centerZ; + const float transDx = centerCos * dx - centerSin * dy; + const float transDy = centerSin * dx + centerCos * dy; + const float transDz = dz; + + outPolyline[outIdx * (PointDim + 2)] = transX; + outPolyline[outIdx * (PointDim + 2) + 1] = transY; + outPolyline[outIdx * (PointDim + 2) + 2] = transZ; + outPolyline[outIdx * (PointDim + 2) + 3] = transDx; + outPolyline[outIdx * (PointDim + 2) + 4] = transDy; + outPolyline[outIdx * (PointDim + 2) + 5] = transDz; + outPolyline[outIdx * (PointDim + 2) + 6] = typeID; } - out_polyline_mask[out_mask_idx] = is_valid; } __global__ void setPreviousPositionKernel( - const int B, const int K, const int P, const int D, const bool * mask, float * polyline) + const int B, const int K, const int P, const int D, float * polyline) { int b = blockIdx.x * blockDim.x + threadIdx.x; int k = blockIdx.y * blockDim.y + threadIdx.y; @@ -81,56 +92,92 @@ __global__ void setPreviousPositionKernel( return; } - const int cur_idx = (b * K * P + k * P + p) * D; - const int pre_idx = k == 0 ? cur_idx : (b * K * P + (k - 1) * P + p) * D; + const int curIdx = (b * K * P + k * P + p) * D; + const int preIdx = p == 0 ? curIdx : (b * K * P + k * P + p - 1) * D; - polyline[cur_idx + D - 2] = polyline[pre_idx]; - polyline[cur_idx + D - 1] = polyline[pre_idx + 1]; + polyline[curIdx + D - 2] = polyline[preIdx]; // x + polyline[curIdx + D - 1] = polyline[preIdx + 1]; // y +} - const int mask_idx = b * K * P + k * P + p; - if (!mask[mask_idx]) { - for (int d = 0; d < D; ++d) { - polyline[cur_idx + d] = 0.0f; +__global__ void calculateCenterDistanceKernel( + const int B, const int K, const int P, const int D, const float * polyline, + const bool * polylineMask, float * distance) +{ + int b = blockIdx.x * blockDim.x + threadIdx.x; + int k = blockIdx.y * blockDim.y + threadIdx.y; + if (b >= B || k >= K) { + return; + } + + // calculate polyline center + float sumX = 0.0f, sumY = 0.0f; + int numValid = 0; + for (int p = 0; p < P; ++p) { + int idx = b * K * P + k * P + p; + if (polylineMask[idx]) { + sumX += polyline[idx * D]; + sumY += polyline[idx * D + 1]; + ++numValid; } } + float centerX = sumX / fmaxf(1.0f, numValid); + float centerY = sumY / fmaxf(1.0f, numValid); + + distance[b * K + k] = hypot(centerX, centerY); } -__global__ void extractTopkKernel( - const int K, const int L, const int P, const int B, const float offsetX, const float offsetY, - const int AgentDim, const float * targetState, const int PointDim, const float * inPolyline, - float * outPolyline) +__global__ void extractTopKPolylineKernel( + const int K, const int B, const int L, const int P, const int D, const float * inPolyline, + const bool * inPolylineMask, const float * inDistance, float * outPolyline, + bool * outPolylineMask) { - // --- pseudo code --- - // mask = All(polyline != 0.0, dim=2) - // polylineCenter = polyline[:, :, 0:2].sum(dim=1) / clampMin(mask.sum(dim=1), min=1.0) - // offset = rotateAlongZ((offset_x, offset_y), target_state[:, 6]) - // targetOffsetPos = target_state[:, 0:2] + offset - // distances = (target_offset_pos - center) - // _, topkIdxs = distances.topk(k=K, descending=True) - // outPolyline = inPolyline[topkIdxs] - // ------------------- - - // int targetIdx = blockIdx.x; - - // const float targetX = targetState[targetIdx]; - // const float targetY = targetState[targetIdx + 1]; - // const float targetYaw = targetState[targetIdx + 6]; - // const float targetCos = cos(targetYaw); - // const float targetSin = sin(targetYaw); - - // const float transTargetX = targetCos * offsetX + targetSin * offsetY + targetX; - // const float transTargetY = -targetSin * offsetX + targetCos * offsetY + targetY; + int b = blockIdx.x; // Batch index + int tid = threadIdx.x; // Polyline index + int p = blockIdx.y * blockDim.y + threadIdx.y; // Point index + int d = blockIdx.z * blockDim.z + threadIdx.z; // Dim index + if (b >= B || tid >= L || p >= P || d >= D) { + return; + } + extern __shared__ float distances[]; + + // Load distances into shared memory + if (tid < L) { + distances[tid] = inDistance[b * L + tid]; + } + __syncthreads(); + + // Simple selection of the smallest K distances + // (this part should be replaced with a more efficient sorting/selecting algorithm) + for (int k = 0; k < K; k++) { + float minDistance = FLT_MAX; + int minIndex = -1; + + for (int l = 0; l < L; l++) { + if (distances[l] < minDistance) { + minDistance = distances[l]; + minIndex = l; + } + } + __syncthreads(); + + if (minIndex == -1) { + continue; + } + + if (tid == k) { // this thread will handle copying the k-th smallest polyline + int inIdx = b * L * P + minIndex * P + p; + int outIdx = b * K * P + k * P + p; + outPolyline[outIdx * D + d] = inPolyline[inIdx * D + d]; + outPolylineMask[outIdx] = inPolylineMask[inIdx]; + } + distances[minIndex] = FLT_MAX; // exclude this index from future consideration + } } __global__ void calculatePolylineCenterKernel( - const int B, const int K, const int P, const int PointDim, const float * polyline, - const bool * mask, float * center) + const int B, const int K, const int P, const int D, const float * polyline, + const bool * polylineMask, float * center) { - // --- pseudo code --- - // sum = (polylines[:, :, :, 0:3] * mask[:, :, :, None]).sum(dim=2) - // center = sum / clampMIN(mask.sum(dim=2), min=1.0) - // ------------------- - int b = blockIdx.x * blockDim.x + threadIdx.x; int k = blockIdx.y * blockDim.y + threadIdx.y; @@ -139,33 +186,33 @@ __global__ void calculatePolylineCenterKernel( } // initialize with 0.0 - int center_idx = (b * K + k) * 3; + int centerIdx = (b * K + k) * 3; for (int d = 0; d < 3; ++d) { - center[center_idx + d] = 0.0f; + center[centerIdx + d] = 0.0f; } - float sum_xyz[3] = {0.0f, 0.0f, 0.0f}; - int count = 0; + // calculate polyline center + float sumX = 0.0f, sumY = 0.0f, sumZ = 0.0f; + int numValid = 0; for (int p = 0; p < P; ++p) { - int src_idx = b * K * P + k * P + p; - if (mask[src_idx]) { - for (int d = 0; d < 3; ++d) { - sum_xyz[d] += polyline[src_idx * PointDim + d]; - } - ++count; + int idx = b * K * P + k * P + p; + if (polylineMask[idx]) { + sumX += polyline[idx * D]; + sumY += polyline[idx * D + 1]; + sumZ += polyline[idx * D + 2]; + ++numValid; } } - count = max(count, 1); - for (int d = 0; d < 3; ++d) { - center[center_idx + d] = sum_xyz[d] / static_cast(count); - } + center[centerIdx] = sumX / fmaxf(1.0f, numValid); + center[centerIdx + 1] = sumY / fmaxf(1.0f, numValid); + center[centerIdx + 2] = sumZ / fmaxf(1.0f, numValid); } cudaError_t polylinePreprocessWithTopkLauncher( - const int L, const int K, const int P, const int PointDim, const float * in_polyline, const int B, - const int AgentDim, const float * target_state, const float offset_x, const float offset_y, - int * topk_index, float * out_polyline, bool * out_polyline_mask, float * out_polyline_center, + const int K, const int L, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * tmpPolyline, bool * tmpPolylineMask, + float * tmpDistance, float * outPolyline, bool * outPolylineMask, float * outPolylineCenter, cudaStream_t stream) { if (L < K) { @@ -173,27 +220,54 @@ cudaError_t polylinePreprocessWithTopkLauncher( return cudaError_t::cudaErrorInvalidValue; } + const int outPointDim = PointDim + 2; + + // TODO: update the number of blocks and threads to guard from `cudaErrorIllegalAccess` + constexpr int threadsPerBlock = 256; + const dim3 blocks1(B, L, P); + transformPolylineKernel<<>>( + L, P, PointDim, inPolyline, B, AgentDim, targetState, tmpPolyline, tmpPolylineMask); + + const dim3 blocks2(B, L); + calculateCenterDistanceKernel<<>>( + B, L, P, outPointDim, tmpPolyline, tmpPolylineMask, tmpDistance); + + const dim3 blocks3(B, P, outPointDim); + const size_t sharedMemSize = sizeof(float) * L; + extractTopKPolylineKernel<<>>( + K, B, L, P, outPointDim, tmpPolyline, tmpPolylineMask, tmpDistance, outPolyline, + outPolylineMask); + + const dim3 blocks4(B, K, P); + setPreviousPositionKernel<<>>( + B, K, P, outPointDim, outPolyline); + + const dim3 blocks5(B, K); + calculatePolylineCenterKernel<<>>( + B, K, P, outPointDim, outPolyline, outPolylineMask, outPolylineCenter); + return cudaGetLastError(); } cudaError_t polylinePreprocessLauncher( - const int K, const int P, const int PointDim, const float * in_polyline, const int B, - const int AgentDim, const float * target_state, float * out_polyline, bool * out_polyline_mask, - float * out_polyline_center, cudaStream_t stream) + const int K, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * outPolyline, bool * outPolylineMask, + float * outPolylineCenter, cudaStream_t stream) { + const int outPointDim = PointDim + 2; + // TODO: update the number of blocks and threads to guard from `cudaErrorIllegalAccess` constexpr int threadsPerBlock = 256; - const dim3 block3d(B, K / threadsPerBlock, P); - + const dim3 block3d(B, K, P); transformPolylineKernel<<>>( - K, P, PointDim, in_polyline, B, AgentDim, target_state, out_polyline, out_polyline_mask); + K, P, PointDim, inPolyline, B, AgentDim, targetState, outPolyline, outPolylineMask); setPreviousPositionKernel<<>>( - B, K, P, PointDim, out_polyline_mask, out_polyline); + B, K, P, outPointDim, outPolyline); - const dim3 block2d(B, K / threadsPerBlock); + const dim3 block2d(B, K); calculatePolylineCenterKernel<<>>( - B, K, P, PointDim, out_polyline, out_polyline_mask, out_polyline_center); + B, K, P, outPointDim, outPolyline, outPolylineMask, outPolylineCenter); return cudaGetLastError(); } diff --git a/perception/tensorrt_mtr/src/node.cpp b/perception/tensorrt_mtr/src/node.cpp index 5f59108f236d8..89e5b768e8521 100644 --- a/perception/tensorrt_mtr/src/node.cpp +++ b/perception/tensorrt_mtr/src/node.cpp @@ -28,12 +28,8 @@ namespace trt_mtr { namespace { -/** - * @brief Get the Lanelet subtype name. - * - * @param lanelet - * @return std::string - */ +// Return the Lanelet subtype name. If input Lanelet has no attribute named `type` return empty +// string `""`. std::string getLaneletSubtype(const lanelet::Lanelet & lanelet) { if (!lanelet.hasAttribute("subtype")) { @@ -44,12 +40,8 @@ std::string getLaneletSubtype(const lanelet::Lanelet & lanelet) } } -/** - * @brief Get the LineString type name. - * - * @param linestring - * @return std::string - */ +// Return the LineString type name. If input LineString has no attribute named `type` return empty +// string `""`. std::string getLineStringType(const lanelet::ConstLineString3d & linestring) { if (!linestring.hasAttribute("type")) { @@ -60,12 +52,8 @@ std::string getLineStringType(const lanelet::ConstLineString3d & linestring) } } -/** - * @brief Get the LineString subtype name. - * - * @param linestring - * @return std::string - */ +// Return the LineString subtype name. If input LineString has no attribute named `subtype` return +// empty string `""`. std::string getLineStringSubtype(const lanelet::ConstLineString3d & linestring) { if (!linestring.hasAttribute("subtype")) { @@ -76,13 +64,7 @@ std::string getLineStringSubtype(const lanelet::ConstLineString3d & linestring) } } -/** - * @brief Check whether lanelet has an attribute named `turn_direction`. - * - * @param lanelet - * @return true - * @return false - */ +// Check whether lanelet has an attribute named `turn_direction`. bool isTurnIntersection(const lanelet::Lanelet & lanelet) { if (lanelet.hasAttribute("turn_direction")) { @@ -92,25 +74,14 @@ bool isTurnIntersection(const lanelet::Lanelet & lanelet) } } -/** - * @brief Insert source LanePoints into target LanePoints. - * - * @param src Source LanePoints. - * @param dst Target LanePoints. - */ +// Insert source `LanePoints` into target `LanePoints`. void insertLanePoints(const std::vector & src, std::vector & dst) { dst.reserve(dst.size() * 2); dst.insert(dst.end(), src.cbegin(), src.cend()); } -/** - * @brief Convert TrackedObject to AgentState. - * - * @param object - * @param is_valid - * @return AgentState - */ +// Convert `TrackedObject` to `AgentState`. AgentState trackedObjectToAgentState(const TrackedObject & object, const bool is_valid) { const auto & pose = object.kinematics.pose_with_covariance.pose; @@ -135,13 +106,8 @@ AgentState trackedObjectToAgentState(const TrackedObject & object, const bool is valid}; } -/** - * @brief Get the label index corresponding to AgentLabel. If the label of tracked object is not - * defined in AgentLabel returns `-1`. - * - * @param object - * @return int - */ +// Get the label index corresponding to AgentLabel. If the label of tracked object is not * defined +// in AgentLabel returns `-1`. int getLabelIndex(const TrackedObject & object) { const auto classification = object_recognition_utils::getHighestProbLabel(object.classification); @@ -175,16 +141,13 @@ MTRNode::MTRNode(const rclcpp::NodeOptions & node_options) const auto max_num_point = static_cast(declare_parameter("max_num_point")); const auto point_break_distance = static_cast(declare_parameter("point_break_distance")); - auto tmp_offset_xy = declare_parameter>("offset_xy"); - std::array offset_xy; - std::copy_n(tmp_offset_xy.begin(), 2, offset_xy.begin()); const auto intention_point_filepath = declare_parameter("intention_point_filepath"); const auto num_intention_point_cluster = static_cast(declare_parameter("num_intention_point_cluster")); config_ptr_ = std::make_unique( target_labels, num_past, num_mode, num_future, max_num_polyline, max_num_point, - point_break_distance, offset_xy, intention_point_filepath, num_intention_point_cluster); + point_break_distance, intention_point_filepath, num_intention_point_cluster); model_ptr_ = std::make_unique(model_path, precision, *config_ptr_.get()); } @@ -501,7 +464,7 @@ std::vector MTRNode::extractTargetAgent(const std::vector return item1.second < item2.second; }); - constexpr size_t max_target_size = 15; // TODO(ktro2828): use parameter + constexpr size_t max_target_size = 15; // TODO(ktro2828): use a parameter std::vector target_indices; target_indices.reserve(max_target_size); for (const auto & [idx, _] : distances) { @@ -564,7 +527,6 @@ PredictedObject MTRNode::generatePredictedObject( return predicted_object; } - } // namespace trt_mtr #include diff --git a/perception/tensorrt_mtr/src/trt_mtr.cpp b/perception/tensorrt_mtr/src/trt_mtr.cpp index ba9615da35c82..5f4899af262eb 100644 --- a/perception/tensorrt_mtr/src/trt_mtr.cpp +++ b/perception/tensorrt_mtr/src/trt_mtr.cpp @@ -27,6 +27,11 @@ TrtMTR::TrtMTR( : config_(config), intention_point_(config_.intention_point_filepath, config_.num_intention_point_cluster) { + max_num_polyline_ = config_.max_num_polyline; + num_mode_ = config_.num_mode; + num_future_ = config_.num_future; + + // build engine builder_ = std::make_unique( model_path, precision, batch_config, max_workspace_size, build_config); builder_->setup(); @@ -53,7 +58,7 @@ bool TrtMTR::doInference( d_in_polyline_.get(), d_in_polyline_mask_.get(), d_in_polyline_center_.get(), d_in_last_pos_.get(), d_target_index_.get(), d_intention_points_.get(), - d_out_score_.get(), d_out_trajectory_.get()}; + d_out_trajectory_.get(), d_out_score_.get()}; if (!builder_->enqueueV2(buffer.data(), stream_, nullptr)) { std::cerr << "Fail to do inference" << std::endl; @@ -70,110 +75,108 @@ bool TrtMTR::doInference( void TrtMTR::initCudaPtr(AgentData & agent_data, PolylineData & polyline_data) { + num_target_ = agent_data.num_target(); + num_agent_ = agent_data.num_agent(); + time_length_ = agent_data.time_length(); + agent_input_dim_ = agent_data.input_dim(); + num_polyline_ = polyline_data.num_polyline(); + num_point_ = polyline_data.num_point(); + polyline_input_dim_ = polyline_data.input_dim(); + // source data - d_target_index_ = cuda::make_unique(agent_data.num_target()); - d_label_index_ = cuda::make_unique(agent_data.num_agent()); - d_timestamps_ = cuda::make_unique(agent_data.time_length()); + d_target_index_ = cuda::make_unique(num_target_); + d_label_index_ = cuda::make_unique(num_agent_); + d_timestamps_ = cuda::make_unique(time_length_); d_trajectory_ = cuda::make_unique(agent_data.size()); - d_target_state_ = cuda::make_unique(agent_data.num_target() * agent_data.state_dim()); - d_intention_points_ = - cuda::make_unique(agent_data.num_target() * intention_point_.size()); + d_target_state_ = cuda::make_unique(num_target_ * agent_data.state_dim()); + d_intention_points_ = cuda::make_unique(num_target_ * intention_point_.size()); d_polyline_ = cuda::make_unique(polyline_data.size()); - d_topk_index_ = cuda::make_unique(config_.max_num_polyline); // preprocessed input - // TODO(ktro2828): define this in global - d_in_trajectory_ = cuda::make_unique( - agent_data.num_target() * agent_data.num_agent() * agent_data.time_length() * - agent_data.input_dim()); - d_in_trajectory_mask_ = cuda::make_unique( - agent_data.num_target() * agent_data.num_agent() * agent_data.time_length()); - d_in_last_pos_ = cuda::make_unique(agent_data.num_target() * agent_data.num_agent() * 3); - d_in_polyline_ = cuda::make_unique( - agent_data.num_target() * config_.max_num_polyline * polyline_data.num_point() * - (polyline_data.state_dim() + 2)); - d_in_polyline_mask_ = cuda::make_unique( - agent_data.num_target() * config_.max_num_polyline * polyline_data.num_point()); - d_in_polyline_center_ = - cuda::make_unique(agent_data.num_target() * config_.max_num_polyline * 3); + d_in_trajectory_ = + cuda::make_unique(num_target_ * num_agent_ * time_length_ * agent_input_dim_); + d_in_trajectory_mask_ = cuda::make_unique(num_target_ * num_agent_ * time_length_); + d_in_last_pos_ = cuda::make_unique(num_target_ * num_agent_ * 3); + d_in_polyline_ = + cuda::make_unique(num_target_ * max_num_polyline_ * num_point_ * polyline_input_dim_); + d_in_polyline_mask_ = cuda::make_unique(num_target_ * max_num_polyline_ * num_point_); + d_in_polyline_center_ = cuda::make_unique(num_target_ * max_num_polyline_ * 3); + + if (max_num_polyline_ < num_polyline_) { + d_tmp_polyline_ = + cuda::make_unique(num_target_ * num_polyline_ * num_point_ * polyline_input_dim_); + d_tmp_polyline_mask_ = cuda::make_unique(num_target_ * num_polyline_ * num_point_); + d_tmp_distance_ = cuda::make_unique(num_target_ * num_polyline_); + } // outputs - d_out_score_ = cuda::make_unique(agent_data.num_target() * config_.num_mode); - d_out_trajectory_ = cuda::make_unique( - agent_data.num_target() * config_.num_mode * config_.num_future * PredictedStateDim); + d_out_score_ = cuda::make_unique(num_target_ * num_mode_); + d_out_trajectory_ = + cuda::make_unique(num_target_ * num_mode_ * num_future_ * PredictedStateDim); } bool TrtMTR::preProcess(AgentData & agent_data, PolylineData & polyline_data) { CHECK_CUDA_ERROR(cudaMemcpyAsync( - d_target_index_.get(), agent_data.target_index().data(), sizeof(int) * agent_data.num_target(), + d_target_index_.get(), agent_data.target_index().data(), sizeof(int) * num_target_, cudaMemcpyHostToDevice, stream_)); CHECK_CUDA_ERROR(cudaMemcpyAsync( - d_label_index_.get(), agent_data.label_index().data(), sizeof(int) * agent_data.num_agent(), + d_label_index_.get(), agent_data.label_index().data(), sizeof(int) * num_agent_, cudaMemcpyHostToDevice, stream_)); CHECK_CUDA_ERROR(cudaMemcpyAsync( - d_timestamps_.get(), agent_data.timestamps().data(), sizeof(float) * agent_data.time_length(), + d_timestamps_.get(), agent_data.timestamps().data(), sizeof(float) * time_length_, cudaMemcpyHostToDevice, stream_)); CHECK_CUDA_ERROR(cudaMemcpyAsync( - d_trajectory_.get(), agent_data.data_ptr(), - sizeof(float) * agent_data.num_agent() * agent_data.time_length() * agent_data.state_dim(), + d_trajectory_.get(), agent_data.data_ptr(), sizeof(float) * agent_data.size(), cudaMemcpyHostToDevice, stream_)); CHECK_CUDA_ERROR(cudaMemcpyAsync( d_target_state_.get(), agent_data.target_data_ptr(), - sizeof(float) * agent_data.num_target() * agent_data.state_dim(), cudaMemcpyHostToDevice, - stream_)); + sizeof(float) * num_target_ * agent_data.state_dim(), cudaMemcpyHostToDevice, stream_)); CHECK_CUDA_ERROR(cudaMemcpyAsync( - d_polyline_.get(), polyline_data.data_ptr(), - sizeof(float) * polyline_data.num_polyline() * polyline_data.num_point() * - polyline_data.state_dim(), + d_polyline_.get(), polyline_data.data_ptr(), sizeof(float) * polyline_data.size(), cudaMemcpyHostToDevice, stream_)); const auto target_label_names = getLabelNames(agent_data.target_label_index()); const auto intention_points = intention_point_.get_points(target_label_names); CHECK_CUDA_ERROR(cudaMemcpyAsync( d_intention_points_.get(), intention_points.data(), - sizeof(float) * agent_data.num_target() * intention_point_.size(), cudaMemcpyHostToDevice, - stream_)); + sizeof(float) * num_target_ * intention_point_.size(), cudaMemcpyHostToDevice, stream_)); - // Preprocess CHECK_CUDA_ERROR(agentPreprocessLauncher( - agent_data.num_target(), agent_data.num_agent(), agent_data.time_length(), - agent_data.state_dim(), agent_data.num_class(), agent_data.sdc_index(), d_target_index_.get(), - d_label_index_.get(), d_timestamps_.get(), d_trajectory_.get(), d_in_trajectory_.get(), - d_in_trajectory_mask_.get(), d_in_last_pos_.get(), stream_)); + num_target_, num_agent_, time_length_, agent_data.state_dim(), agent_data.num_class(), + agent_data.sdc_index(), d_target_index_.get(), d_label_index_.get(), d_timestamps_.get(), + d_trajectory_.get(), d_in_trajectory_.get(), d_in_trajectory_mask_.get(), d_in_last_pos_.get(), + stream_)); - // TODO(ktro2828) - if (config_.max_num_polyline < polyline_data.num_polyline()) { + if (max_num_polyline_ < num_polyline_) { CHECK_CUDA_ERROR(polylinePreprocessWithTopkLauncher( - polyline_data.num_polyline(), config_.max_num_polyline, polyline_data.num_point(), - polyline_data.state_dim(), d_polyline_.get(), agent_data.num_target(), agent_data.state_dim(), - d_target_state_.get(), config_.offset_xy[0], config_.offset_xy[1], d_topk_index_.get(), - d_in_polyline_.get(), d_in_polyline_mask_.get(), d_in_polyline_center_.get(), stream_)); + max_num_polyline_, num_polyline_, num_point_, polyline_data.state_dim(), d_polyline_.get(), + num_target_, agent_data.state_dim(), d_target_state_.get(), d_tmp_polyline_.get(), + d_tmp_polyline_mask_.get(), d_tmp_distance_.get(), d_in_polyline_.get(), + d_in_polyline_mask_.get(), d_in_polyline_center_.get(), stream_)); } else { assert(config_.max_num_polyline == polyline_data.num_polyline()); CHECK_CUDA_ERROR(polylinePreprocessLauncher( - polyline_data.num_polyline(), polyline_data.num_point(), polyline_data.state_dim(), - d_polyline_.get(), agent_data.num_target(), agent_data.state_dim(), d_target_state_.get(), - d_in_polyline_.get(), d_in_polyline_mask_.get(), d_in_polyline_center_.get(), stream_)); + num_polyline_, num_point_, polyline_data.state_dim(), d_polyline_.get(), num_target_, + agent_data.state_dim(), d_target_state_.get(), d_in_polyline_.get(), + d_in_polyline_mask_.get(), d_in_polyline_center_.get(), stream_)); } return true; } bool TrtMTR::postProcess(AgentData & agent_data, std::vector & trajectories) { - // Postprocess CHECK_CUDA_ERROR(postprocessLauncher( - agent_data.num_target(), config_.num_mode, config_.num_future, agent_data.state_dim(), - d_target_state_.get(), PredictedStateDim, d_out_score_.get(), d_out_trajectory_.get(), - stream_)); + num_target_, num_mode_, num_future_, agent_data.state_dim(), d_target_state_.get(), + PredictedStateDim, d_out_trajectory_.get(), stream_)); - trajectories.reserve(agent_data.num_target()); - for (size_t b = 0; b < agent_data.num_target(); ++b) { - const auto score_ptr = d_out_score_.get() + b * config_.num_mode; + trajectories.reserve(num_target_); + for (size_t b = 0; b < num_target_; ++b) { + const auto score_ptr = d_out_score_.get() + b * num_mode_; const auto trajectory_ptr = - d_out_trajectory_.get() + b * config_.num_mode * config_.num_future * PredictedStateDim; - trajectories.emplace_back(score_ptr, trajectory_ptr, config_.num_mode, config_.num_future); + d_out_trajectory_.get() + b * num_mode_ * num_future_ * PredictedStateDim; + trajectories.emplace_back(score_ptr, trajectory_ptr, num_mode_, num_future_); } return true; }