From e635d41acd7d2ee6b4b369c06243d6352af0ed7a Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 25 Nov 2024 17:45:26 -0500 Subject: [PATCH 01/13] initial changes --- src/init.hpp | 1 + src/json_serialization.hpp | 1 + src/types.hpp | 11 ++++++----- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/init.hpp b/src/init.hpp index 7c458168..2101c4e2 100755 --- a/src/init.hpp +++ b/src/init.hpp @@ -24,6 +24,7 @@ namespace gpudrive MapVector2 position[MAX_POSITIONS]; float width; float length; + float height; float heading[MAX_POSITIONS]; MapVector2 velocity[MAX_POSITIONS]; bool valid[MAX_POSITIONS]; diff --git a/src/json_serialization.hpp b/src/json_serialization.hpp index 40e8762c..2cf932d4 100644 --- a/src/json_serialization.hpp +++ b/src/json_serialization.hpp @@ -34,6 +34,7 @@ namespace gpudrive obj.numPositions = i; j.at("width").get_to(obj.width); j.at("length").get_to(obj.length); + j.at("height").get_to(obj.height); i = 0; for (const auto &h : j.at("heading")) diff --git a/src/types.hpp b/src/types.hpp index 13dc4dd4..d8051a11 100755 --- a/src/types.hpp +++ b/src/types.hpp @@ -72,6 +72,7 @@ struct AgentID { { float length; float width; + float height; }; struct Goal @@ -184,14 +185,14 @@ struct AgentID { { return SelfObservation{ .speed = 0, - .vehicle_size = {0, 0}, + .vehicle_size = {0, 0, 0}, .goal = {.position = {0, 0}}, .collisionState = 0, .id = -1}; } }; - const size_t SelfObservationExportSize = 7; + const size_t SelfObservationExportSize = 8; static_assert(sizeof(SelfObservation) == sizeof(float) * SelfObservationExportSize); @@ -235,7 +236,7 @@ struct AgentID { .speed = 0, .position = {0, 0}, .heading = 0, - .vehicle_size = {0, 0}, + .vehicle_size = {0, 0, 0}, .type = static_cast(EntityType::None), .id = -1}; } @@ -255,7 +256,7 @@ struct AgentID { PartnerObservation obs[consts::kMaxAgentCount - 1]; }; - const size_t PartnerObservationExportSize = 8; + const size_t PartnerObservationExportSize = 9; static_assert(sizeof(PartnerObservations) 
== sizeof(float) * (consts::kMaxAgentCount - 1) * PartnerObservationExportSize); @@ -353,7 +354,7 @@ struct AgentID { float id; }; - const size_t AbsoluteSelfObservationExportSize = 13; // 3 + 4 + 1 + 2 + 2 + const size_t AbsoluteSelfObservationExportSize = 14; // 3 + 4 + 1 + 2 + 2 ?? static_assert(sizeof(AbsoluteSelfObservation) == sizeof(float) * AbsoluteSelfObservationExportSize); From b81617bbd2d13699edf1b1593b663cb0eba7207e Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 3 Dec 2024 18:03:03 -0500 Subject: [PATCH 02/13] using vehicle_size struct and comments --- src/init.hpp | 4 +-- src/json_serialization.hpp | 6 ++-- src/level_gen.cpp | 4 +-- src/types.hpp | 59 +++++++++++++++++++------------------- 4 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/init.hpp b/src/init.hpp index 2101c4e2..3b522034 100755 --- a/src/init.hpp +++ b/src/init.hpp @@ -22,9 +22,7 @@ namespace gpudrive struct MapObject { MapVector2 position[MAX_POSITIONS]; - float width; - float length; - float height; + VehicleSize vehicle_size; float heading[MAX_POSITIONS]; MapVector2 velocity[MAX_POSITIONS]; bool valid[MAX_POSITIONS]; diff --git a/src/json_serialization.hpp b/src/json_serialization.hpp index 2cf932d4..86b0b552 100644 --- a/src/json_serialization.hpp +++ b/src/json_serialization.hpp @@ -32,9 +32,9 @@ namespace gpudrive } } obj.numPositions = i; - j.at("width").get_to(obj.width); - j.at("length").get_to(obj.length); - j.at("height").get_to(obj.height); + j.at("width").get_to(obj.vehicle_size.width); + j.at("length").get_to(obj.vehicle_size.length); + j.at("height").get_to(obj.vehicle_size.height); i = 0; for (const auto &h : j.at("heading")) diff --git a/src/level_gen.cpp b/src/level_gen.cpp index add4cc4f..0385bfd5 100755 --- a/src/level_gen.cpp +++ b/src/level_gen.cpp @@ -121,8 +121,8 @@ static inline Entity createAgent(Engine &ctx, const MapObject &agentInit) { auto agent = ctx.makeRenderableEntity(); auto agent_iface = ctx.get(agent).e = ctx.makeEntity(); - 
ctx.get(agent) = {.length = agentInit.length, .width = agentInit.width}; - ctx.get(agent) = Diag3x3{.d0 = agentInit.length/2, .d1 = agentInit.width/2, .d2 = 1}; + ctx.get(agent) = {.length = agentInit.vehicle_size.length, .width = agentInit.vehicle_size.width, .height = agentInit.vehicle_size.height}; + ctx.get(agent) = Diag3x3{.d0 = agentInit.vehicle_size.length/2, .d1 = agentInit.vehicle_size.width/2, .d2 = 1}; ctx.get(agent) *= consts::vehicleLengthScale; ctx.get(agent) = ObjectID{(int32_t)SimObject::Agent}; ctx.get(agent) = agentInit.type; diff --git a/src/types.hpp b/src/types.hpp index d8051a11..c8ec7db2 100755 --- a/src/types.hpp +++ b/src/types.hpp @@ -64,9 +64,10 @@ namespace gpudrive NUM_TYPES = 21, }; -struct AgentID { - int32_t id; -}; + struct AgentID + { + int32_t id; + }; struct VehicleSize { @@ -180,7 +181,7 @@ struct AgentID { VehicleSize vehicle_size; Goal goal; float collisionState; - float id; + float id; static inline SelfObservation zero() { return SelfObservation{ @@ -188,11 +189,11 @@ struct AgentID { .vehicle_size = {0, 0, 0}, .goal = {.position = {0, 0}}, .collisionState = 0, - .id = -1}; + .id = -1}; } }; - const size_t SelfObservationExportSize = 8; + const size_t SelfObservationExportSize = 8; // 1 + 3 + 2 + 1 + 1 static_assert(sizeof(SelfObservation) == sizeof(float) * SelfObservationExportSize); @@ -218,7 +219,7 @@ struct AgentID { } }; - const size_t MapObservationExportSize = 9; + const size_t MapObservationExportSize = 9; // 2 + 3 + 1 + 1 + 1 + 1 static_assert(sizeof(MapObservation) == sizeof(float) * MapObservationExportSize); @@ -229,37 +230,37 @@ struct AgentID { float heading; VehicleSize vehicle_size; float type; - float id; - - static inline PartnerObservation zero() { - return PartnerObservation{ - .speed = 0, - .position = {0, 0}, - .heading = 0, - .vehicle_size = {0, 0, 0}, - .type = static_cast(EntityType::None), - .id = -1}; - } -}; + float id; - struct RoadMapId{ - int32_t id; + static inline PartnerObservation zero() { 
+ return PartnerObservation{ + .speed = 0, + .position = {0, 0}, + .heading = 0, + .vehicle_size = {0, 0, 0}, + .type = static_cast(EntityType::None), + .id = -1}; + } }; - const size_t RoadMapIdExportSize = 1; - - static_assert(sizeof(RoadMapId) == sizeof(int) * RoadMapIdExportSize); - // Egocentric observations of other agents struct PartnerObservations { PartnerObservation obs[consts::kMaxAgentCount - 1]; }; - const size_t PartnerObservationExportSize = 9; + const size_t PartnerObservationExportSize = 9; // 1 + 2 + 1 + 3 + 1 + 1 static_assert(sizeof(PartnerObservations) == sizeof(float) * - (consts::kMaxAgentCount - 1) * PartnerObservationExportSize); + (consts::kMaxAgentCount - 1) * PartnerObservationExportSize); + + struct RoadMapId{ + int32_t id; + }; + + const size_t RoadMapIdExportSize = 1; + + static_assert(sizeof(RoadMapId) == sizeof(int) * RoadMapIdExportSize); struct AgentMapObservations { @@ -341,7 +342,7 @@ struct AgentID { struct AbsoluteRotation { - Rotation rotationAsQuat; + Rotation rotationAsQuat; // x, y, z, w float rotationFromAxis; }; @@ -354,7 +355,7 @@ struct AgentID { float id; }; - const size_t AbsoluteSelfObservationExportSize = 14; // 3 + 4 + 1 + 2 + 2 ?? + const size_t AbsoluteSelfObservationExportSize = 14; // 3 + 5 + 2 + 3 + 1 static_assert(sizeof(AbsoluteSelfObservation) == sizeof(float) * AbsoluteSelfObservationExportSize); From 283caa85b3e9c12c9377fd297b6cd9b9fb6ba35d Mon Sep 17 00:00:00 2001 From: kevin Date: Wed, 4 Dec 2024 17:25:30 -0500 Subject: [PATCH 03/13] datatype indexing changes --- pygpudrive/datatypes/observation.py | 24 ++++++++++++++++-------- pygpudrive/env/constants.py | 1 + 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pygpudrive/datatypes/observation.py b/pygpudrive/datatypes/observation.py index d86d21d7..2949c906 100644 --- a/pygpudrive/datatypes/observation.py +++ b/pygpudrive/datatypes/observation.py @@ -14,6 +14,7 @@ class LocalEgoState: speed: Speed of the agent in relative coordinates. 
vehicle_length: Length of the agent's bounding box. vehicle_width: Width of the agent's bounding box. + vehicle_height: Height of the agent's bounding box. rel_goal_x: Relative x-coordinate to the goal. rel_goal_y: Relative y-coordinate to the goal. is_collided: Whether the agent is in collision with another object. @@ -25,10 +26,11 @@ def __init__(self, self_obs_tensor: torch.Tensor): self.speed = self_obs_tensor[:, :, 0] self.vehicle_length = self_obs_tensor[:, :, 1] self.vehicle_width = self_obs_tensor[:, :, 2] - self.rel_goal_x = self_obs_tensor[:, :, 3] - self.rel_goal_y = self_obs_tensor[:, :, 4] - self.is_collided = self_obs_tensor[:, :, 5] - self.id = self_obs_tensor[:, :, 6] + self.vehicle_height = self_obs_tensor[:, :, 3] + self.rel_goal_x = self_obs_tensor[:, :, 4] + self.rel_goal_y = self_obs_tensor[:, :, 5] + self.is_collided = self_obs_tensor[:, :, 6] + self.id = self_obs_tensor[:, :, 7] @classmethod def from_tensor( @@ -48,6 +50,7 @@ def normalize(self): self.speed = self.speed / constants.MAX_SPEED self.vehicle_length = self.vehicle_length / constants.MAX_VEH_LEN self.vehicle_width = self.vehicle_width / constants.MAX_VEH_WIDTH + self.vehicle_height = self.vehicle_height / constants.MAX_VEH_HEIGHT self.rel_goal_x = normalize_min_max( tensor=self.rel_goal_x, min_val=constants.MIN_REL_GOAL_COORD, @@ -70,7 +73,7 @@ def shape(self) -> tuple[int, ...]: class GlobalEgoState: """A class to represent the ego state of the agent in global coordinates. Initialized from abs_self_obs_tensor (src/bindings). For details, see - `AbsoluteSelfObservation` in src/types.hpp. Shape: (num_worlds, max_agents, 13). + `AbsoluteSelfObservation` in src/types.hpp. Shape: (num_worlds, max_agents, 14). Attributes: pos_x: Global x-coordinate of the agent. @@ -82,6 +85,7 @@ class GlobalEgoState: goal_y: Global y-coordinate of the goal. vehicle_length: Length of the agent's bounding box. vehicle_width: Width of the agent's bounding box. 
+ vehicle_height: Height of the agent's bounding box. id: Unique identifier of the agent. """ @@ -96,7 +100,8 @@ def __init__(self, abs_self_obs_tensor: torch.Tensor): self.goal_y = abs_self_obs_tensor[:, :, 9] self.vehicle_length = abs_self_obs_tensor[:, :, 10] self.vehicle_width = abs_self_obs_tensor[:, :, 11] - self.id = abs_self_obs_tensor[:, :, 12] + self.vehicle_height = abs_self_obs_tensor[:, :, 12] + self.id = abs_self_obs_tensor[:, :, 13] @classmethod def from_tensor( @@ -130,6 +135,7 @@ class PartnerObs: orientation: torch.Tensor vehicle_length: torch.Tensor vehicle_width: torch.Tensor + vehicle_height: torch.Tensor agent_type: torch.Tensor ids: torch.Tensor @@ -148,8 +154,9 @@ def __init__(self, partner_obs_tensor: torch.Tensor): self.orientation = partner_obs_tensor[:, :, :, 3].unsqueeze(-1) self.vehicle_length = partner_obs_tensor[:, :, :, 4].unsqueeze(-1) self.vehicle_width = partner_obs_tensor[:, :, :, 5].unsqueeze(-1) - self.agent_type = partner_obs_tensor[:, :, :, 6].unsqueeze(-1) - self.ids = partner_obs_tensor[:, :, :, 7].unsqueeze(-1) + self.vehicle_height = partner_obs_tensor[:, :, :, 6].unsqueeze(-1) + self.agent_type = partner_obs_tensor[:, :, :, 7].unsqueeze(-1) + self.ids = partner_obs_tensor[:, :, :, 8].unsqueeze(-1) @classmethod def from_tensor( @@ -180,6 +187,7 @@ def normalize(self): self.orientation = self.orientation / constants.MAX_ORIENTATION_RAD self.vehicle_length = self.vehicle_length / constants.MAX_VEH_LEN self.vehicle_width = self.vehicle_width / constants.MAX_VEH_WIDTH + self.vehicle_heights = self.vehicle_heights / constants.MAX_VEH_HEIGHT self.agent_type = self.agent_type.long() self.ids = self.ids diff --git a/pygpudrive/env/constants.py b/pygpudrive/env/constants.py index a8ea6201..cb221b55 100644 --- a/pygpudrive/env/constants.py +++ b/pygpudrive/env/constants.py @@ -6,6 +6,7 @@ MAX_SPEED = 100 MAX_VEH_LEN = 30 MAX_VEH_WIDTH = 10 +MAX_VEH_HEIGHT = 3 # What's the appropriate value to set this to? 
MIN_REL_GOAL_COORD = -1000 MAX_REL_GOAL_COORD = 1000 MIN_REL_AGENT_POS = -1000 From e0e987332ce83cb344a1f187c6cd3cfec7588f1e Mon Sep 17 00:00:00 2001 From: kevin Date: Wed, 4 Dec 2024 18:50:06 -0500 Subject: [PATCH 04/13] removed comment --- pygpudrive/env/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygpudrive/env/constants.py b/pygpudrive/env/constants.py index cb221b55..9f0d336f 100644 --- a/pygpudrive/env/constants.py +++ b/pygpudrive/env/constants.py @@ -6,7 +6,7 @@ MAX_SPEED = 100 MAX_VEH_LEN = 30 MAX_VEH_WIDTH = 10 -MAX_VEH_HEIGHT = 3 # What's the appropriate value to set this to? +MAX_VEH_HEIGHT = 3 MIN_REL_GOAL_COORD = -1000 MAX_REL_GOAL_COORD = 1000 MIN_REL_AGENT_POS = -1000 From 0a09dec4ed4425447d42eedac4d6f1ce2b18fbda Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 5 Dec 2024 17:37:45 -0500 Subject: [PATCH 05/13] minor cleanup --- src/level_gen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/level_gen.cpp b/src/level_gen.cpp index 0385bfd5..2d49e5ff 100755 --- a/src/level_gen.cpp +++ b/src/level_gen.cpp @@ -121,7 +121,7 @@ static inline Entity createAgent(Engine &ctx, const MapObject &agentInit) { auto agent = ctx.makeRenderableEntity(); auto agent_iface = ctx.get(agent).e = ctx.makeEntity(); - ctx.get(agent) = {.length = agentInit.vehicle_size.length, .width = agentInit.vehicle_size.width, .height = agentInit.vehicle_size.height}; + ctx.get(agent) = agentInit.vehicle_size; ctx.get(agent) = Diag3x3{.d0 = agentInit.vehicle_size.length/2, .d1 = agentInit.vehicle_size.width/2, .d2 = 1}; ctx.get(agent) *= consts::vehicleLengthScale; ctx.get(agent) = ObjectID{(int32_t)SimObject::Agent}; From 070e76375356b88a27de9c27e402915120a634a2 Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 6 Dec 2024 15:53:00 -0500 Subject: [PATCH 06/13] new dataset yay --- README.md | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 
86ab6757..81572345 100644 --- a/README.md +++ b/README.md @@ -211,16 +211,41 @@ We are open-sourcing a policy trained on 1,000 randomly sampled scenarios. You c ### Download the dataset -Two versions of the dataset are available: +- Two versions of the dataset are available, a mini version with a 1000 training files and 300 test/validation files, and the full sized dataset with over a 100k unique scenes. +- Replace 'GPUDrive_mini' with 'GPUDrive' below if you wish to download the full dataset.) -- a mini-one that is about 1 GB and consists of 1000 training files and 100 validation / test files at: [Dropbox Link](https://www.dropbox.com/sh/8mxue9rdoizen3h/AADGRrHYBb86pZvDnHplDGvXa?dl=0). -- the full dataset (150 GB) and consists of 134453 training files and 12205 validation / test files: [Dropbox Link](https://www.dropbox.com/sh/wv75pjd8phxizj3/AABfNPWfjQdoTWvdVxsAjUL_a?dl=0) +Option 1: You can download the dataset programmatically using the Hugging Face `datasets` library: +```python +from datasets import load_dataset +dataset = load_dataset("EMERGE-lab/GPUDrive_mini", cache_dir="data/processed") #OR path/to/your/dir +``` +Option 2: Use the huggingface-cli: + +1. First, install the Hugging Face CLI: +```bash +pip install huggingface_hub +``` + +2. Log in to your Hugging Face account: +```bash +huggingface-cli login +``` + +3. Download the dataset: +```bash +huggingface-cli download EMERGE-lab/GPUDrive_mini --local-dir data/processed #OR path/to/your/dir +``` + +Option 3: Manual Download: + +1. Visit https://huggingface.co/datasets/EMERGE-lab/GPUDrive +2. Navigate to the Files and versions tab. +3. Download the desired files/directories. -The simulator supports initializing scenes from the `Nocturne` dataset. The input parameter for the simulator `json_path` takes in a path to a directory containing the files in the Nocturne format. 
The `SceneConfig` dataclass in `pygpudrive/env/config.py` dataclass is used to configure how scenes are selected from a folder with traffic scenarios. ### Re-building the dataset -GPUDrive is compatible with the complete [Waymo Open Motion Dataset](https://github.com/waymo-research/waymo-open-dataset), which contains over 100,000 scenarios. To download new files and create scenarios for the simulator, follow these three steps. +GPUDrive is compatible with the complete [Waymo Open Motion Dataset](https://github.com/waymo-research/waymo-open-dataset), which contains over 100,000 scenarios. To download new files and create scenarios for the simulator, follow these three steps. (Note: you would only need to do this if there is a newer version of the Waymo dataset that you'd like to test.) 1. First, head to [https://waymo.com/open/](https://waymo.com/open/) and click on the "download" button a the top. After registering, click on the files from `v1.2.1 March 2024`, the newest version of the dataset at the time of wrting (10/2024). This will lead you a Google Cloud page. From here, you should see a folder structure like this: From 2b4ffa9d37bea0a26a45d8a1fc24e53883dc7e15 Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 6 Dec 2024 16:22:24 -0500 Subject: [PATCH 07/13] fixed downloads and added links --- README.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 81572345..fb1bc873 100644 --- a/README.md +++ b/README.md @@ -211,41 +211,41 @@ We are open-sourcing a policy trained on 1,000 randomly sampled scenarios. You c ### Download the dataset -- Two versions of the dataset are available, a mini version with a 1000 training files and 300 test/validation files, and the full sized dataset with over a 100k unique scenes. -- Replace 'GPUDrive_mini' with 'GPUDrive' below if you wish to download the full dataset.) 
+- Two versions of the dataset are available, a [mini version](https://huggingface.co/datasets/EMERGE-lab/GPUDrive_mini) with a 1000 training files and 300 test/validation files, and the [full sized dataset](https://huggingface.co/datasets/EMERGE-lab/GPUDrive) with over a 100k unique scenes. +- Replace 'GPUDrive_mini' with 'GPUDrive' below if you wish to download the full dataset. +- To download the dataset you need the huggingface_hub library: +```bash +pip install huggingface_hub +``` +Then you can download the dataset using python or just `huggingface-cli`. -Option 1: You can download the dataset programmatically using the Hugging Face `datasets` library: +Option 1: Using Python: ```python -from datasets import load_dataset -dataset = load_dataset("EMERGE-lab/GPUDrive_mini", cache_dir="data/processed") #OR path/to/your/dir +>>> from huggingface_hub import snapshot_download +>>> snapshot_download(repo_id="EMERGE-lab/GPUDrive_mini", repo_type="dataset", local_dir="data/processed") ``` Option 2: Use the huggingface-cli: -1. First, install the Hugging Face CLI: -```bash -pip install huggingface_hub -``` - -2. Log in to your Hugging Face account: +1. Log in to your Hugging Face account: ```bash huggingface-cli login ``` -3. Download the dataset: +2. Download the dataset: ```bash -huggingface-cli download EMERGE-lab/GPUDrive_mini --local-dir data/processed #OR path/to/your/dir +huggingface-cli download EMERGE-lab/GPUDrive_mini --local-dir data/processed --repo-type "dataset" ``` Option 3: Manual Download: -1. Visit https://huggingface.co/datasets/EMERGE-lab/GPUDrive +1. Visit https://huggingface.co/datasets/EMERGE-lab/GPUDrive_mini 2. Navigate to the Files and versions tab. 3. Download the desired files/directories. ### Re-building the dataset -GPUDrive is compatible with the complete [Waymo Open Motion Dataset](https://github.com/waymo-research/waymo-open-dataset), which contains over 100,000 scenarios. 
To download new files and create scenarios for the simulator, follow these three steps. (Note: you would only need to do this if there is a newer version of the Waymo dataset that you'd like to test.) +If you wish to manually generate the dataset, GPUDrive is compatible with the complete [Waymo Open Motion Dataset](https://github.com/waymo-research/waymo-open-dataset), which contains over 100,000 scenarios. To download new files and create scenarios for the simulator, follow these three steps. 1. First, head to [https://waymo.com/open/](https://waymo.com/open/) and click on the "download" button a the top. After registering, click on the files from `v1.2.1 March 2024`, the newest version of the dataset at the time of wrting (10/2024). This will lead you a Google Cloud page. From here, you should see a folder structure like this: From a8e4bd2d12cf2a5f7dbd055fd49deb2c2052c5f1 Mon Sep 17 00:00:00 2001 From: kevin Date: Sun, 8 Dec 2024 17:04:41 -0500 Subject: [PATCH 08/13] extract script for large dataset --- README.md | 5 ++ data_utils/extract_groups.py | 119 +++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 data_utils/extract_groups.py diff --git a/README.md b/README.md index fb1bc873..8e1653f8 100644 --- a/README.md +++ b/README.md @@ -242,6 +242,11 @@ Option 3: Manual Download: 2. Navigate to the Files and versions tab. 3. Download the desired files/directories. +_NOTE_: If you downloaded the full-sized dataset, it is grouped to subdirectories of 10k files each (according to hugging face constraints). 
In order for the path to work with GPUDrive, you need to run +```bash +python data_utils/extract_groups.py #use --help if you've used a custom download path +``` + ### Re-building the dataset diff --git a/data_utils/extract_groups.py b/data_utils/extract_groups.py new file mode 100644 index 00000000..a43e950b --- /dev/null +++ b/data_utils/extract_groups.py @@ -0,0 +1,119 @@ +import argparse +import shutil +from pathlib import Path +from multiprocessing import Pool, cpu_count +import tqdm + +def move_file(args): + """ + Move a single file to its target location. + + Args: + args (tuple): (source_path, target_dir) + """ + source_path, target_dir = args + target_path = Path(target_dir) / source_path.name + shutil.move(str(source_path), str(target_path)) + return str(source_path) + +def extract_groups(dataset_dir, num_workers=None): + """ + Extract all files from group directories back to the parent directory using parallel processing. + + Args: + dataset_dir (str): Path to the dataset directory containing group folders + num_workers (int, optional): Number of processes to use. Defaults to CPU count. 
+ """ + dataset_path = Path(dataset_dir) + + if not dataset_path.is_dir(): + raise ValueError(f"Directory {dataset_dir} does not exist") + + # Find all group directories + group_dirs = [d for d in dataset_path.iterdir() + if d.is_dir() and d.name.startswith("group_")] + + if not group_dirs: + print(f"No group directories found in {dataset_dir}!") + return + + print(f"\nProcessing {dataset_dir}") + print(f"Found {len(group_dirs)} group directories") + + # Collect all files that need to be moved + all_files = [] + for group_dir in sorted(group_dirs): + files = list(group_dir.glob("*.json")) + all_files.extend([(file, dataset_path) for file in files]) + + total_files = len(all_files) + print(f"Total files to process: {total_files}") + + # Use all available CPUs if num_workers is not specified + if num_workers is None: + num_workers = cpu_count() + + # Create a pool of workers and process files in parallel + with Pool(processes=num_workers) as pool: + # Use tqdm to show progress bar + list(tqdm.tqdm( + pool.imap_unordered(move_file, all_files), + total=total_files, + desc=f"Moving files from {dataset_dir}" + )) + + # Remove the group directories (rmdir raises if any non-JSON file is left inside) + for group_dir in group_dirs: + group_dir.rmdir() + + print(f"Completed {dataset_dir}") + print(f"Total files processed: {total_files}") + +def process_default_directory(num_workers=None): + """ + Process the default dataset directory (data/processed/training) with a pool of workers. + + Args: + num_workers (int, optional): Number of processes to use; forwarded to extract_groups. + """ + default_dir = "data/processed/training" + # Best effort: a failure is printed here instead of being re-raised to the caller + try: + extract_groups(default_dir, num_workers) + except Exception as e: + print(f"Error processing {default_dir}: {e}") + +def main(): + parser = argparse.ArgumentParser( + description="Extract files from group directory back to parent directory in parallel. 
" + "If no directory is specified, processes data/processed/training by default." 
+ ) + parser.add_argument( + "dataset_dir", + nargs="?", # Makes the argument optional + help="Path to the dataset directory containing group folders" + ) + parser.add_argument( + "--num_workers", + type=int, + help="Number of processes to use (defaults to number of CPU cores)", + default=None + ) + + args = parser.parse_args() + + try: + if args.dataset_dir: + # Process single specified directory + extract_groups(args.dataset_dir, args.num_workers) + else: + # Process the default directory (data/processed/training) + process_default_directory(args.num_workers) + except Exception as e: + print(f"Error: {e}") + return 1 + + return 0 + +if __name__ == "__main__": + raise SystemExit(main()) \ No newline at end of file From 098738cccbe8e96f2230fc8881da87c2f9810504 Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 9 Dec 2024 11:42:49 -0500 Subject: [PATCH 09/13] added hf to env --- environment.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 80666378..9aed3d7c 100644 --- a/environment.yml +++ b/environment.yml @@ -61,4 +61,5 @@ dependencies: - trove-classifiers==2024.3.25 - urllib3==2.2.1 - virtualenv==20.25.1 - - zipp==3.18.1 \ No newline at end of file + - zipp==3.18.1 + - huggingface_hub==0.26.5 \ No newline at end of file From 12f9d9f9ac4974ab21b1d9b932c2afc3b18f0d63 Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 9 Dec 2024 11:47:51 -0500 Subject: [PATCH 10/13] update dataset size --- README.md | 4 ++-- data_utils/extract_groups.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8e1653f8..41944897 100644 --- a/README.md +++ b/README.md @@ -211,7 +211,7 @@ We are open-sourcing a policy trained on 1,000 randomly sampled scenarios. 
You c ### Download the dataset -- Two versions of the dataset are available, a [mini version](https://huggingface.co/datasets/EMERGE-lab/GPUDrive_mini) with a 1000 training files and 300 test/validation files, and the [full sized dataset](https://huggingface.co/datasets/EMERGE-lab/GPUDrive) with over a 100k unique scenes. +- Two versions of the dataset are available, a [mini version](https://huggingface.co/datasets/EMERGE-lab/GPUDrive_mini) with a 1000 training files and 300 test/validation files, and a [large dataset](https://huggingface.co/datasets/EMERGE-lab/GPUDrive) with 100k unique scenes. - Replace 'GPUDrive_mini' with 'GPUDrive' below if you wish to download the full dataset. - To download the dataset you need the huggingface_hub library: ```bash @@ -250,7 +250,7 @@ python data_utils/extract_groups.py #use --help if you've used a custom download ### Re-building the dataset -If you wish to manually generate the dataset, GPUDrive is compatible with the complete [Waymo Open Motion Dataset](https://github.com/waymo-research/waymo-open-dataset), which contains over 100,000 scenarios. To download new files and create scenarios for the simulator, follow these three steps. +If you wish to manually generate the dataset, GPUDrive is compatible with the complete [Waymo Open Motion Dataset](https://github.com/waymo-research/waymo-open-dataset), which contains well over 100,000 scenarios. To download new files and create scenarios for the simulator, follow these three steps. 1. First, head to [https://waymo.com/open/](https://waymo.com/open/) and click on the "download" button a the top. After registering, click on the files from `v1.2.1 March 2024`, the newest version of the dataset at the time of wrting (10/2024). This will lead you a Google Cloud page. 
From here, you should see a folder structure like this: diff --git a/data_utils/extract_groups.py b/data_utils/extract_groups.py index a43e950b..1d28a435 100644 --- a/data_utils/extract_groups.py +++ b/data_utils/extract_groups.py @@ -18,7 +18,7 @@ def move_file(args): def extract_groups(dataset_dir, num_workers=None): """ - Extract all files from group directories back to the parent directory using parallel processing. + Extract all files from group directories back to the parent directory in parallel. Args: dataset_dir (str): Path to the dataset directory containing group folders From 6b45a14f127fb04e9dc12bf8aff9ce7dbdad184f Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 9 Dec 2024 12:05:17 -0500 Subject: [PATCH 11/13] added hf to env --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 41944897..52fe0007 100644 --- a/README.md +++ b/README.md @@ -213,7 +213,7 @@ We are open-sourcing a policy trained on 1,000 randomly sampled scenarios. You c - Two versions of the dataset are available, a [mini version](https://huggingface.co/datasets/EMERGE-lab/GPUDrive_mini) with a 1000 training files and 300 test/validation files, and a [large dataset](https://huggingface.co/datasets/EMERGE-lab/GPUDrive) with 100k unique scenes. - Replace 'GPUDrive_mini' with 'GPUDrive' below if you wish to download the full dataset. 
-- To download the dataset you need the huggingface_hub library: +- To download the dataset you need the huggingface_hub library (if you initialized from `environment.yml` then you can skip this step): ```bash pip install huggingface_hub ``` From c7905960759788270351848d002fa860add72684 Mon Sep 17 00:00:00 2001 From: Daphne Cornelisse Date: Mon, 9 Dec 2024 12:54:16 -0500 Subject: [PATCH 12/13] Fix typo --- pygpudrive/datatypes/observation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygpudrive/datatypes/observation.py b/pygpudrive/datatypes/observation.py index 2949c906..e2568d2b 100644 --- a/pygpudrive/datatypes/observation.py +++ b/pygpudrive/datatypes/observation.py @@ -187,7 +187,7 @@ def normalize(self): self.orientation = self.orientation / constants.MAX_ORIENTATION_RAD self.vehicle_length = self.vehicle_length / constants.MAX_VEH_LEN self.vehicle_width = self.vehicle_width / constants.MAX_VEH_WIDTH - self.vehicle_heights = self.vehicle_heights / constants.MAX_VEH_HEIGHT + self.vehicle_height = self.vehicle_height / constants.MAX_VEH_HEIGHT self.agent_type = self.agent_type.long() self.ids = self.ids From f829adc7b3a42c57215cad11c935f4b1f846390b Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 9 Dec 2024 16:24:15 -0500 Subject: [PATCH 13/13] minor docstring update --- pygpudrive/datatypes/observation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygpudrive/datatypes/observation.py b/pygpudrive/datatypes/observation.py index e2568d2b..6b1855c6 100644 --- a/pygpudrive/datatypes/observation.py +++ b/pygpudrive/datatypes/observation.py @@ -7,8 +7,8 @@ class LocalEgoState: """A class to represent the ego state of the agent in relative coordinates. - Initialized from agent_roadmap_tensor (src/bindings). For details, see - `agentMapObservations` in src/types.hpp. + Initialized from self_observation_tensor (src/bindings). For details, see + `SelfObservation` in src/types.hpp. 
Attributes: speed: Speed of the agent in relative coordinates. @@ -225,7 +225,7 @@ class LidarObs: - Axis 3 represents the lidar points per type, which can be configured in src/consts.hpp as `numLidarSamples`. - Axis 4 represents the depth, type and x, y, values of the lidar points. Initialized from lidar_tensor (src/bindings). - For details, see `LidarObservations` in src/types.hpp. + For details, see `Lidar` and `LidarSample` in src/types.hpp. """ def __init__(self, lidar_tensor: torch.Tensor):