Object Heights #305

Closed
wants to merge 13 commits into from
40 changes: 35 additions & 5 deletions README.md
@@ -211,16 +211,46 @@ We are open-sourcing a policy trained on 1,000 randomly sampled scenarios. You c

### Download the dataset

Two versions of the dataset are available:
- a [mini version](https://huggingface.co/datasets/EMERGE-lab/GPUDrive_mini) with 1,000 training files and 300 test/validation files, and
- a [large version](https://huggingface.co/datasets/EMERGE-lab/GPUDrive) with 100k unique scenes.

Replace 'GPUDrive_mini' with 'GPUDrive' below if you wish to download the full dataset.

To download the dataset you need the `huggingface_hub` library (if you initialized from `environment.yml`, you can skip this step):
```bash
pip install huggingface_hub
```
Then you can download the dataset using Python or the `huggingface-cli`.

Option 1: Using Python:
```python
>>> from huggingface_hub import snapshot_download
>>> snapshot_download(repo_id="EMERGE-lab/GPUDrive_mini", repo_type="dataset", local_dir="data/processed")
```
Option 2: Use the huggingface-cli:

1. Log in to your Hugging Face account:
```bash
huggingface-cli login
```

2. Download the dataset:
```bash
huggingface-cli download EMERGE-lab/GPUDrive_mini --local-dir data/processed --repo-type "dataset"
```

Option 3: Manual Download:

1. Visit https://huggingface.co/datasets/EMERGE-lab/GPUDrive_mini
2. Navigate to the Files and versions tab.
3. Download the desired files/directories.

_NOTE_: If you downloaded the full-sized dataset, it is grouped into subdirectories of 10k files each (due to Hugging Face constraints). For the paths to work with GPUDrive, you need to run
```bash
python data_utils/extract_groups.py  # use --help if you used a custom download path
```
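What the extraction step does can be summarized with a minimal, self-contained sketch (illustrative only; the real `data_utils/extract_groups.py` below additionally parallelizes the moves across processes):

```python
# Minimal sketch of what data_utils/extract_groups.py does: flatten the
# "group_*" subdirectories that Hugging Face imposes back into the parent
# dataset directory, then delete the empty group folders.
import shutil
from pathlib import Path

def flatten_groups(dataset_dir: str) -> int:
    """Move every *.json out of group_* subdirs into dataset_dir; return count."""
    root = Path(dataset_dir)
    moved = 0
    for group in sorted(root.glob("group_*")):
        if not group.is_dir():
            continue
        for f in group.glob("*.json"):
            shutil.move(str(f), str(root / f.name))
            moved += 1
        group.rmdir()  # remove the now-empty group directory
    return moved
```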

The simulator supports initializing scenes from the `Nocturne` dataset. The simulator's `json_path` parameter takes a path to a directory containing files in the Nocturne format. The `SceneConfig` dataclass in `pygpudrive/env/config.py` configures how scenes are selected from a folder with traffic scenarios.

### Re-building the dataset

If you wish to manually generate the dataset, GPUDrive is compatible with the complete [Waymo Open Motion Dataset](https://github.com/waymo-research/waymo-open-dataset), which contains well over 100,000 scenarios. To download new files and create scenarios for the simulator, follow these three steps.

1. First, head to [https://waymo.com/open/](https://waymo.com/open/) and click on the "download" button at the top. After registering, click on the files from `v1.2.1 March 2024`, the newest version of the dataset at the time of writing (10/2024). This will lead you to a Google Cloud page. From here, you should see a folder structure like this:

119 changes: 119 additions & 0 deletions data_utils/extract_groups.py
@@ -0,0 +1,119 @@
import argparse
import shutil
from pathlib import Path
from multiprocessing import Pool, cpu_count
import tqdm

def move_file(args):
    """
    Move a single file to its target location.

    Args:
        args (tuple): (source_path, target_dir)
    """
    source_path, target_dir = args
    target_path = Path(target_dir) / source_path.name
    shutil.move(str(source_path), str(target_path))
    return str(source_path)

def extract_groups(dataset_dir, num_workers=None):
    """
    Extract all files from group directories back to the parent directory in parallel.

    Args:
        dataset_dir (str): Path to the dataset directory containing group folders
        num_workers (int, optional): Number of processes to use. Defaults to CPU count.
    """
    dataset_path = Path(dataset_dir)

    if not dataset_path.is_dir():
        raise ValueError(f"Directory {dataset_dir} does not exist")

    # Find all group directories
    group_dirs = [
        d for d in dataset_path.iterdir()
        if d.is_dir() and d.name.startswith("group_")
    ]

    if not group_dirs:
        print(f"No group directories found in {dataset_dir}!")
        return

    print(f"\nProcessing {dataset_dir}")
    print(f"Found {len(group_dirs)} group directories")

    # Collect all files that need to be moved
    all_files = []
    for group_dir in sorted(group_dirs):
        files = list(group_dir.glob("*.json"))
        all_files.extend([(file, dataset_path) for file in files])

    total_files = len(all_files)
    print(f"Total files to process: {total_files}")

    # Use all available CPUs if num_workers is not specified
    if num_workers is None:
        num_workers = cpu_count()

    # Create a pool of workers and process files in parallel
    with Pool(processes=num_workers) as pool:
        # Use tqdm to show a progress bar
        list(tqdm.tqdm(
            pool.imap_unordered(move_file, all_files),
            total=total_files,
            desc=f"Moving files from {dataset_dir}"
        ))

    # Remove the now-empty group directories
    for group_dir in group_dirs:
        group_dir.rmdir()

    print(f"Completed {dataset_dir}")
    print(f"Total files processed: {total_files}")

def process_default_directory(num_workers=None):
    """
    Process the default training directory.

    Args:
        num_workers (int, optional): Number of processes to use.
    """
    default_dir = "data/processed/training"
    try:
        extract_groups(default_dir, num_workers)
    except Exception as e:
        print(f"Error processing {default_dir}: {e}")

def main():
    parser = argparse.ArgumentParser(
        description="Extract files from group directories back to the parent directory in parallel. "
                    "If no directory is specified, processes data/processed/training by default."
    )
    parser.add_argument(
        "dataset_dir",
        nargs="?",  # makes the argument optional
        help="Path to the dataset directory containing group folders"
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        help="Number of processes to use (defaults to number of CPU cores)",
        default=None
    )

    args = parser.parse_args()

    try:
        if args.dataset_dir:
            # Process a single specified directory
            extract_groups(args.dataset_dir, args.num_workers)
        else:
            # Process the default directory
            process_default_directory(args.num_workers)
    except Exception as e:
        print(f"Error: {e}")
        return 1

    return 0

if __name__ == "__main__":
    exit(main())
3 changes: 2 additions & 1 deletion environment.yml
@@ -61,4 +61,5 @@ dependencies:
- trove-classifiers==2024.3.25
- urllib3==2.2.1
- virtualenv==20.25.1
- zipp==3.18.1
- huggingface_hub==0.26.5
30 changes: 19 additions & 11 deletions pygpudrive/datatypes/observation.py
@@ -7,13 +7,14 @@

class LocalEgoState:
"""A class to represent the ego state of the agent in relative coordinates.
Initialized from agent_roadmap_tensor (src/bindings). For details, see
`agentMapObservations` in src/types.hpp.
Initialized from self_observation_tensor (src/bindings). For details, see
`SelfObservation` in src/types.hpp.

Attributes:
speed: Speed of the agent in relative coordinates.
vehicle_length: Length of the agent's bounding box.
vehicle_width: Width of the agent's bounding box.
vehicle_height: Height of the agent's bounding box.
rel_goal_x: Relative x-coordinate to the goal.
rel_goal_y: Relative y-coordinate to the goal.
is_collided: Whether the agent is in collision with another object.
@@ -25,10 +26,11 @@ def __init__(self, self_obs_tensor: torch.Tensor):
self.speed = self_obs_tensor[:, :, 0]
self.vehicle_length = self_obs_tensor[:, :, 1]
self.vehicle_width = self_obs_tensor[:, :, 2]
self.rel_goal_x = self_obs_tensor[:, :, 3]
self.rel_goal_y = self_obs_tensor[:, :, 4]
self.is_collided = self_obs_tensor[:, :, 5]
self.id = self_obs_tensor[:, :, 6]
self.vehicle_height = self_obs_tensor[:, :, 3]
self.rel_goal_x = self_obs_tensor[:, :, 4]
self.rel_goal_y = self_obs_tensor[:, :, 5]
self.is_collided = self_obs_tensor[:, :, 6]
self.id = self_obs_tensor[:, :, 7]
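Since `vehicle_height` is inserted at slot 3, every subsequent feature index shifts by one. A quick pure-Python sanity sketch of the new layout (feature names read directly from the `__init__` above):

```python
# Feature order of the updated self-observation tensor (last axis).
# Inserting vehicle_height at index 3 pushes rel_goal_x, rel_goal_y,
# is_collided, and id up by one position each.
SELF_OBS_LAYOUT = [
    "speed",           # 0
    "vehicle_length",  # 1
    "vehicle_width",   # 2
    "vehicle_height",  # 3 (new)
    "rel_goal_x",      # 4
    "rel_goal_y",      # 5
    "is_collided",     # 6
    "id",              # 7
]

def obs_index(name: str) -> int:
    """Index of a named feature in the flat self-observation vector."""
    return SELF_OBS_LAYOUT.index(name)
```

One practical consequence: any consumer that hard-codes the old indices (e.g. a policy checkpoint trained on 7-feature observations) will read the wrong slots and needs remapping or retraining.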

@classmethod
def from_tensor(
@@ -48,6 +50,7 @@ def normalize(self):
self.speed = self.speed / constants.MAX_SPEED
self.vehicle_length = self.vehicle_length / constants.MAX_VEH_LEN
self.vehicle_width = self.vehicle_width / constants.MAX_VEH_WIDTH
self.vehicle_height = self.vehicle_height / constants.MAX_VEH_HEIGHT
self.rel_goal_x = normalize_min_max(
tensor=self.rel_goal_x,
min_val=constants.MIN_REL_GOAL_COORD,
@@ -70,7 +73,7 @@ def shape(self) -> tuple[int, ...]:
class GlobalEgoState:
"""A class to represent the ego state of the agent in global coordinates.
Initialized from abs_self_obs_tensor (src/bindings). For details, see
`AbsoluteSelfObservation` in src/types.hpp. Shape: (num_worlds, max_agents, 13).
`AbsoluteSelfObservation` in src/types.hpp. Shape: (num_worlds, max_agents, 14).

Attributes:
pos_x: Global x-coordinate of the agent.
@@ -82,6 +85,7 @@ class GlobalEgoState:
goal_y: Global y-coordinate of the goal.
vehicle_length: Length of the agent's bounding box.
vehicle_width: Width of the agent's bounding box.
vehicle_height: Height of the agent's bounding box.
id: Unique identifier of the agent.
"""

@@ -96,7 +100,8 @@ def __init__(self, abs_self_obs_tensor: torch.Tensor):
self.goal_y = abs_self_obs_tensor[:, :, 9]
self.vehicle_length = abs_self_obs_tensor[:, :, 10]
self.vehicle_width = abs_self_obs_tensor[:, :, 11]
self.id = abs_self_obs_tensor[:, :, 12]
self.vehicle_height = abs_self_obs_tensor[:, :, 12]
self.id = abs_self_obs_tensor[:, :, 13]

@classmethod
def from_tensor(
@@ -130,6 +135,7 @@ class PartnerObs:
orientation: torch.Tensor
vehicle_length: torch.Tensor
vehicle_width: torch.Tensor
vehicle_height: torch.Tensor
agent_type: torch.Tensor
ids: torch.Tensor

@@ -148,8 +154,9 @@ def __init__(self, partner_obs_tensor: torch.Tensor):
self.orientation = partner_obs_tensor[:, :, :, 3].unsqueeze(-1)
self.vehicle_length = partner_obs_tensor[:, :, :, 4].unsqueeze(-1)
self.vehicle_width = partner_obs_tensor[:, :, :, 5].unsqueeze(-1)
self.agent_type = partner_obs_tensor[:, :, :, 6].unsqueeze(-1)
self.ids = partner_obs_tensor[:, :, :, 7].unsqueeze(-1)
self.vehicle_height = partner_obs_tensor[:, :, :, 6].unsqueeze(-1)
self.agent_type = partner_obs_tensor[:, :, :, 7].unsqueeze(-1)
self.ids = partner_obs_tensor[:, :, :, 8].unsqueeze(-1)

@classmethod
def from_tensor(
@@ -180,6 +187,7 @@ def normalize(self):
self.orientation = self.orientation / constants.MAX_ORIENTATION_RAD
self.vehicle_length = self.vehicle_length / constants.MAX_VEH_LEN
self.vehicle_width = self.vehicle_width / constants.MAX_VEH_WIDTH
self.vehicle_height = self.vehicle_height / constants.MAX_VEH_HEIGHT
self.agent_type = self.agent_type.long()
self.ids = self.ids

@@ -217,7 +225,7 @@ class LidarObs:
- Axis 3 represents the lidar points per type, which can be configured in src/consts.hpp as `numLidarSamples`.
- Axis 4 represents the depth, type, and x, y values of the lidar points.
Initialized from lidar_tensor (src/bindings).
For details, see `LidarObservations` in src/types.hpp.
For details, see `Lidar` and `LidarSample` in src/types.hpp.
"""

def __init__(self, lidar_tensor: torch.Tensor):
1 change: 1 addition & 0 deletions pygpudrive/env/constants.py
@@ -6,6 +6,7 @@
MAX_SPEED = 100
MAX_VEH_LEN = 30
MAX_VEH_WIDTH = 10
MAX_VEH_HEIGHT = 3
MIN_REL_GOAL_COORD = -1000
MAX_REL_GOAL_COORD = 1000
MIN_REL_AGENT_POS = -1000
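For reference, a small self-contained sketch of how the new height feature is normalized with these constants. The division by `MAX_VEH_HEIGHT` mirrors the diff in `observation.py`; the linear [-1, 1] mapping for goal coordinates is an assumption about `normalize_min_max`, so check the actual helper before relying on it:

```python
# Sketch of the normalization applied in LocalEgoState.normalize().
# Constants copied from pygpudrive/env/constants.py as shown above.
MAX_VEH_HEIGHT = 3
MIN_REL_GOAL_COORD = -1000
MAX_REL_GOAL_COORD = 1000

def normalize_height(h: float) -> float:
    # Plain division, as in: vehicle_height / constants.MAX_VEH_HEIGHT
    return h / MAX_VEH_HEIGHT

def normalize_min_max(x: float, min_val: float, max_val: float) -> float:
    # Assumed form: map [min_val, max_val] linearly onto [-1, 1].
    return 2.0 * (x - min_val) / (max_val - min_val) - 1.0
```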
3 changes: 1 addition & 2 deletions src/init.hpp
@@ -22,8 +22,7 @@ namespace gpudrive
struct MapObject
{
MapVector2 position[MAX_POSITIONS];
float width;
float length;
VehicleSize vehicle_size;
float heading[MAX_POSITIONS];
MapVector2 velocity[MAX_POSITIONS];
bool valid[MAX_POSITIONS];
5 changes: 3 additions & 2 deletions src/json_serialization.hpp
@@ -32,8 +32,9 @@ namespace gpudrive
}
}
obj.numPositions = i;
j.at("width").get_to(obj.width);
j.at("length").get_to(obj.length);
j.at("width").get_to(obj.vehicle_size.width);
j.at("length").get_to(obj.vehicle_size.length);
j.at("height").get_to(obj.vehicle_size.height);

i = 0;
for (const auto &h : j.at("heading"))
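One consequence of the `j.at("height")` call: with nlohmann::json, `at()` throws on a missing key, so older scenario JSONs exported without a `height` field will fail at load time. A hedged pre-flight check in Python (the `"objects"` key and field names are assumptions based on this diff, so verify against the actual scenario schema):

```python
# Pre-flight check for scenario JSON files: verify each object carries the
# "height" field that the updated loader now reads alongside width/length.
import json

REQUIRED_SIZE_FIELDS = ("width", "length", "height")

def missing_size_fields(obj: dict) -> list:
    """Return the size fields absent from a single scenario object dict."""
    return [f for f in REQUIRED_SIZE_FIELDS if f not in obj]

def check_scenario(path: str) -> dict:
    """Map object index -> missing fields for every object in a scenario file."""
    with open(path) as fp:
        scenario = json.load(fp)
    problems = {}
    for i, obj in enumerate(scenario.get("objects", [])):
        missing = missing_size_fields(obj)
        if missing:
            problems[i] = missing
    return problems
```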
4 changes: 2 additions & 2 deletions src/level_gen.cpp
@@ -121,8 +121,8 @@ static inline Entity createAgent(Engine &ctx, const MapObject &agentInit) {
auto agent = ctx.makeRenderableEntity<Agent>();
auto agent_iface = ctx.get<AgentInterfaceEntity>(agent).e = ctx.makeEntity<AgentInterface>();

ctx.get<VehicleSize>(agent) = {.length = agentInit.length, .width = agentInit.width};
ctx.get<Scale>(agent) = Diag3x3{.d0 = agentInit.length/2, .d1 = agentInit.width/2, .d2 = 1};
ctx.get<VehicleSize>(agent) = agentInit.vehicle_size;
ctx.get<Scale>(agent) = Diag3x3{.d0 = agentInit.vehicle_size.length/2, .d1 = agentInit.vehicle_size.width/2, .d2 = 1};
ctx.get<Scale>(agent) *= consts::vehicleLengthScale;
ctx.get<ObjectID>(agent) = ObjectID{(int32_t)SimObject::Agent};
ctx.get<EntityType>(agent) = agentInit.type;