
Commit

Merge remote-tracking branch 'upstream/main' into main
zhengzl18 committed Dec 14, 2024
2 parents 887a004 + c2ebb83 commit 7ada291
Showing 28 changed files with 186,408 additions and 85,443 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -25,6 +25,7 @@ data/raw/*
data/processed/validation/*
data/processed/testing/*
data/processed/training/*
data/formatted_json_v2_no_tl_valid/*

# Logging
/wandb
68 changes: 48 additions & 20 deletions baselines/ippo/config/ippo_ff_puffer.yaml
@@ -2,36 +2,60 @@ mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: "data/processed/examples" # Dataset
data_dir: "data/processed/validation"

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 50 # Number of parallel environments
k_unique_scenes: 1 # Number of unique scenes to sample from
num_worlds: 100 # Number of parallel environments
k_unique_scenes: 100 # Number of unique scenes to sample from
max_controlled_agents: 32 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
normalize_obs: true
reward_type: "sparse_on_goal_achieved"
remove_non_vehicles: true # If false, all agents are included (vehicles, pedestrians, cyclists)
use_lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "weighted_combination"
collision_weight: -0.025
off_road_weight: -0.025
goal_achieved_weight: 1.0
dynamics_model: "classic"

collision_behavior: "ignore" # Options: "remove", "stop"
dist_to_goal_threshold: 3.0
polyline_reduction_threshold: 0.2 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from is deterministic; if None, the set of scenes is random
obs_radius: 60.0 # Visibility radius of the agents
wandb:
entity: ""
project: "gpudrive"
group: "my_group"
group: "rl_scale"
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff", "single_scene"]
tags: ["ppo", "ff"]

## NOTES
## A good batch size is roughly 128 * the number of controlled agents (e.g. 2**18)
## Minibatch size: about 1/16 of the batch size, e.g. 16_384
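## Worked example (editorial sketch; the agent count below is illustrative, not from this commit):
##   with ~2_048 controlled agents in total (e.g. roughly 20 valid agents in each of 100 worlds),
##   batch_size     ~ 128 * 2_048  = 262_144 (= 2**18)
##   minibatch_size ~ 262_144 / 16 = 16_384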

train:
exp_id: null # Set dynamically in the script if needed
exp_id: PPO # Set dynamically in the script if needed
seed: 42
cpu_offload: false
device: "cuda" # Dynamically set to cuda if available, else cpu
bptt_horizon: 2
compile: false
compile_mode: "reduce-overhead"

# # # Data sampling # # #
resample_scenes: false
resample_criterion: "global_step"
resample_interval: 3_000_000
resample_mode: "random" # Options: random

# # # PPO # # #
torch_deterministic: false
total_timesteps: 2_000_0000
batch_size: 25_000
minibatch_size: 5_000
total_timesteps: 500_000_000
batch_size: 131_072
minibatch_size: 16_384
learning_rate: 3e-4
anneal_lr: false
gamma: 0.99
@@ -41,19 +65,23 @@ train:
clip_coef: 0.2
clip_vloss: false
vf_clip_coef: 0.2
ent_coef: 0.0001
ent_coef: 0.0003
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null
checkpoint_interval: 500 # Save policy every k iterations

# # # Checkpointing # # #
checkpoint_interval: 1000 # Save policy every k iterations
checkpoint_path: "./runs"
render: true
render_interval: 500
render_k_scenarios: 1 # Number of scenarios to render
render_fps: 20
bptt_horizon: 50
compile: false
compile_mode: "reduce-overhead"

# # # Rendering # # #
render: false # Determines whether to render the environment (note: will slow down training)
render_interval: 500 # Render every k iterations
render_k_scenarios: 5 # Number of scenarios to render
render_simulator_state: true # Plot the simulator state from bird's eye view
render_agent_obs: false # Debugging tool, plot what an agent is seeing
render_fps: 15 # Frames per second
render_format: "mp4" # Options: gif, mp4

vec:
backend: "native" # Only native is currently supported
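The ippo_ff_puffer.yaml diff above switches reward_type from "sparse_on_goal_achieved" to "weighted_combination" and introduces three weights. As a rough sketch of how such a per-step reward could be assembled (the function name and the binary event flags are illustrative assumptions; the actual reward computation lives in the GPUDrive environment code, not in this commit):

def weighted_combination_reward(
    achieved_goal: bool,
    collided: bool,
    off_road: bool,
    goal_achieved_weight: float = 1.0,
    collision_weight: float = -0.025,
    off_road_weight: float = -0.025,
) -> float:
    """Hypothetical per-agent, per-step reward mixing the three weighted event flags."""
    return (
        goal_achieved_weight * float(achieved_goal)
        + collision_weight * float(collided)
        + off_road_weight * float(off_road)
    )

With the defaults above, reaching the goal is worth +1.0, while every step spent colliding or off-road costs 0.025.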
27 changes: 14 additions & 13 deletions baselines/ippo/config/ippo_ff_sb3.yaml
@@ -1,15 +1,16 @@
data_dir: "data/processed/examples"
data_dir: "data/processed/examples" #"data/data_old_debug" #"data/processed/training"

num_worlds: 50
num_worlds: 100

selection_discipline: "K_UNIQUE_N" # K_UNIQUE_N / PAD_N
k_unique_scenes: 3
k_unique_scenes: 1
device: "cuda" # or "cpu"

reward_type: "weighted_combination"
reward_type: "sparse_on_goal_achieved"
collision_weight: 0.0
goal_achieved_weight: 1.0
off_road_weight: 0.0
remove_non_vehicles: false

resample_scenarios: false
resample_criterion: "global_step" # Options: "global_step"
@@ -18,18 +19,18 @@ resample_mode: "random" # Options: "random"

render: true
render_mode: "rgb_array"
render_freq: 500 # Render every k rollouts
render_n_worlds: 3 # Number of worlds to render
render_freq: 50 # Render every k rollouts
render_n_worlds: 10 # Number of scenarios to render

track_time_to_solve: false

sync_tensorboard: true
logging_collection_window: 100 # How many trajectories we average logs over
log_freq: 100
project_name: "gpudrive"
group_name: " "
group_name: "algorithm_logic"
entity: " "
tags:
tags:
- "IPPO"
- "LATE_FUSION"
- "PERM_EQ"
@@ -49,23 +50,23 @@ vf_coef: 0.5
n_steps: 91 # Number of steps per rollout
num_minibatches: 5 # Used to determine the minibatch size
verbose: 0
total_timesteps: 30_000_00000
total_timesteps: 100_000_000
ent_coef: 0.0001
lr: 0.0003
n_epochs: 5

mlp_class: "late_fusion"
policy: "late_fusion_policy"
ego_state_layers:
ego_state_layers:
- 64
- 32
road_object_layers:
road_object_layers:
- 64
- 64
road_graph_layers:
road_graph_layers:
- 64
- 64
shared_layers:
shared_layers:
- 64
- 64
act_func: "tanh"
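Both config files expose resample_* options (criterion "global_step", a fixed interval, mode "random"). A minimal sketch of the control flow those options imply, assuming a hypothetical resampling hook on the environment (the method and argument names are illustrative, not taken from this commit):

def maybe_resample_scenes(env, global_step: int, last_resample_step: int,
                          resample_interval: int = 3_000_000) -> int:
    """Swap in a fresh random batch of scenes once enough steps have elapsed."""
    if global_step - last_resample_step >= resample_interval:
        env.resample_scenarios()   # hypothetical hook; the real API may differ
        return global_step         # restart the interval from the current step
    return last_resample_step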
13 changes: 9 additions & 4 deletions baselines/ippo/ippo_pufferlib.py
@@ -12,10 +12,9 @@
import torch
import wandb
from box import Box

from integrations.rl.puffer import ppo
from integrations.rl.puffer.puffer_env import env_creator
from integrations.rl.puffer.utils import Policy
from integrations.rl.puffer.utils import Policy, LiDARPolicy

import pufferlib
import pufferlib.vector
@@ -30,7 +29,7 @@ def load_config(config_path):
config = Box(yaml.safe_load(f))

datetime_ = datetime.now().strftime("%m_%d_%H_%M_%S")
config["train"]["exp_id"] = config["train"]["exp_id"] or datetime_
config["train"]["exp_id"] = f'{config["train"]["exp_id"]}__S_{str(config["environment"]["k_unique_scenes"])}__{datetime_}'
config["train"]["device"] = config["train"].get("device", "cpu") # Default to 'cpu' if not set
if torch.cuda.is_available():
config["train"]["device"] = "cuda" # Set to 'cuda' if available
@@ -70,6 +69,7 @@ def train(args):
)

policy = make_policy(vecenv.driver_env).to(args.train.device)

args.train.env = args.environment.name

data = ppo.create(args.train, vecenv, policy, wandb=args.wandb)
@@ -97,7 +97,11 @@ def init_wandb(args, name, id=None, resume=True):
group=args.wandb.group,
mode=args.wandb.mode,
tags=args.wandb.tags,
config={"train": dict(args.train), "vec": dict(args.vec)},
config={
"environment": dict(args.environment),
"train": dict(args.train),
"vec": dict(args.vec),
},
name=name,
save_code=True,
resume=resume,
@@ -134,6 +138,7 @@ def sweep(args, project="PPO", sweep_name="my_sweep"):
make_env = env_creator(
data_dir=config.data_dir,
environment_config=config.environment,
train_config=config.train,
device=config.train.device,
)

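The load_config change in ippo_pufferlib.py above folds the scene count and a timestamp into the experiment id. For example, with the values from ippo_ff_puffer.yaml (exp_id: PPO, k_unique_scenes: 100), the resulting run name would look roughly like this (the timestamp is of course illustrative):

from datetime import datetime

exp_id = "PPO"
k_unique_scenes = 100
datetime_ = datetime.now().strftime("%m_%d_%H_%M_%S")   # e.g. "12_14_09_30_05"
run_name = f"{exp_id}__S_{k_unique_scenes}__{datetime_}"
print(run_name)                                          # PPO__S_100__12_14_09_30_05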
22 changes: 15 additions & 7 deletions baselines/ippo/ippo_sb3.py
@@ -1,6 +1,6 @@
import wandb
import yaml
from box import Box
from box import Box
from typing import Callable
from datetime import datetime
import dataclasses
@@ -16,8 +16,10 @@

def linear_schedule(initial_value: float) -> Callable[[float], float]:
"""Linear learning rate schedule."""

def func(progress_remaining: float) -> float:
return progress_remaining * initial_value

return func


@@ -26,6 +28,7 @@ def load_config(config_path):
with open(config_path, "r") as f:
return Box(yaml.safe_load(f))


def train(exp_config: Box, scene_config: SceneConfig):
"""Run PPO training with stable-baselines3."""

@@ -36,8 +39,9 @@ def train(exp_config: Box, scene_config: SceneConfig):
goal_achieved_weight=exp_config.goal_achieved_weight,
off_road_weight=exp_config.off_road_weight,
episode_len=exp_config.episode_len,
remove_non_vehicles=exp_config.remove_non_vehicles,
)

# Select model
if exp_config.mlp_class == "late_fusion":
exp_config.mlp_class = LateFusionNet
@@ -46,8 +50,10 @@ def train(exp_config: Box, scene_config: SceneConfig):
exp_config.mlp_class = FFN
exp_config.policy = FeedForwardPolicy
else:
raise NotImplementedError(f"Unsupported MLP class: {exp_config.mlp_class}")

raise NotImplementedError(
f"Unsupported MLP class: {exp_config.mlp_class}"
)

# Make environment
env = SB3MultiAgentEnv(
config=env_config,
@@ -56,13 +62,13 @@ def train(exp_config: Box, scene_config: SceneConfig):
max_cont_agents=env_config.max_num_agents_in_scene,
device=exp_config.device,
)

exp_config.batch_size = (
exp_config.num_worlds * exp_config.n_steps
) // exp_config.num_minibatches

datetime_ = datetime.now().strftime("%m_%d_%H_%S")
run_id = f"{datetime_}_{exp_config.k_unique_scenes}scenes"
run_id = f"SB3_{datetime_}_{exp_config.k_unique_scenes}scenes"
run = wandb.init(
project=exp_config.project_name,
name=run_id,
@@ -116,7 +122,9 @@ def train(exp_config: Box, scene_config: SceneConfig):
scene_config = SceneConfig(
path=exp_config.data_dir,
num_scenes=exp_config.num_worlds,
discipline=SelectionDiscipline.K_UNIQUE_N if exp_config.selection_discipline == "K_UNIQUE_N" else SelectionDiscipline.PAD_N,
discipline=SelectionDiscipline.K_UNIQUE_N
if exp_config.selection_discipline == "K_UNIQUE_N"
else SelectionDiscipline.PAD_N,
k_unique_scenes=exp_config.k_unique_scenes,
)

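The reformatted linear_schedule helper in ippo_sb3.py returns a callable that stable-baselines3 evaluates against the remaining training progress (1.0 at the start of training, 0.0 at the end). A minimal, self-contained usage sketch on a toy environment (the environment and timestep budget are illustrative; the repo trains PPO on SB3MultiAgentEnv instead):

from typing import Callable

import gymnasium as gym
from stable_baselines3 import PPO


def linear_schedule(initial_value: float) -> Callable[[float], float]:
    """Same helper as in the diff above: anneal linearly from initial_value to 0."""
    def func(progress_remaining: float) -> float:
        return progress_remaining * initial_value
    return func


env = gym.make("CartPole-v1")            # toy stand-in environment
model = PPO(
    policy="MlpPolicy",                  # illustrative; the repo uses its late-fusion policy
    env=env,
    learning_rate=linear_schedule(3e-4), # learning rate anneals from 3e-4 to 0
    verbose=0,
)
model.learn(total_timesteps=10_000)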
(Diffs for the remaining changed files are not shown here.)
