
Commit

Merge remote-tracking branch 'upstream/main' into main
zhengzl18 committed Dec 14, 2024
2 parents 887a004 + c2ebb83 commit 7ada291
Showing 28 changed files with 186,408 additions and 85,443 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -25,6 +25,7 @@ data/raw/*
data/processed/validation/*
data/processed/testing/*
data/processed/training/*
data/formatted_json_v2_no_tl_valid/*

# Logging
/wandb
68 changes: 48 additions & 20 deletions baselines/ippo/config/ippo_ff_puffer.yaml
@@ -2,36 +2,60 @@ mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: "data/processed/examples" # Dataset
data_dir: "data/processed/validation"

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 50 # Number of parallel environments
k_unique_scenes: 1 # Number of unique scenes to sample from
num_worlds: 100 # Number of parallel environments
k_unique_scenes: 100 # Number of unique scenes to sample from
max_controlled_agents: 32 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
normalize_obs: true
reward_type: "sparse_on_goal_achieved"
remove_non_vehicles: true # If false, all agents are included (vehicles, pedestrians, cyclists)
use_lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "weighted_combination"
collision_weight: -0.025
off_road_weight: -0.025
goal_achieved_weight: 1.0
dynamics_model: "classic"

collision_behavior: "ignore" # Options: "remove", "stop"
dist_to_goal_threshold: 3.0
polyline_reduction_threshold: 0.2 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity); needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from is deterministic; if None, the set of scenes is random
obs_radius: 60.0 # Visibility radius of the agents
wandb:
entity: ""
project: "gpudrive"
group: "my_group"
group: "rl_scale"
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff", "single_scene"]
tags: ["ppo", "ff"]

## NOTES
## A good batch size is roughly 128 * the number of controlled agents (e.g. 2**18)
## Minibatch size: about 1/16 of the batch size, e.g. 16_384
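## Worked example (editorial sketch; the agent count below is illustrative, not from this commit):
##   with ~2_048 controlled agents in total (e.g. roughly 20 valid agents in each of 100 worlds),
##   batch_size     ~ 128 * 2_048  = 262_144 (= 2**18)
##   minibatch_size ~ 262_144 / 16 = 16_384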

train:
exp_id: null # Set dynamically in the script if needed
exp_id: PPO # Set dynamically in the script if needed
seed: 42
cpu_offload: false
device: "cuda" # Dynamically set to cuda if available, else cpu
bptt_horizon: 2
compile: false
compile_mode: "reduce-overhead"

# # # Data sampling # # #
resample_scenes: false
resample_criterion: "global_step"
resample_interval: 3_000_000
resample_mode: "random" # Options: random

# # # PPO # # #
torch_deterministic: false
total_timesteps: 2_000_0000
batch_size: 25_000
minibatch_size: 5_000
total_timesteps: 500_000_000
batch_size: 131_072
minibatch_size: 16_384
learning_rate: 3e-4
anneal_lr: false
gamma: 0.99
@@ -41,19 +65,23 @@ train:
clip_coef: 0.2
clip_vloss: false
vf_clip_coef: 0.2
ent_coef: 0.0001
ent_coef: 0.0003
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null
checkpoint_interval: 500 # Save policy every k iterations

# # # Checkpointing # # #
checkpoint_interval: 1000 # Save policy every k iterations
checkpoint_path: "./runs"
render: true
render_interval: 500
render_k_scenarios: 1 # Number of scenarios to render
render_fps: 20
bptt_horizon: 50
compile: false
compile_mode: "reduce-overhead"

# # # Rendering # # #
render: false # Determines whether to render the environment (note: will slow down training)
render_interval: 500 # Render every k iterations
render_k_scenarios: 5 # Number of scenarios to render
render_simulator_state: true # Plot the simulator state from bird's eye view
render_agent_obs: false # Debugging tool, plot what an agent is seeing
render_fps: 15 # Frames per second
render_format: "mp4" # Options: gif, mp4

vec:
backend: "native" # Only native is currently supported
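The ippo_ff_puffer.yaml diff above switches reward_type from "sparse_on_goal_achieved" to "weighted_combination" and introduces three weights. As a rough sketch of how such a per-step reward could be assembled (the function name and the binary event flags are illustrative assumptions; the actual reward computation lives in the GPUDrive environment code, not in this commit):

def weighted_combination_reward(
    achieved_goal: bool,
    collided: bool,
    off_road: bool,
    goal_achieved_weight: float = 1.0,
    collision_weight: float = -0.025,
    off_road_weight: float = -0.025,
) -> float:
    """Hypothetical per-agent, per-step reward mixing the three weighted event flags."""
    return (
        goal_achieved_weight * float(achieved_goal)
        + collision_weight * float(collided)
        + off_road_weight * float(off_road)
    )

With the defaults above, reaching the goal is worth +1.0, while every step spent colliding or off-road costs 0.025.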
27 changes: 14 additions & 13 deletions baselines/ippo/config/ippo_ff_sb3.yaml
@@ -1,15 +1,16 @@
data_dir: "data/processed/examples"
data_dir: "data/processed/examples" #"data/data_old_debug" #"data/processed/training"

num_worlds: 50
num_worlds: 100

selection_discipline: "K_UNIQUE_N" # K_UNIQUE_N / PAD_N
k_unique_scenes: 3
k_unique_scenes: 1
device: "cuda" # or "cpu"

reward_type: "weighted_combination"
reward_type: "sparse_on_goal_achieved"
collision_weight: 0.0
goal_achieved_weight: 1.0
off_road_weight: 0.0
remove_non_vehicles: false

resample_scenarios: false
resample_criterion: "global_step" # Options: "global_step"
@@ -18,18 +19,18 @@ resample_mode: "random" # Options: "random"

render: true
render_mode: "rgb_array"
render_freq: 500 # Render every k rollouts
render_n_worlds: 3 # Number of worlds to render
render_freq: 50 # Render every k rollouts
render_n_worlds: 10 # Number of scenarios to render

track_time_to_solve: false

sync_tensorboard: true
logging_collection_window: 100 # How many trajectories we average logs over
log_freq: 100
project_name: "gpudrive"
group_name: " "
group_name: "algorithm_logic"
entity: " "
tags:
tags:
- "IPPO"
- "LATE_FUSION"
- "PERM_EQ"
@@ -49,23 +50,23 @@ vf_coef: 0.5
n_steps: 91 # Number of steps per rollout
num_minibatches: 5 # Used to determine the minibatch size
verbose: 0
total_timesteps: 30_000_00000
total_timesteps: 100_000_000
ent_coef: 0.0001
lr: 0.0003
n_epochs: 5

mlp_class: "late_fusion"
policy: "late_fusion_policy"
ego_state_layers:
ego_state_layers:
- 64
- 32
road_object_layers:
road_object_layers:
- 64
- 64
road_graph_layers:
road_graph_layers:
- 64
- 64
shared_layers:
shared_layers:
- 64
- 64
act_func: "tanh"
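Both config files expose resample_* options (criterion "global_step", a fixed interval, mode "random"). A minimal sketch of the control flow those options imply, assuming a hypothetical resampling hook on the environment (the method and argument names are illustrative, not taken from this commit):

def maybe_resample_scenes(env, global_step: int, last_resample_step: int,
                          resample_interval: int = 3_000_000) -> int:
    """Swap in a fresh random batch of scenes once enough steps have elapsed."""
    if global_step - last_resample_step >= resample_interval:
        env.resample_scenarios()   # hypothetical hook; the real API may differ
        return global_step         # restart the interval from the current step
    return last_resample_step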
13 changes: 9 additions & 4 deletions baselines/ippo/ippo_pufferlib.py
@@ -12,10 +12,9 @@
import torch
import wandb
from box import Box

from integrations.rl.puffer import ppo
from integrations.rl.puffer.puffer_env import env_creator
from integrations.rl.puffer.utils import Policy
from integrations.rl.puffer.utils import Policy, LiDARPolicy

import pufferlib
import pufferlib.vector
@@ -30,7 +29,7 @@ def load_config(config_path):
config = Box(yaml.safe_load(f))

datetime_ = datetime.now().strftime("%m_%d_%H_%M_%S")
config["train"]["exp_id"] = config["train"]["exp_id"] or datetime_
config["train"]["exp_id"] = f'{config["train"]["exp_id"]}__S_{str(config["environment"]["k_unique_scenes"])}__{datetime_}'
config["train"]["device"] = config["train"].get("device", "cpu") # Default to 'cpu' if not set
if torch.cuda.is_available():
config["train"]["device"] = "cuda" # Set to 'cuda' if available
@@ -70,6 +69,7 @@ def train(args):
)

policy = make_policy(vecenv.driver_env).to(args.train.device)

args.train.env = args.environment.name

data = ppo.create(args.train, vecenv, policy, wandb=args.wandb)
@@ -97,7 +97,11 @@ def init_wandb(args, name, id=None, resume=True):
group=args.wandb.group,
mode=args.wandb.mode,
tags=args.wandb.tags,
config={"train": dict(args.train), "vec": dict(args.vec)},
config={
"environment": dict(args.environment),
"train": dict(args.train),
"vec": dict(args.vec),
},
name=name,
save_code=True,
resume=resume,
@@ -134,6 +138,7 @@ def sweep(args, project="PPO", sweep_name="my_sweep"):
make_env = env_creator(
data_dir=config.data_dir,
environment_config=config.environment,
train_config=config.train,
device=config.train.device,
)

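The load_config change in ippo_pufferlib.py above folds the scene count and a timestamp into the experiment id. For example, with the values from ippo_ff_puffer.yaml (exp_id: PPO, k_unique_scenes: 100), the resulting run name would look roughly like this (the timestamp is of course illustrative):

from datetime import datetime

exp_id = "PPO"
k_unique_scenes = 100
datetime_ = datetime.now().strftime("%m_%d_%H_%M_%S")   # e.g. "12_14_09_30_05"
run_name = f"{exp_id}__S_{k_unique_scenes}__{datetime_}"
print(run_name)                                          # PPO__S_100__12_14_09_30_05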
22 changes: 15 additions & 7 deletions baselines/ippo/ippo_sb3.py
@@ -1,6 +1,6 @@
import wandb
import yaml
from box import Box
from box import Box
from typing import Callable
from datetime import datetime
import dataclasses
@@ -16,8 +16,10 @@

def linear_schedule(initial_value: float) -> Callable[[float], float]:
"""Linear learning rate schedule."""

def func(progress_remaining: float) -> float:
return progress_remaining * initial_value

return func


@@ -26,6 +28,7 @@ def load_config(config_path):
with open(config_path, "r") as f:
return Box(yaml.safe_load(f))


def train(exp_config: Box, scene_config: SceneConfig):
"""Run PPO training with stable-baselines3."""

@@ -36,8 +39,9 @@ def train(exp_config: Box, scene_config: SceneConfig):
goal_achieved_weight=exp_config.goal_achieved_weight,
off_road_weight=exp_config.off_road_weight,
episode_len=exp_config.episode_len,
remove_non_vehicles=exp_config.remove_non_vehicles,
)

# Select model
if exp_config.mlp_class == "late_fusion":
exp_config.mlp_class = LateFusionNet
@@ -46,8 +50,10 @@ def train(exp_config: Box, scene_config: SceneConfig):
exp_config.mlp_class = FFN
exp_config.policy = FeedForwardPolicy
else:
raise NotImplementedError(f"Unsupported MLP class: {exp_config.mlp_class}")

raise NotImplementedError(
f"Unsupported MLP class: {exp_config.mlp_class}"
)

# Make environment
env = SB3MultiAgentEnv(
config=env_config,
@@ -56,13 +62,13 @@ def train(exp_config: Box, scene_config: SceneConfig):
max_cont_agents=env_config.max_num_agents_in_scene,
device=exp_config.device,
)

exp_config.batch_size = (
exp_config.num_worlds * exp_config.n_steps
) // exp_config.num_minibatches

datetime_ = datetime.now().strftime("%m_%d_%H_%S")
run_id = f"{datetime_}_{exp_config.k_unique_scenes}scenes"
run_id = f"SB3_{datetime_}_{exp_config.k_unique_scenes}scenes"
run = wandb.init(
project=exp_config.project_name,
name=run_id,
@@ -116,7 +122,9 @@ def train(exp_config: Box, scene_config: SceneConfig):
scene_config = SceneConfig(
path=exp_config.data_dir,
num_scenes=exp_config.num_worlds,
discipline=SelectionDiscipline.K_UNIQUE_N if exp_config.selection_discipline == "K_UNIQUE_N" else SelectionDiscipline.PAD_N,
discipline=SelectionDiscipline.K_UNIQUE_N
if exp_config.selection_discipline == "K_UNIQUE_N"
else SelectionDiscipline.PAD_N,
k_unique_scenes=exp_config.k_unique_scenes,
)

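The reformatted linear_schedule helper in ippo_sb3.py returns a callable that stable-baselines3 evaluates against the remaining training progress (1.0 at the start of training, 0.0 at the end). A minimal, self-contained usage sketch on a toy environment (the environment and timestep budget are illustrative; the repo trains PPO on SB3MultiAgentEnv instead):

from typing import Callable

import gymnasium as gym
from stable_baselines3 import PPO


def linear_schedule(initial_value: float) -> Callable[[float], float]:
    """Same helper as in the diff above: anneal linearly from initial_value to 0."""
    def func(progress_remaining: float) -> float:
        return progress_remaining * initial_value
    return func


env = gym.make("CartPole-v1")            # toy stand-in environment
model = PPO(
    policy="MlpPolicy",                  # illustrative; the repo uses its late-fusion policy
    env=env,
    learning_rate=linear_schedule(3e-4), # learning rate anneals from 3e-4 to 0
    verbose=0,
)
model.learn(total_timesteps=10_000)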
(Diffs for the remaining changed files are not shown here.)
