
Commit

Merge branch 'main' into FixViewer
aaravpandya committed May 2, 2024
2 parents 145794d + 630ffbb commit cde55c4
Showing 35 changed files with 1,249 additions and 430 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/manual.yml
@@ -0,0 +1,34 @@
name: Run C++ tests

on:
  pull_request:

jobs:
  build-and-test:

    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Install system dependencies
        run: |
          sudo apt update
          sudo apt install -y libx11-dev libxrandr-dev libxinerama-dev libxcursor-dev libxi-dev mesa-common-dev libc++1
      - name: Setup Conda environment
        uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: gpudrive
          environment-file: environment.yml

      - name: Install dependencies and build the project with Poetry
        run: |
          poetry install
      - name: Run tests
        run: |
          cd build/
          ctest --rerun-failed --output-on-failure
8 changes: 4 additions & 4 deletions .gitignore
@@ -18,9 +18,9 @@

# Data
/formatted_json_v2_no_tl_train
/data_10
/data_100
/data_1000
/waymo_data_new



# Logging
/wandb
@@ -232,4 +232,4 @@ pyrightconfig.json

*~

# End of https://www.toptal.com/developers/gitignore/api/python,c++
# End of https://www.toptal.com/developers/gitignore/api/python,c++
71 changes: 42 additions & 29 deletions algorithms/ppo/sb3/callbacks.py
@@ -1,4 +1,6 @@
import numpy as np
import torch
import wandb
from stable_baselines3.common.callbacks import BaseCallback


@@ -7,10 +9,12 @@ class MultiAgentCallback(BaseCallback):

    def __init__(
        self,
        config,
        wandb_run=None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.config = config
        self.wandb_run = wandb_run

    def _on_training_start(self) -> None:
@@ -44,8 +48,13 @@ def _on_rollout_end(self) -> None:
            nan=0,
        )

        # TODO: note that this only works when we have a fixed number of agents
        num_controlled_agents = rewards.shape[1]
        # Get the total number of controlled agents we are controlling
        # The number of controllable agents is different per scenario
        num_controlled_agents = self.locals[
            "env"
        ]._get_sum_controlled_valid_agents

        print(f"num_controlled_agents: {num_controlled_agents}")

        # Number of episodes in the rollout
        num_episodes_in_rollout = (
@@ -61,45 +70,49 @@ def _on_rollout_end(self) -> None:
            / num_controlled_agents
        )

        # Rewards for each agent
        for agent_idx in range(num_controlled_agents):
            self.logger.record(
                f"rollout/avg_agent_rew{agent_idx}",
                rewards[:, agent_idx].sum() / num_episodes_in_rollout,
            )
        mean_reward_per_agent_per_episode = (
            rewards.sum() / num_episodes_in_rollout / num_controlled_agents
        )

        observations = (
            self.locals["rollout_buffer"].observations.cpu().detach().numpy()
        )

        num_episodes_in_rollout = np.nan_to_num(
            (
                self.locals["rollout_buffer"]
                .episode_starts.cpu()
                .detach()
                .numpy()
            ),
            nan=0,
        ).sum()

        self.logger.record("rollout/global_step", self.num_timesteps)
        self.logger.record(
            "rollout/num_episodes_in_rollout",
            num_episodes_in_rollout.item() / num_controlled_agents,
            num_episodes_in_rollout.item(),
        )
        self.logger.record("rollout/sum_reward", rewards.sum())
        self.logger.record(
            "rollout/avg_reward",
            (rewards.sum() / (num_episodes_in_rollout)).item(),
            "rollout/avg_reward", mean_reward_per_agent_per_episode.item()
        )

        self.logger.record("rollout/obs_max", observations.max())
        self.logger.record("rollout/obs_min", observations.min())

        # Get categorical max values
        self.logger.record("norm/speed_max", observations[:, :, 0].max())
        self.logger.record("norm/veh_len_max", observations[:, :, 1].max())
        self.logger.record("norm/veh_width_max", observations[:, :, 2].max())
        self.logger.record("norm/goal_coord_x", observations[:, :, 3].max())
        self.logger.record("norm/goal_coord_y", observations[:, :, 4].max())
        self.logger.record("norm/L2_norm_to_goal", observations[:, :, 5].max())
        # Render the environment
        if self.config.render:
            self._create_and_log_video()

    def _create_and_log_video(self):
        """Make a video and log it to wandb.
        Note: Currently only works for a single world."""
        policy = self.model
        env = self.locals["env"]

        obs = env.reset()
        frames = []

        for _ in range(90):

            action, _ = policy.predict(obs.detach().cpu().numpy())

            # Step the environment
            obs, _, _, _ = env.step(action)

            frame = env.render()
            frames.append(frame.T)

        frames = np.array(frames)

        wandb.log({"video": wandb.Video(frames, fps=5, format="gif")})
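Aside: the reward aggregation that _on_rollout_end now logs as rollout/avg_reward can be sketched in isolation with dummy NumPy arrays standing in for the SB3 rollout buffer tensors. The shapes, the fixed episode length, and the assumption that episodes are counted from the episode_starts flags averaged over agents (the exact expression is collapsed between the hunks above) are illustrative, not taken from the repository:

import numpy as np

# Dummy stand-ins for the rollout buffer tensors (rollout length x controlled agents).
rewards = np.nan_to_num(np.random.randn(2048, 10), nan=0)
episode_starts = np.zeros((2048, 10))
episode_starts[::91, :] = 1.0  # assume a new episode begins every 91 steps

num_controlled_agents = rewards.shape[1]  # per-scenario in the real environment
num_episodes_in_rollout = episode_starts.sum() / num_controlled_agents

# The quantity logged as "rollout/avg_reward" in the callback above.
mean_reward_per_agent_per_episode = (
    rewards.sum() / num_episodes_in_rollout / num_controlled_agents
)
print(mean_reward_per_agent_per_episode)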
58 changes: 48 additions & 10 deletions algorithms/ppo/sb3/mappo.py
@@ -81,18 +81,56 @@ def collect_rollouts(
            with torch.no_grad():
                obs_tensor = self._last_obs

                # # EDIT_1: Mask out invalid observations (NaN dimensions and/or dead agents)
                # # Create dummy actions, values and log_probs (NaN)
                # actions = torch.full(fill_value=float('nan'), size=(self.n_envs,)).to(self.device)
                # log_probs = torch.full(fill_value=float('nan'), size=(self.n_envs,), dtype=torch.float32).to(self.device)
                # values = (
                #     torch.full(fill_value=float('nan'), size=(self.n_envs,), dtype=torch.float32)
                #     .unsqueeze(dim=1)
                #     .to(self.device)
                # )
                # TODO: Check
                # EDIT_1: Mask out invalid observations (NaN axes and/or dead agents)
                # Create dummy actions, values and log_probs (NaN)
                actions = torch.full(
                    fill_value=float("nan"), size=(self.n_envs,)
                ).to(self.device)
                log_probs = torch.full(
                    fill_value=float("nan"),
                    size=(self.n_envs,),
                    dtype=torch.float32,
                ).to(self.device)
                values = (
                    torch.full(
                        fill_value=float("nan"),
                        size=(self.n_envs,),
                        dtype=torch.float32,
                    )
                    .unsqueeze(dim=1)
                    .to(self.device)
                )

                # Get indices of alive agent ids
                # Convert env_dead_agent_mask to boolean tensor with the same shape as obs_tensor
                alive_agent_mask = ~(
                    env.dead_agent_mask.reshape(env.num_envs, 1)
                )  # .expand_as(obs_tensor)

                # Use boolean indexing to select elements in obs_tensor
                obs_tensor_alive = obs_tensor[
                    alive_agent_mask.expand_as(obs_tensor)
                ].reshape(-1, obs_tensor.shape[-1])

                # Predict actions, vals and log_probs given obs
                actions_tmp, values_tmp, log_prob_tmp = self.policy(
                    obs_tensor_alive
                )

                # Store
                (
                    actions[alive_agent_mask.squeeze(dim=1)],
                    values[alive_agent_mask.squeeze(dim=1)],
                    log_probs[alive_agent_mask.squeeze(dim=1)],
                ) = (
                    actions_tmp.float(),
                    values_tmp.float(),
                    log_prob_tmp.float(),
                )

                # Predict actions, vals and log_probs given obs
                actions, values, log_probs = self.policy(obs_tensor)
                # actions, values, log_probs = self.policy(obs_tensor)

            # Rescale and perform action
            clipped_actions = actions
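Aside: the masking pattern introduced above (allocate NaN placeholders for every agent slot, run the policy only on observations of agents that are still alive, then write the results back through the boolean mask) can be sketched in isolation as follows. The linear layer, tensor shapes, and the hand-written dead_agent_mask are placeholders for illustration; only the indexing pattern mirrors the diff:

import torch

num_envs, obs_dim = 6, 4
policy = torch.nn.Linear(obs_dim, 1)  # dummy stand-in for self.policy
obs_tensor = torch.randn(num_envs, obs_dim)
dead_agent_mask = torch.tensor([False, True, False, False, True, False])

# NaN placeholders so slots of dead agents keep a well-defined (ignorable) value.
values = torch.full((num_envs, 1), float("nan"))

# Boolean mask of alive agents, shaped to broadcast against the observations.
alive_agent_mask = ~dead_agent_mask.reshape(num_envs, 1)

# Select only the rows belonging to alive agents ...
obs_alive = obs_tensor[alive_agent_mask.expand_as(obs_tensor)].reshape(
    -1, obs_tensor.shape[-1]
)

# ... evaluate the policy on the smaller batch ...
with torch.no_grad():
    values_alive = policy(obs_alive)

# ... and scatter the results back into the full-size tensor.
values[alive_agent_mask.squeeze(dim=1)] = values_alive
print(values)

Keeping the placeholders NaN rather than zero makes it easy to notice downstream if a dead agent's slot is ever consumed by mistake, since NaN propagates through arithmetic.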
File renamed without changes.
23 changes: 23 additions & 0 deletions baselines/config.py
@@ -0,0 +1,23 @@

from dataclasses import dataclass
import torch

@dataclass
class ExperimentConfig:
"""
Configurations for experiments.
"""
# Rendering options
render: bool = False
render_mode: str = "rgb_array"
render_freq: int = 1

# TODO: Logging
log_dir: str = "logs"

# Hyperparameters
policy: str = "MlpPolicy"
seed: int = 42
n_steps: int = 2048
batch_size: int = 256
verbose: int = 0
39 changes: 25 additions & 14 deletions experiments/run_ppo_sb3.py → baselines/run_ppo_sb3.py
@@ -11,49 +11,60 @@
# Import adapted PPO version
from algorithms.ppo.sb3.mappo import MAPPO

from baselines.config import ExperimentConfig

if __name__ == "__main__":

    config = EnvConfig()
    env_config = EnvConfig(
        ego_state=True,
        road_map_obs=False,
        partner_obs=True,
        normalize_obs=False,
    )

    exp_config = ExperimentConfig(
        render=False,
    )

    # Make SB3-compatible environment
    env = SB3MultiAgentEnv(
        config=config,
        config=env_config,
        num_worlds=2,
        max_cont_agents=3,
        max_cont_agents=10,
        data_dir="waymo_data",
        device="cuda",
    )

    # Initialize wandb
    wandb.login()
    run = wandb.init(
        project="please_drive",
        group="single_agent_sparse",
        project="rl_benchmarking",
        group="different_scenes",
        sync_tensorboard=True,
    )
    run_id = run.id

    # Initialize custom callback
    custom_callback = MultiAgentCallback(
        config=exp_config,
        wandb_run=run if run_id is not None else None,
    )

    model = MAPPO(
        policy="MlpPolicy",  # Policy type
        n_steps=2048,  # Number of steps per rollout
        batch_size=256,  # Minibatch size
        env=env,  # Our wrapped environment
        seed=42,  # Always seed for reproducibility
        verbose=0,
        policy=exp_config.policy,
        n_steps=exp_config.n_steps,
        batch_size=exp_config.batch_size,
        env=env,
        seed=exp_config.seed,
        verbose=exp_config.verbose,
        tensorboard_log=f"runs/{run_id}"
        if run_id is not None
        else None,  # Sync with wandb
    )

    # Learn
    model.learn(
        total_timesteps=3_000_000,
        total_timesteps=10_000_000,
        callback=custom_callback,
    )

    run.finish()
    env.close()
File renamed without changes.
