WIP
daphne-cornelisse committed Jan 27, 2025
1 parent 98166f7 commit 29a01da
Showing 11 changed files with 193 additions and 73 deletions.
2 changes: 1 addition & 1 deletion baselines/ippo/config/ippo_ff_puffer.yaml
@@ -41,7 +41,7 @@ network:
hidden_dim: 128 # Latent dimension
num_transformer_layers: 0 # Number of transformer layers
pred_heads_arch: [64] # Arch of the prediction heads (actor and critic)
class_name: "LateFusionTransformer"
class_name: "NeuralNet"
num_parameters: 0 # Total trainable parameters, to be filled at runtime

train:
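For context on this rename: a minimal sketch of how a `class_name` key like `"NeuralNet"` might be resolved to a network class at runtime. The module path and lookup below are assumptions for illustration; the diff does not show the repo's actual wiring.

```python
import importlib

def resolve_network_class(class_name: str, module_path: str = "networks.late_fusion"):
    """Hypothetical helper: map a config string to a class in a module."""
    module = importlib.import_module(module_path)
    return getattr(module, class_name)

# e.g. NetClass = resolve_network_class("NeuralNet")
#      policy = NetClass(input_dim=64, hidden_dim=128, action_dim=env.single_action_space.n)
```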
4 changes: 2 additions & 2 deletions baselines/ippo/ippo_pufferlib.py
@@ -19,7 +19,7 @@
from integrations.rl.puffer import ppo
from integrations.rl.puffer.puffer_env import env_creator

-from networks.late_fusion import LateFusionTransformer
+from networks.late_fusion import NeuralNet
from pygpudrive.env.dataset import SceneDataLoader

import pufferlib
@@ -48,7 +48,7 @@ def load_config(config_path):

def make_policy(env, config):
"""Create a policy based on the environment."""
-return LateFusionTransformer(
+return NeuralNet(
input_dim=config.train.network.input_dim,
action_dim=env.single_action_space.n,
hidden_dim=config.train.network.hidden_dim,
2 changes: 1 addition & 1 deletion examples/experiments/eval/config/eval_config.yaml
@@ -1,5 +1,5 @@
res_path: examples/experiments/eval/dataframes/sota_paper # Store dataframes here
-test_dataset_size: 10_000 # Number of test scenarios to evaluate on
+test_dataset_size: 1000 # Number of test scenarios to evaluate on

# Environment settings
train_dir: "/scratch/kj2676/gpudrive/data/processed/training"
64 changes: 35 additions & 29 deletions examples/experiments/eval/config/model_config.yaml
@@ -1,42 +1,48 @@
-models_path: examples/experiments/eval/models/0120
+models_path: examples/experiments/eval/models/sota_paper

models:
- name: random_baseline
train_dataset_size: null
wandb: null
trained_on: null

- name: model_PPO__R_1000__01_19_11_15_25_854_002500
train_dataset_size: 1000
wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_1000__01_19_11_15_25_854?nw=nwuserdaphnecor
trained_on: cluster

# - name: model_PPO__R_1000__01_10_17_06_33_697_003500
# train_dataset_size: 1000
# wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_1000__01_10_17_06_33_697?nw=nwuserdaphnecor
# - name: random_baseline
# train_dataset_size: null
# wandb: null
# trained_on: null

# - name: model_PPO__R_100__01_24_00_46_48_778_006500
# train_dataset_size: 100
# wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_100__01_24_00_46_48_778?nw=nwuserdaphnecor
# trained_on: cluster

# - name: model_PPO__R_10000__01_10_17_13_39_676
# train_dataset_size: 10_000
# wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_10000__01_10_17_13_39_676?nw=nwuserdaphnecor
# - name: model_PPO__R_1000__01_23_21_02_58_768_007500
# train_dataset_size: 1000
# wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_1000__01_23_21_02_58_768?nw=nwuserdaphnecor
# trained_on: cluster

- name: model_PPO__R_10000__01_23_21_02_58_770_007000
train_dataset_size: 10_000
wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_10000__01_23_21_02_58_770?nw=nwuserdaphnecor
trained_on: cluster

- name: model_PPO__R_100000__01_23_21_08_41_367_007500
train_dataset_size: 100_000
wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_100000__01_23_21_08_41_367?nw=nwuserdaphnecor
trained_on: cluster

# - name: model_PPO__R_100__01_06_11_16_08_389_003500
# - name: model_PPO__R_100__01_23_21_02_58_768_005000
# train_dataset_size: 100
# wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_100__01_06_11_16_09_712?nw=nwuserdaphnecor
# wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_100__01_23_21_02_58_768?nw=nwuserdaphnecor
# trained_on: cluster

# # - name: model_PPO__R_1000__01_06_11_16_08_389_005500
# # train_dataset_size: 1000
# # wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_1000__01_06_11_16_08_389?nw=nwuserdaphnecor
# # trained_on: cluster
# - name: model_PPO__R_1000__01_23_21_02_58_768_006000
# train_dataset_size: 1000
# wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_1000__01_23_21_02_58_768?nw=nwuserdaphnecor
# trained_on: cluster

# - name: model_PPO__R_10000__01_06_11_16_21_945_012500
# - name: model_PPO__R_10000__01_23_21_02_58_770_005500
# train_dataset_size: 10_000
# wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_10000__01_06_11_16_21_945?nw=nwuserdaphnecor
# trained_on: cluster
# wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_10000__01_23_21_02_58_770?nw=nwuserdaphnecor
# trained_on: cluster

# # - name: model_PPO__R_100000__01_06_11_29_36_390_012000
# # train_dataset_size: 100_000
# # wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_100000__01_06_11_29_36_390?nw=nwuserdaphnecor
# # #Best (old) model
# # - name: model_PPO__R_10000__01_19_17_27_13_052_007500
# # train_dataset_size: 10_000
# # wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_10000__01_19_17_27_13_052?nw=nwuserdaphnecor
# # trained_on: cluster
6 changes: 2 additions & 4 deletions examples/experiments/eval/eval_utils.py
@@ -14,7 +14,7 @@
from pygpudrive.visualize.utils import img_from_fig
from pygpudrive.datatypes.observation import GlobalEgoState

-from networks.late_fusion import LateFusionTransformer
+from networks.late_fusion import NeuralNet

import logging
import torch
@@ -57,14 +57,12 @@ def load_policy(path_to_cpt, model_name, device, env=None):
logging.info(f"Load model from {path_to_cpt}/{model_name}.pt")

# Create policy architecture from saved checkpoint
-#TODO: Change depending on the network
-policy = LateFusionTransformer(
+policy = NeuralNet(
input_dim=saved_cpt["model_arch"]["input_dim"],
action_dim=saved_cpt["action_dim"],
hidden_dim=saved_cpt["model_arch"]["hidden_dim"],
pred_heads_arch=saved_cpt["model_arch"]["pred_heads_arch"],
num_transformer_layers=saved_cpt["model_arch"][
"num_transformer_layers"
],
).to(device)

# Load the model parameters
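A hedged sketch of the complete load-and-restore pattern this hunk leads into, assuming the checkpoint stores weights under a `"parameters"` key (that key, unlike `"model_arch"` and `"action_dim"` above, is not visible in the diff):

```python
import torch
from networks.late_fusion import NeuralNet

def load_policy_sketch(path_to_cpt: str, model_name: str, device: str = "cpu"):
    # Read the saved checkpoint dict from disk
    saved_cpt = torch.load(f"{path_to_cpt}/{model_name}.pt", map_location=device)
    # Rebuild the architecture from the stored hyperparameters
    policy = NeuralNet(
        input_dim=saved_cpt["model_arch"]["input_dim"],
        action_dim=saved_cpt["action_dim"],
        hidden_dim=saved_cpt["model_arch"]["hidden_dim"],
    ).to(device)
    # Restore the trained weights ("parameters" is a hypothetical key)
    policy.load_state_dict(saved_cpt["parameters"])
    return policy.eval()  # disable dropout for deterministic evaluation
```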
8 changes: 7 additions & 1 deletion examples/experiments/eval/get_model_performance.py
@@ -12,6 +12,7 @@
from pygpudrive.env.dataset import SceneDataLoader
from eval_utils import load_config, make_env, load_policy, rollout

+import pdb

logging.basicConfig(level=logging.INFO)

@@ -64,7 +65,12 @@ def evaluate_policy(

# Store results for the current batch
scenario_names = [Path(path).stem for path in batch]
res_dict["scene"].extend(scenario_names)

# Get names from env
pdb.set_trace()
scenario_to_worlds_dict = env.get_env_filenames()

res_dict["scene"].extend(scenario_to_worlds_dict.values())
res_dict["goal_achieved"].extend(goal_achieved.cpu().numpy())
res_dict["collided"].extend(collided.cpu().numpy())
res_dict["off_road"].extend(off_road.cpu().numpy())
6 changes: 3 additions & 3 deletions examples/experiments/eval/show_agent_behaviors.py
@@ -88,9 +88,9 @@ def make_videos(
if __name__ == "__main__":

# Specify which model to load and the dataset to evaluate
MODEL_TO_LOAD = "model_PPO__R_10000__01_23_21_02_58_770_005500" #"model_PPO__R_10000__01_23_21_02_58_770_005500"
DATASET = "train"
SORT_BY = "off_road" #"goal_achieved"
MODEL_TO_LOAD = "model_PPO__R_10000__01_23_21_02_58_770_007000" #"model_PPO__R_10000__01_23_21_02_58_770_005500"
DATASET = "test"
SORT_BY = "collided" #"goal_achieved"
SHOW_TOP_K = 25 # Render this many scenes

# Configurations
15 changes: 8 additions & 7 deletions examples/experiments/generate_sbatch.py
@@ -246,7 +246,7 @@ def save_script(filename, file_path, fields, params, param_order=None):
group = "clean_data_0123"

fields = {
"time_h": 72, # Max time per job (job will finish if run is done before)
"time_h": 24, # Max time per job (job will finish if run is done before)
"num_gpus": 1, # GPUs per job
"max_sim_jobs": 30, # Max jobs at the same time
"memory": 70,
@@ -259,14 +259,15 @@ def save_script(filename, file_path, fields, params, param_order=None):
"resample_scenes": [1], # Yes
"k_unique_scenes": [500], # Sample in batches of 500
"resample_interval": [2_000_000],
"total_timesteps": [2_000_000_000],
"resample_dataset_size": [10_000, 100_000],
"batch_size": [131_072],
"minibatch_size": [8_192],
"total_timesteps": [500_000_000],
"resample_dataset_size": [100],
"batch_size": [262_144, 524_288],
"minibatch_size": [65_536, 16_384],
"update_epochs": [2, 5],
"ent_coef": [0.0001],
"ent_coef": [0.0001, 0.001],
"learning_rate": [3e-4],
"vf_coef": [0.5],
"gamma": [0.99],
#"anneal_lr": [0, 1],
"render": [0],
}
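The `params` dict above is a hyperparameter grid; each combination typically becomes one sbatch job. A self-contained sketch of that expansion using the swept keys from this change (the repo's actual `save_script` plumbing may differ):

```python
from itertools import product

params = {
    "batch_size": [262_144, 524_288],
    "minibatch_size": [65_536, 16_384],
    "update_epochs": [2, 5],
    "ent_coef": [0.0001, 0.001],
}
keys = list(params)
# One job config per point in the cartesian product of all swept values
jobs = [dict(zip(keys, combo)) for combo in product(*(params[k] for k in keys))]
print(len(jobs))  # 2 * 2 * 2 * 2 = 16 job configurations
```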

21 changes: 10 additions & 11 deletions examples/experiments/ippo_ff_p1_self_play.yaml
@@ -2,12 +2,12 @@ mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: "data/processed/training"
data_dir: "/scratch/kj2676/gpudrive/data/processed/training"

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
-num_worlds: 100 # Number of parallel environments
-k_unique_scenes: 2 # Number of unique scenes to sample from
+num_worlds: 2 # Number of parallel environments
+k_unique_scenes: 1 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
@@ -42,15 +42,15 @@ train:
compile_mode: "reduce-overhead"

# # # Data sampling # # #
-resample_scenes: false
-resample_dataset_size: 500 # Number of unique scenes to sample from
-resample_interval: 1_000_000
+resample_scenes: true
+resample_dataset_size: 100 # Number of unique scenes to sample from
+resample_interval: 2_000_000
sample_with_replacement: true
shuffle_dataset: false

# # # PPO # # #
torch_deterministic: false
-total_timesteps: 1_000_000_000
+total_timesteps: 2_000_000_000
batch_size: 131_072
minibatch_size: 8192
learning_rate: 3e-4
@@ -66,20 +66,19 @@ train:
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null
-log_window: 500
+log_window: 1000

# # # Network # # #
network:
input_dim: 64 # Embedding of the input features
hidden_dim: 128 # Latent dimension
pred_heads_arch: [64] # Arch of the prediction heads (actor and critic)
-num_transformer_layers: 0 # Number of transformer layers
dropout: 0.01
class_name: "LateFusionTransformer"
class_name: "NeuralNet"
num_parameters: 0 # Total trainable parameters, to be filled at runtime

# # # Checkpointing # # #
-checkpoint_interval: 100 # Save policy every k iterations
+checkpoint_interval: 500 # Save policy every k iterations
checkpoint_path: "./runs"

# # # Rendering # # #
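A quick sanity check on the sampling settings above, assuming `resample_interval` is counted in global environment steps:

```python
total_timesteps = 2_000_000_000
resample_interval = 2_000_000
print(total_timesteps // resample_interval)  # 1000 resamples of the 100-scene set
```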
134 changes: 122 additions & 12 deletions networks/late_fusion.py
@@ -1,3 +1,4 @@
+import copy
from typing import List, Union
import torch
from torch import nn
@@ -92,26 +93,136 @@ def sample_logits(
return action.squeeze(0), logprob.squeeze(0), logits_entropy.squeeze(0)


-class EncoderBlock(nn.Module):
-    def __init__(self, input_dim, hidden_dim, nhead=1, dropout=0.05):
-        super().__init__()
-        self.transformer_encoder = nn.TransformerEncoderLayer(
-            d_model=input_dim, nhead=nhead, dim_feedforward=hidden_dim
-        )
-        self.dropout = nn.Dropout(dropout)
# class NeuralNet(nn.Module):
# def __init__(
# self,
# action_dim,
# input_dim=64,
# hidden_dim=128,
# pred_heads_arch=[64],
# dropout=0.00,
# act_func="tanh",
# ):
# super().__init__()
# self.input_dim = input_dim
# self.hidden_dim = hidden_dim
# self.action_dim = action_dim
# self.pred_heads_arch = pred_heads_arch
# self.num_modes = 3 # Ego, partner, road graph
# self.dropout = dropout
# self.act_func = nn.Tanh() if act_func == "tanh" else nn.ReLU()

# self.ego_embed_actor = nn.Sequential(
# pufferlib.pytorch.layer_init(
# nn.Linear(constants.EGO_FEAT_DIM, input_dim)
# ),
# nn.LayerNorm(input_dim),
# self.act_func,
# nn.Dropout(self.dropout),
# pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),
# )

# self.partner_embed_actor = nn.Sequential(
# pufferlib.pytorch.layer_init(
# nn.Linear(constants.PARTNER_FEAT_DIM, input_dim)
# ),
# nn.LayerNorm(input_dim),
# self.act_func,
# nn.Dropout(self.dropout),
# pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),
# )

# self.road_map_embed_actor = nn.Sequential(
# pufferlib.pytorch.layer_init(
# nn.Linear(constants.ROAD_GRAPH_FEAT_DIM, input_dim)
# ),
# nn.LayerNorm(input_dim),
# self.act_func,
# nn.Dropout(self.dropout),
# pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),
# )

# self.shared_embed_actor = nn.Sequential(
# nn.Linear(self.input_dim * self.num_modes, self.hidden_dim),
# nn.Dropout(self.dropout)
# )

# # Value function
# self.ego_embed_critic = copy.deepcopy(self.ego_embed_actor)
# self.partner_embed_critic = copy.deepcopy(self.partner_embed_actor)
# self.road_map_embed_critic = copy.deepcopy(self.road_map_embed_actor)
# self.shared_embed_critic = copy.deepcopy(self.shared_embed_actor)

# self.actor = pufferlib.pytorch.layer_init(
# nn.Linear(hidden_dim, action_dim), std=0.01
# )
# self.critic = pufferlib.pytorch.layer_init(
# nn.Linear(hidden_dim, 1), std=1
# )

# def encode_observations(self, observation):

# ego_state, road_objects, road_graph = unpack_obs(observation)

# # Actor
# ego_embed_actor = self.ego_embed_actor(ego_state)
# partner_embed_actor, _ = self.partner_embed_actor(road_objects).max(dim=1)
# road_map_embed_actor, _ = self.road_map_embed_actor(road_graph).max(dim=1)
# embed_actor = torch.cat([ego_embed_actor, partner_embed_actor, road_map_embed_actor], dim=1)

# # Critic
# ego_embed_critic = self.ego_embed_critic(ego_state)
# partner_embed_critic, _ = self.partner_embed_critic(road_objects).max(dim=1)
# road_map_embed_critic, _ = self.road_map_embed_critic(road_graph).max(dim=1)
# embed_critic = torch.cat([ego_embed_critic, partner_embed_critic, road_map_embed_critic], dim=1)

# return self.shared_embed_actor(embed_actor), self.shared_embed_critic(embed_critic)

-    def forward(self, x):
-        return self.dropout(self.transformer_encoder(x))
# def forward(self, obs, action=None, deterministic=False):

# # Encode the observations
# hidden_actor, hidden_critic = self.encode_observations(obs)

# # Decode the actions
# value = self.critic(hidden_critic)
# logits = self.actor(hidden_actor)

-class LateFusionTransformer(nn.Module):
# action, logprob, entropy = sample_logits(logits, action, deterministic)

# return action, logprob, entropy, value

# def _build_network(self, input_dim, net_arch, network_type):
# layers = []
# last_dim = input_dim
# for layer_dim in net_arch:
# layers.extend(
# [
# nn.Linear(last_dim, layer_dim),
# nn.Dropout(self.dropout),
# nn.LayerNorm(layer_dim),
# self.act_func,
# ]
# )
# last_dim = layer_dim

# output_dim = self.action_dim if network_type == "actor" else 1
# std = 0.01 if network_type == "actor" else 1.0
# layers.append(
# pufferlib.pytorch.layer_init(
# nn.Linear(last_dim, output_dim), std=std
# )
# )

# return nn.Sequential(*layers)


+class NeuralNet(nn.Module):
def __init__(
self,
action_dim,
input_dim=64,
hidden_dim=128,
pred_heads_arch=[128],
num_transformer_layers=0,
dropout=0.00,
act_func="tanh",
):
@@ -120,7 +231,6 @@ def __init__(
self.hidden_dim = hidden_dim
self.action_dim = action_dim
self.pred_heads_arch = pred_heads_arch
-self.num_transformer_layers = num_transformer_layers
self.num_modes = 3 # Ego, partner, road graph
self.dropout = dropout
self.act_func = nn.Tanh() if act_func == "tanh" else nn.ReLU()
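The commented-out block above preserves the late-fusion pattern this network family uses: embed each modality separately, max-pool over variable-size entity sets, then fuse by concatenation. A self-contained sketch of that pattern with illustrative dimensions (not the repo's feature constants):

```python
import torch
from torch import nn

ego = torch.randn(8, 6)            # (batch, ego features)
partners = torch.randn(8, 63, 10)  # (batch, partner agents, features)
roadgraph = torch.randn(8, 200, 13)

ego_embed = nn.Linear(6, 64)(ego)
partner_embed, _ = nn.Linear(10, 64)(partners).max(dim=1)  # pool over agents
road_embed, _ = nn.Linear(13, 64)(roadgraph).max(dim=1)    # pool over map points

fused = torch.cat([ego_embed, partner_embed, road_embed], dim=1)  # (8, 192)
hidden = nn.Linear(192, 128)(fused)  # shared embedding -> actor/critic heads
print(hidden.shape)  # torch.Size([8, 128])
```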
4 changes: 2 additions & 2 deletions pygpudrive/env/config.py
@@ -60,10 +60,10 @@ class EnvConfig:
# Action space settings (if discretized)
# Classic or Invertible Bicycle dynamics model
steer_actions: torch.Tensor = torch.round(
-torch.linspace(-torch.pi, torch.pi, 15), decimals=3
+torch.linspace(-torch.pi, torch.pi, 41), decimals=3 #15
)
accel_actions: torch.Tensor = torch.round(
-torch.linspace(-4.0, 4.0, 9), decimals=3
+torch.linspace(-4.0, 4.0, 17), decimals=3 #9
)
head_tilt_actions: torch.Tensor = torch.Tensor([0])
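The discretization change enlarges the joint action grid. Assuming the environment builds its discrete action set as the product of steer, accel, and head-tilt values, the count grows from 15 * 9 * 1 = 135 to 41 * 17 * 1 = 697 actions:

```python
import torch

steer = torch.round(torch.linspace(-torch.pi, torch.pi, 41), decimals=3)
accel = torch.round(torch.linspace(-4.0, 4.0, 17), decimals=3)
head_tilt = torch.Tensor([0])
print(len(steer) * len(accel) * len(head_tilt))  # 697
```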
