diff --git a/baselines/ippo/config/ippo_ff_puffer.yaml b/baselines/ippo/config/ippo_ff_puffer.yaml index 95303c93..a9b9da39 100644 --- a/baselines/ippo/config/ippo_ff_puffer.yaml +++ b/baselines/ippo/config/ippo_ff_puffer.yaml @@ -41,7 +41,7 @@ network: hidden_dim: 128 # Latent dimension num_transformer_layers: 0 # Number of transformer layers pred_heads_arch: [64] # Arch of the prediction heads (actor and critic) - class_name: "LateFusionTransformer" + class_name: "NeuralNet" num_parameters: 0 # Total trainable parameters, to be filled at runtime train: diff --git a/baselines/ippo/ippo_pufferlib.py b/baselines/ippo/ippo_pufferlib.py index 9ff56e32..d0a4a42f 100644 --- a/baselines/ippo/ippo_pufferlib.py +++ b/baselines/ippo/ippo_pufferlib.py @@ -19,7 +19,7 @@ from integrations.rl.puffer import ppo from integrations.rl.puffer.puffer_env import env_creator -from networks.late_fusion import LateFusionTransformer +from networks.late_fusion import NeuralNet from pygpudrive.env.dataset import SceneDataLoader import pufferlib @@ -48,7 +48,7 @@ def load_config(config_path): def make_policy(env, config): """Create a policy based on the environment.""" - return LateFusionTransformer( + return NeuralNet( input_dim=config.train.network.input_dim, action_dim=env.single_action_space.n, hidden_dim=config.train.network.hidden_dim, diff --git a/examples/experiments/eval/config/eval_config.yaml b/examples/experiments/eval/config/eval_config.yaml index 73b2e7ca..3c599ff8 100644 --- a/examples/experiments/eval/config/eval_config.yaml +++ b/examples/experiments/eval/config/eval_config.yaml @@ -1,5 +1,5 @@ res_path: examples/experiments/eval/dataframes/sota_paper # Store dataframes here -test_dataset_size: 10_000 # Number of test scenarios to evaluate on +test_dataset_size: 1000 # Number of test scenarios to evaluate on # Environment settings train_dir: "/scratch/kj2676/gpudrive/data/processed/training" diff --git a/examples/experiments/eval/config/model_config.yaml b/examples/experiments/eval/config/model_config.yaml index 509c0507..78531c76 100644 --- a/examples/experiments/eval/config/model_config.yaml +++ b/examples/experiments/eval/config/model_config.yaml @@ -1,42 +1,48 @@ -models_path: examples/experiments/eval/models/0120 +models_path: examples/experiments/eval/models/sota_paper models: - - name: random_baseline - train_dataset_size: null - wandb: null - trained_on: null - - - name: model_PPO__R_1000__01_19_11_15_25_854_002500 - train_dataset_size: 1000 - wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_1000__01_19_11_15_25_854?nw=nwuserdaphnecor - trained_on: cluster - - # - name: model_PPO__R_1000__01_10_17_06_33_697_003500 - # train_dataset_size: 1000 - # wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_1000__01_10_17_06_33_697?nw=nwuserdaphnecor + # - name: random_baseline + # train_dataset_size: null + # wandb: null + # trained_on: null + + # - name: model_PPO__R_100__01_24_00_46_48_778_006500 + # train_dataset_size: 100 + # wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_100__01_24_00_46_48_778?nw=nwuserdaphnecor # trained_on: cluster - # - name: model_PPO__R_10000__01_10_17_13_39_676 - # train_dataset_size: 10_000 - # wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_10000__01_10_17_13_39_676?nw=nwuserdaphnecor + # - name: model_PPO__R_1000__01_23_21_02_58_768_007500 + # train_dataset_size: 1000 + # wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_1000__01_23_21_02_58_768?nw=nwuserdaphnecor # trained_on: cluster + + - name: model_PPO__R_10000__01_23_21_02_58_770_007000 + train_dataset_size: 10_000 + wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_10000__01_23_21_02_58_770?nw=nwuserdaphnecor + trained_on: cluster + + - name: model_PPO__R_100000__01_23_21_08_41_367_007500 + train_dataset_size: 100_000 + wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_100000__01_23_21_08_41_367?nw=nwuserdaphnecor + trained_on: cluster - # - name: model_PPO__R_100__01_06_11_16_08_389_003500 + # - name: model_PPO__R_100__01_23_21_02_58_768_005000 # train_dataset_size: 100 - # wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_100__01_06_11_16_09_712?nw=nwuserdaphnecor + # wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_100__01_23_21_02_58_768?nw=nwuserdaphnecor # trained_on: cluster - # # - name: model_PPO__R_1000__01_06_11_16_08_389_005500 - # # train_dataset_size: 1000 - # # wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_1000__01_06_11_16_08_389?nw=nwuserdaphnecor - # # trained_on: cluster + # - name: model_PPO__R_1000__01_23_21_02_58_768_006000 + # train_dataset_size: 1000 + # wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_1000__01_23_21_02_58_768?nw=nwuserdaphnecor + # trained_on: cluster - # - name: model_PPO__R_10000__01_06_11_16_21_945_012500 + # - name: model_PPO__R_10000__01_23_21_02_58_770_005500 # train_dataset_size: 10_000 - # wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_10000__01_06_11_16_21_945?nw=nwuserdaphnecor - # trained_on: cluster + # wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_10000__01_23_21_02_58_770?nw=nwuserdaphnecor + # trained_on: cluster - # # - name: model_PPO__R_100000__01_06_11_29_36_390_012000 - # # train_dataset_size: 100_000 - # # wandb: https://wandb.ai/emerge_/paper_1_self_play/runs/PPO__R_100000__01_06_11_29_36_390?nw=nwuserdaphnecor + # # #Best (old) model + # # - name: model_PPO__R_10000__01_19_17_27_13_052_007500 + # # train_dataset_size: 10_000 + # # wandb: https://wandb.ai/emerge_/self_play_rl_safe/runs/PPO__R_10000__01_19_17_27_13_052?nw=nwuserdaphnecor # # trained_on: cluster \ No newline at end of file diff --git a/examples/experiments/eval/eval_utils.py b/examples/experiments/eval/eval_utils.py index b4beb87b..37d1fc63 100644 --- a/examples/experiments/eval/eval_utils.py +++ b/examples/experiments/eval/eval_utils.py @@ -14,7 +14,7 @@ from pygpudrive.visualize.utils import img_from_fig from pygpudrive.datatypes.observation import GlobalEgoState -from networks.late_fusion import LateFusionTransformer +from networks.late_fusion import NeuralNet import logging import torch @@ -57,14 +57,12 @@ def load_policy(path_to_cpt, model_name, device, env=None): logging.info(f"Load model from {path_to_cpt}/{model_name}.pt") # Create policy architecture from saved checkpoint + #TODO: Change depending on the network policy = LateFusionTransformer( input_dim=saved_cpt["model_arch"]["input_dim"], action_dim=saved_cpt["action_dim"], hidden_dim=saved_cpt["model_arch"]["hidden_dim"], pred_heads_arch=saved_cpt["model_arch"]["pred_heads_arch"], - num_transformer_layers=saved_cpt["model_arch"][ - "num_transformer_layers" - ], ).to(device) # Load the model parameters diff --git a/examples/experiments/eval/get_model_performance.py b/examples/experiments/eval/get_model_performance.py index 60b000ef..42e18bd5 100644 --- a/examples/experiments/eval/get_model_performance.py +++ b/examples/experiments/eval/get_model_performance.py @@ -12,6 +12,7 @@ from pygpudrive.env.dataset import SceneDataLoader from eval_utils import load_config, make_env, load_policy, rollout +import pdb logging.basicConfig(level=logging.INFO) @@ -64,7 +65,12 @@ def evaluate_policy( # Store results for the current batch scenario_names = [Path(path).stem for path in batch] - res_dict["scene"].extend(scenario_names) + + # Get names from env + pdb.set_trace() + scenario_to_worlds_dict = env.get_env_filenames() + + res_dict["scene"].extend(scenario_to_worlds_dict.values()) res_dict["goal_achieved"].extend(goal_achieved.cpu().numpy()) res_dict["collided"].extend(collided.cpu().numpy()) res_dict["off_road"].extend(off_road.cpu().numpy()) diff --git a/examples/experiments/eval/show_agent_behaviors.py b/examples/experiments/eval/show_agent_behaviors.py index a9122b6a..dbf71090 100644 --- a/examples/experiments/eval/show_agent_behaviors.py +++ b/examples/experiments/eval/show_agent_behaviors.py @@ -88,9 +88,9 @@ def make_videos( if __name__ == "__main__": # Specify which model to load and the dataset to evaluate - MODEL_TO_LOAD = "model_PPO__R_10000__01_23_21_02_58_770_005500" #"model_PPO__R_10000__01_23_21_02_58_770_005500" - DATASET = "train" - SORT_BY = "off_road" #"goal_achieved" + MODEL_TO_LOAD = "model_PPO__R_10000__01_23_21_02_58_770_007000" #"model_PPO__R_10000__01_23_21_02_58_770_005500" + DATASET = "test" + SORT_BY = "collided" #"goal_achieved" SHOW_TOP_K = 25 # Render this many scenes # Configurations diff --git a/examples/experiments/generate_sbatch.py b/examples/experiments/generate_sbatch.py index c709cc16..de333539 100644 --- a/examples/experiments/generate_sbatch.py +++ b/examples/experiments/generate_sbatch.py @@ -246,7 +246,7 @@ def save_script(filename, file_path, fields, params, param_order=None): group = "clean_data_0123" fields = { - "time_h": 72, # Max time per job (job will finish if run is done before) + "time_h": 24, # Max time per job (job will finish if run is done before) "num_gpus": 1, # GPUs per job "max_sim_jobs": 30, # Max jobs at the same time "memory": 70, @@ -259,14 +259,15 @@ def save_script(filename, file_path, fields, params, param_order=None): "resample_scenes": [1], # Yes "k_unique_scenes": [500], # Sample in batches of 500 "resample_interval": [2_000_000], - "total_timesteps": [2_000_000_000], - "resample_dataset_size": [10_000, 100_000], - "batch_size": [131_072], - "minibatch_size": [8_192], + "total_timesteps": [500_000_000], + "resample_dataset_size": [100], + "batch_size": [262_144, 524_288], + "minibatch_size": [65_536, 16_384], "update_epochs": [2, 5], - "ent_coef": [0.0001], + "ent_coef": [0.0001, 0.001], "learning_rate": [3e-4], - "vf_coef": [0.5], + "gamma": [0.99], + #"anneal_lr": [0, 1], "render": [0], } diff --git a/examples/experiments/ippo_ff_p1_self_play.yaml b/examples/experiments/ippo_ff_p1_self_play.yaml index af8424cb..04522941 100644 --- a/examples/experiments/ippo_ff_p1_self_play.yaml +++ b/examples/experiments/ippo_ff_p1_self_play.yaml @@ -2,12 +2,12 @@ mode: "train" use_rnn: false eval_model_path: null baseline: false -data_dir: "data/processed/training" +data_dir: "/scratch/kj2676/gpudrive/data/processed/training" environment: # Overrides default environment configs (see pygpudrive/env/config.py) name: "gpudrive" - num_worlds: 100 # Number of parallel environments - k_unique_scenes: 2 # Number of unique scenes to sample from + num_worlds: 2 # Number of parallel environments + k_unique_scenes: 1 # Number of unique scenes to sample from max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp ego_state: true road_map_obs: true @@ -42,15 +42,15 @@ train: compile_mode: "reduce-overhead" # # # Data sampling # # # - resample_scenes: false - resample_dataset_size: 500 # Number of unique scenes to sample from - resample_interval: 1_000_000 + resample_scenes: true + resample_dataset_size: 100 # Number of unique scenes to sample from + resample_interval: 2_000_000 sample_with_replacement: true shuffle_dataset: false # # # PPO # # # torch_deterministic: false - total_timesteps: 1_000_000_000 + total_timesteps: 2_000_000_000 batch_size: 131_072 minibatch_size: 8192 learning_rate: 3e-4 @@ -66,20 +66,19 @@ train: vf_coef: 0.5 max_grad_norm: 0.5 target_kl: null - log_window: 500 + log_window: 1000 # # # Network # # # network: input_dim: 64 # Embedding of the input features hidden_dim: 128 # Latent dimension pred_heads_arch: [64] # Arch of the prediction heads (actor and critic) - num_transformer_layers: 0 # Number of transformer layers dropout: 0.01 - class_name: "LateFusionTransformer" + class_name: "NeuralNet" num_parameters: 0 # Total trainable parameters, to be filled at runtime # # # Checkpointing # # # - checkpoint_interval: 100 # Save policy every k iterations + checkpoint_interval: 500 # Save policy every k iterations checkpoint_path: "./runs" # # # Rendering # # # diff --git a/networks/late_fusion.py b/networks/late_fusion.py index bbc5998b..2fa194ef 100644 --- a/networks/late_fusion.py +++ b/networks/late_fusion.py @@ -1,3 +1,4 @@ +import copy from typing import List, Union import torch from torch import nn @@ -92,26 +93,136 @@ def sample_logits( return action.squeeze(0), logprob.squeeze(0), logits_entropy.squeeze(0) -class EncoderBlock(nn.Module): - def __init__(self, input_dim, hidden_dim, nhead=1, dropout=0.05): - super().__init__() - self.transformer_encoder = nn.TransformerEncoderLayer( - d_model=input_dim, nhead=nhead, dim_feedforward=hidden_dim - ) - self.dropout = nn.Dropout(dropout) +# class NeuralNet(nn.Module): +# def __init__( +# self, +# action_dim, +# input_dim=64, +# hidden_dim=128, +# pred_heads_arch=[64], +# dropout=0.00, +# act_func="tanh", +# ): +# super().__init__() +# self.input_dim = input_dim +# self.hidden_dim = hidden_dim +# self.action_dim = action_dim +# self.pred_heads_arch = pred_heads_arch +# self.num_modes = 3 # Ego, partner, road graph +# self.dropout = dropout +# self.act_func = nn.Tanh() if act_func == "tanh" else nn.ReLU() + +# self.ego_embed_actor = nn.Sequential( +# pufferlib.pytorch.layer_init( +# nn.Linear(constants.EGO_FEAT_DIM, input_dim) +# ), +# nn.LayerNorm(input_dim), +# self.act_func, +# nn.Dropout(self.dropout), +# pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)), +# ) + +# self.partner_embed_actor = nn.Sequential( +# pufferlib.pytorch.layer_init( +# nn.Linear(constants.PARTNER_FEAT_DIM, input_dim) +# ), +# nn.LayerNorm(input_dim), +# self.act_func, +# nn.Dropout(self.dropout), +# pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)), +# ) + +# self.road_map_embed_actor = nn.Sequential( +# pufferlib.pytorch.layer_init( +# nn.Linear(constants.ROAD_GRAPH_FEAT_DIM, input_dim) +# ), +# nn.LayerNorm(input_dim), +# self.act_func, +# nn.Dropout(self.dropout), +# pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)), +# ) + +# self.shared_embed_actor = nn.Sequential( +# nn.Linear(self.input_dim * self.num_modes, self.hidden_dim), +# nn.Dropout(self.dropout) +# ) + +# # Value function +# self.ego_embed_critic = copy.deepcopy(self.ego_embed_actor) +# self.partner_embed_critic = copy.deepcopy(self.partner_embed_actor) +# self.road_map_embed_critic = copy.deepcopy(self.road_map_embed_actor) +# self.shared_embed_critic = copy.deepcopy(self.shared_embed_actor) + +# self.actor = pufferlib.pytorch.layer_init( +# nn.Linear(hidden_dim, action_dim), std=0.01 +# ) +# self.critic = pufferlib.pytorch.layer_init( +# nn.Linear(hidden_dim, 1), std=1 +# ) + +# def encode_observations(self, observation): + +# ego_state, road_objects, road_graph = unpack_obs(observation) + +# # Actor +# ego_embed_actor = self.ego_embed_actor(ego_state) +# partner_embed_actor, _ = self.partner_embed_actor(road_objects).max(dim=1) +# road_map_embed_actor, _ = self.road_map_embed_actor(road_graph).max(dim=1) +# embed_actor = torch.cat([ego_embed_actor, partner_embed_actor, road_map_embed_actor], dim=1) + +# # Critic +# ego_embed_critic = self.ego_embed_critic(ego_state) +# partner_embed_critic, _ = self.partner_embed_critic(road_objects).max(dim=1) +# road_map_embed_critic, _ = self.road_map_embed_critic(road_graph).max(dim=1) +# embed_critic = torch.cat([ego_embed_critic, partner_embed_critic, road_map_embed_critic], dim=1) + +# return self.shared_embed_actor(embed_actor), self.shared_embed_critic(embed_critic) - def forward(self, x): - return self.dropout(self.transformer_encoder(x)) +# def forward(self, obs, action=None, deterministic=False): +# # Encode the observations +# hidden_actor, hidden_critic = self.encode_observations(obs) + +# # Decode the actions +# value = self.critic(hidden_critic) +# logits = self.actor(hidden_actor) -class LateFusionTransformer(nn.Module): +# action, logprob, entropy = sample_logits(logits, action, deterministic) + +# return action, logprob, entropy, value + +# def _build_network(self, input_dim, net_arch, network_type): +# layers = [] +# last_dim = input_dim +# for layer_dim in net_arch: +# layers.extend( +# [ +# nn.Linear(last_dim, layer_dim), +# nn.Dropout(self.dropout), +# nn.LayerNorm(layer_dim), +# self.act_func, +# ] +# ) +# last_dim = layer_dim + +# output_dim = self.action_dim if network_type == "actor" else 1 +# std = 0.01 if network_type == "actor" else 1.0 +# layers.append( +# pufferlib.pytorch.layer_init( +# nn.Linear(last_dim, output_dim), std=std +# ) +# ) + +# return nn.Sequential(*layers) + + +class NeuralNet(nn.Module): def __init__( self, action_dim, input_dim=64, hidden_dim=128, pred_heads_arch=[128], - num_transformer_layers=0, dropout=0.00, act_func="tanh", ): @@ -120,7 +231,6 @@ def __init__( self.hidden_dim = hidden_dim self.action_dim = action_dim self.pred_heads_arch = pred_heads_arch - self.num_transformer_layers = num_transformer_layers self.num_modes = 3 # Ego, partner, road graph self.dropout = dropout self.act_func = nn.Tanh() if act_func == "tanh" else nn.ReLU() diff --git a/pygpudrive/env/config.py b/pygpudrive/env/config.py index dd5d6b97..7029d9ac 100755 --- a/pygpudrive/env/config.py +++ b/pygpudrive/env/config.py @@ -60,10 +60,10 @@ class EnvConfig: # Action space settings (if discretized) # Classic or Invertible Bicycle dynamics model steer_actions: torch.Tensor = torch.round( - torch.linspace(-torch.pi, torch.pi, 15), decimals=3 + torch.linspace(-torch.pi, torch.pi, 41), decimals=3 #15 ) accel_actions: torch.Tensor = torch.round( - torch.linspace(-4.0, 4.0, 9), decimals=3 + torch.linspace(-4.0, 4.0, 17), decimals=3 #9 ) head_tilt_actions: torch.Tensor = torch.Tensor([0])