
Puffer speed up #321

Merged
merged 8 commits into from
Jan 14, 2025
Changes from 2 commits
5 changes: 3 additions & 2 deletions baselines/ippo/ippo_pufferlib.py
@@ -285,5 +285,6 @@ def run(


if __name__ == "__main__":

app()
import cProfile
cProfile.run('app()', 'profiled')
#app()
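Note on the profiling change above: cProfile.run('app()', 'profiled') writes binary profiler stats to a file named profiled in the working directory. A minimal sketch of how those stats could be read back with the standard-library pstats module (the filename matches the call above; the sort key and entry count are illustrative):

    import pstats

    # Load the stats file written by cProfile.run('app()', 'profiled')
    stats = pstats.Stats("profiled")
    # Sort by cumulative time and print the 20 most expensive call sites
    stats.sort_stats("cumulative").print_stats(20)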
14 changes: 7 additions & 7 deletions examples/experiments/ippo_ff_p1_self_play.yaml
@@ -2,13 +2,13 @@ mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: "data/processed/training"
data_dir: "data/processed/examples"

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 100 # Number of parallel environments
num_worlds: 300 # Number of parallel environments
k_unique_scenes: 100 # Number of unique scenes to sample from
max_controlled_agents: 128 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
@@ -49,7 +49,7 @@ train:
resample_scenes: true
resample_criterion: "global_step"
resample_dataset_size: 500 # Number of unique scenes to sample from
resample_interval: 1_000_000
resample_interval: 20_000_000
resample_limit: 1000 # Resample until the limit is reached; set to a large number to continue resampling indefinitely
sample_with_replacement: true
shuffle_dataset: false
@@ -58,12 +58,12 @@ train:
torch_deterministic: false
total_timesteps: 1_000_000_000
batch_size: 131_072
minibatch_size: 8192
minibatch_size: 16384
learning_rate: 3e-4
anneal_lr: false
gamma: 0.99
gae_lambda: 0.95
update_epochs: 5
update_epochs: 1
norm_adv: true
clip_coef: 0.2
clip_vloss: false
@@ -76,7 +76,7 @@ train:
# # # Network # # #
network:
input_dim: 64 # Embedding of the input features
hidden_dim: 128 # Latent dimension
hidden_dim: 192 # Latent dimension
pred_heads_arch: [64] # Arch of the prediction heads (actor and critic)
num_transformer_layers: 0 # Number of transformer layers
dropout: 0.01
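A back-of-the-envelope check on the training changes above (my reading, not part of the diff): optimizer steps per rollout equal update_epochs * (batch_size / minibatch_size), so the new minibatch size and epoch count cut PPO updates per collected batch by roughly 10x.

    # Optimizer steps per rollout = update_epochs * (batch_size // minibatch_size)
    old_steps = 5 * (131_072 // 8_192)    # 80 steps per rollout with the old config
    new_steps = 1 * (131_072 // 16_384)   # 8 steps per rollout with the new config
    print(old_steps, new_steps)           # 80 8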
47 changes: 25 additions & 22 deletions integrations/rl/puffer/puffer_env.py
@@ -94,7 +94,7 @@ def __init__(
self.num_agents = self.controlled_agent_mask.sum().item()

# Reset the environment and get the initial observations
self.observations = self.env.reset()[self.controlled_agent_mask]
self.observations = self.env.reset(self.controlled_agent_mask)

# This assigns a bunch of buffers to self.
# You can't use them because you want torch, not numpy
@@ -215,9 +215,8 @@ def step(self, action):
)
== self.controlled_agent_mask.sum(dim=1)
)[0]
.cpu()
.numpy()
)
done_worlds_cpu = done_worlds.cpu().numpy()

# Add rewards for living agents
self.agent_episode_returns[self.live_agent_mask] += reward[
@@ -309,14 +308,18 @@ def step(self, action):
controlled_mask
]

'''
ego_state = LocalEgoState.from_tensor(
self_obs_tensor=self.env.sim.self_observation_tensor(),
backend="torch",
device=self.device,
mask=controlled_mask
)
agent_speeds = (
ego_state.speed[done_worlds][controlled_mask].cpu().numpy()
)
'''
agent_speeds = 0 #(
# TODO: What are you logging here? Final speed of last agents in finished worlds?
# ego_state.speed[done_worlds][controlled_mask].cpu().numpy()
#)

if num_finished_agents > 0:
# fmt: off
@@ -328,27 +331,28 @@ def step(self, action):
"perc_veh_collisions": collision_rate.item(),
"total_controlled_agents": self.num_agents,
"control_density": self.num_agents / self.controlled_agent_mask.numel(),
"mean_agent_speed": agent_speeds.mean().item(),
#"mean_agent_speed": agent_speeds.mean().item(),
"mean_agent_speed": 0,
"episode_length": self.episode_lengths[done_worlds, :].mean().item(),
}
)
# fmt: on
# Asynchronously reset the done worlds and empty storage
for idx in done_worlds:
self.env.sim.reset([idx])
self.episode_returns[idx] = 0
self.agent_episode_returns[idx, :] = 0
self.episode_lengths[idx, :] = 0
# Reset the live agent mask so that the next alive mask will mark
# all agents as alive for the next step
self.live_agent_mask[idx] = self.controlled_agent_mask[idx]
self.offroad_in_episode[idx, :] = 0
self.collided_in_episode[idx, :] = 0

# Asynchronously reset the done worlds and empty storage
self.env.sim.reset(done_worlds_cpu)
self.episode_returns[done_worlds] = 0
self.agent_episode_returns[done_worlds, :] = 0
self.episode_lengths[done_worlds, :] = 0
# Reset the live agent mask so that the next alive mask will mark
# all agents as alive for the next step
self.live_agent_mask[done_worlds] = self.controlled_agent_mask[done_worlds]
self.offroad_in_episode[done_worlds, :] = 0
self.collided_in_episode[done_worlds, :] = 0

# (6) Get the next observations. Note that we do this after resetting
# the worlds so that we always return a fresh observation
next_obs = self.env.get_obs()[self.controlled_agent_mask]

#next_obs = self.env.get_obs()[self.controlled_agent_mask]
next_obs = self.env.get_obs(self.controlled_agent_mask)
self.observations = next_obs
self.rewards = reward_controlled
self.terminals = terminal
@@ -416,7 +420,6 @@ def render_agent_observations(self, env_idx):

def resample_scenario_batch(self):
"""Sample and set new batch of WOMD scenarios."""

# Swap the data batch
self.env.swap_data_batch()

@@ -428,7 +431,7 @@ def resample_scenario_batch(self):

self.reset() # Reset storage
# Get info from new worlds
self.observations = self.env.reset()[self.controlled_agent_mask]
self.observations = self.env.reset(self.controlled_agent_mask)

self.log_data_coverage()

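The recurring pattern in this file is replacing per-world Python loops with batched tensor indexing plus a single sim.reset call over all finished worlds. A standalone sketch of that indexing idea, with illustrative shapes rather than the real simulator buffers:

    import torch

    num_worlds, max_agents = 4, 3
    episode_returns = torch.rand(num_worlds, max_agents)
    controlled_agent_mask = torch.ones(num_worlds, max_agents, dtype=torch.bool)
    live_agent_mask = torch.zeros(num_worlds, max_agents, dtype=torch.bool)

    done_worlds = torch.tensor([0, 2])   # indices of finished worlds
    episode_returns[done_worlds] = 0     # one vectorized write replaces the per-world loop
    live_agent_mask[done_worlds] = controlled_agent_mask[done_worlds]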
1 change: 0 additions & 1 deletion integrations/rl/puffer/utils.py
@@ -96,7 +96,6 @@ def encode_observations(self, observations):
observations, self.env
)
ego_embed = self.ego_embed(ego_state)

partner_embed, _ = self.partner_embed(road_objects).max(dim=1)
road_map_embed, _ = self.road_map_embed(road_graph).max(dim=1)
embed = torch.cat([ego_embed, partner_embed, road_map_embed], dim=1)
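For context on the encoder touched here: each set-valued input (partner agents, road graph points) is embedded per element and then max-pooled over the set dimension, which keeps the output size fixed and order-independent. A toy sketch of that pooling step, with made-up dimensions:

    import torch
    import torch.nn as nn

    embed = nn.Linear(10, 64)                      # per-element embedding, toy feature size
    partners = torch.randn(32, 63, 10)             # (batch, num_partners, features)
    partner_embed, _ = embed(partners).max(dim=1)  # permutation-invariant max pooling
    print(partner_embed.shape)                     # torch.Size([32, 64])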
26 changes: 15 additions & 11 deletions networks/late_fusion.py
@@ -109,11 +109,11 @@ def __init__(
self,
action_dim,
input_dim=64,
hidden_dim=128,
hidden_dim=192,
pred_heads_arch=[128],
num_transformer_layers=0,
dropout=0.00,
act_func="tanh",
act_func="relu",
):
super().__init__()
self.input_dim = input_dim
@@ -129,36 +129,39 @@ def __init__(
pufferlib.pytorch.layer_init(
nn.Linear(constants.EGO_FEAT_DIM, input_dim)
),
nn.LayerNorm(input_dim),
#nn.LayerNorm(input_dim),
self.act_func,
nn.Dropout(self.dropout),
#nn.Dropout(self.dropout),
pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),
)

self.partner_embed = nn.Sequential(
pufferlib.pytorch.layer_init(
nn.Linear(constants.PARTNER_FEAT_DIM, input_dim)
),
nn.LayerNorm(input_dim),
#nn.LayerNorm(input_dim),
self.act_func,
nn.Dropout(self.dropout),
#nn.Dropout(self.dropout),
pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),
)

self.road_map_embed = nn.Sequential(
pufferlib.pytorch.layer_init(
nn.Linear(constants.ROAD_GRAPH_FEAT_DIM, input_dim)
),
nn.LayerNorm(input_dim),
#nn.LayerNorm(input_dim),
self.act_func,
nn.Dropout(self.dropout),
#nn.Dropout(self.dropout),
pufferlib.pytorch.layer_init(nn.Linear(input_dim, input_dim)),
)

'''
self.shared_embed = nn.Sequential(
nn.Linear(self.input_dim * self.num_modes, self.hidden_dim),
nn.Dropout(self.dropout)
#nn.Dropout(self.dropout)
self.act_func,
)
'''

if self.num_transformer_layers > 0:
self.transformer_layers = nn.Sequential(
@@ -190,7 +193,8 @@ def encode_observations(self, observation):
if self.num_transformer_layers > 0:
embed = self.transformer_layers(embed)

return self.shared_embed(embed)
return embed
#return self.shared_embed(embed)

def forward(self, obs, action=None, deterministic=False):

@@ -227,4 +231,4 @@ def _build_network(self, input_dim, net_arch, network_type):
)
)

return nn.Sequential(*layers)
return nn.Sequential(*layers)
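One consistency note (my reading of the diff, not stated in it): with shared_embed commented out, encode_observations now returns the concatenation of the three modality embeddings, so the encoder output width becomes 3 * input_dim. The hidden_dim change to 192 in the config and constructor appears to match that:

    input_dim = 64
    num_modes = 3                 # ego, partner, and road map embeddings
    encoder_out = input_dim * num_modes
    assert encoder_out == 192     # matches the new hidden_dim above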