DDQN

paul90317-NYCU · Oct 12, 2023 · 2131c1c
1 parent 70c157e
commit 2131c1c
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 3 deletions.
diff --git a/dqn_agent_atari.py b/dqn_agent_atari.py
@@ -98,9 +98,10 @@ def update_behavior_network(self):
 		# 5. update behavior net
 
 		with torch.no_grad():
+			q_max : torch.Tensor = self.behavior_net(next_state)
+			q_max = torch.argmax(q_max, dim=1).reshape(self.batch_size,1)
 			q_next = self.target_net(next_state)
-			q_next : torch.Tensor = torch.max(q_next, dim = 1)[0]
-			q_next = q_next.reshape(self.batch_size, 1)
+			q_next : torch.Tensor = q_next.gather(1,q_max)
 
 			# if episode terminates at next_state, then q_target = reward
 			q_target = self.gamma * q_next * yet + reward

diff --git a/main.py b/main.py
@@ -12,7 +12,7 @@
 		"eps_decay": 1000000,
 		"eval_epsilon": 0.01,
 		"replay_buffer_capacity": 100000,
-		"logdir": 'log/DQN/',
+		"logdir": 'log/DDQN/',
 		"update_freq": 4,
 		"update_target_freq": 10000,
 		"learning_rate": 0.0000625,