main_dqn.py
import numpy as np
from dqn_agent import DQNAgent
from utils import plot_learning_curve, make_env
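
# Entry point: train a DQN agent on PongNoFrameskip-v4, using the
# environment wrappers provided by utils.make_env.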
if __name__ == '__main__':
    env = make_env('PongNoFrameskip-v4')
    best_score = -np.inf
    load_checkpoint = False
    n_games = 200

    agent = DQNAgent(gamma=0.99, epsilon=0.8, lr=0.0001,
                     input_dims=(env.observation_space.shape),
                     n_actions=env.action_space.n, mem_size=20000,
                     eps_min=0.05, batch_size=32, replace=1000,
                     eps_dec=1e-5, chkpt_dir='models/', algo='DQNAgent',
                     env_name='PongNoFrameskip-v4')
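
    # Optionally resume from previously saved model weights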
    if load_checkpoint:
        agent.load_models()

    fname = agent.algo + '_' + agent.env_name + '_lr' + str(agent.lr) + '_' \
            + str(n_games) + 'games'
    figure_file = 'plots/' + fname + '.png'

    n_steps = 0
    scores, eps_history, steps_array = [], [], []
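
    # Training loop: run n_games episodes, acting epsilon-greedily and
    # learning from stored transitions after every environment step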
    for i in range(n_games):
        done = False
        observation = env.reset()
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward

            if not load_checkpoint:
                agent.store_transition(observation, action,
                                       reward, observation_, int(done))
                agent.learn()
            observation = observation_
            n_steps += 1
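
        # Per-episode bookkeeping: record the score and report a
        # 40-episode moving average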
        scores.append(score)
        steps_array.append(n_steps)

        avg_score = np.mean(scores[-40:])
        print('episode: ', i, 'score: ', score,
              ' average score %.1f' % avg_score,
              'best score %.2f' % best_score,
              'epsilon %.2f' % agent.epsilon, 'steps', n_steps)

        if avg_score > best_score:
            #if not load_checkpoint:
            #    agent.save_models()
            best_score = avg_score

        eps_history.append(agent.epsilon)
        if load_checkpoint and n_steps >= 18000:
            break
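
    # Plot the learning curve: scores and epsilon versus total steps taken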
    plot_learning_curve(steps_array, scores, eps_history, figure_file)