-
Notifications
You must be signed in to change notification settings - Fork 6
/
Train.py
executable file
·70 lines (55 loc) · 1.59 KB
/
Train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""
A simple demo of pendulum control using ddpg algorithm.
Feel free to use the code.
If you like it, please follow me or give me a star.
If you don't, any comment is welcome.
Just open an issue in my repository, I'll handle it ASAP.
There will be more deep RL algorithm coming soon.
"""
from ddpg import *
import matplotlib.pyplot as plt
import seaborn as sns
import time
start = time.clock()
np.random.seed(1)
tf.set_random_seed(1)
# hyper parameters
MAX_EPISODES = 2000
MAX_EP_STEPS = 200
RENDER = False
ENV_NAME = 'Pendulum-v0'
env = gym.make(ENV_NAME)
agent = DDPG(env)
var = 3 # control exploration
episode_reward = []
for i in range(MAX_EPISODES+1):
s = env.reset()
ep_reward = 0
if var >= 0.1:
var *= .985 # decay the action randomness
for j in range(MAX_EP_STEPS):
if RENDER:
env.render()
# Add exploration noise
a = agent.action(s)
a = np.clip(np.random.normal(a, var), -2, 2) # add randomness to action selection for exploration
s_, r, done, info = env.step(a)
agent.perceive(s, a, r, s_)
s = s_
ep_reward += r
if j == MAX_EP_STEPS-1:
episode_reward.append(ep_reward)
print('Episode:', i, ' Reward: %i' % int(ep_reward), 'Explore: %.2f' % var, )
if ep_reward > 10:
RENDER = True
break
agent.sess.close()
end = time.clock()
print('Running time: %s Seconds' % (end - start))
plt.figure(1)
sns.set(style="darkgrid")
plt.plot(episode_reward, label='DDPG')
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.legend(loc='best')
plt.show()