forked from StevenLOL/icra2017-visual-navigation
-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathevaluate.py
executable file
·100 lines (77 loc) · 2.88 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import random
import sys
from network import ActorCriticFFNetwork
from training_thread import A3CTrainingThread
from scene_loader import THORDiscreteEnvironment as Environment
from utils.ops import sample_action
from constants import ACTION_SIZE
from constants import CHECKPOINT_DIR
from constants import NUM_EVAL_EPISODES
from constants import VERBOSE
from constants import TASK_TYPE
from constants import TASK_LIST
if __name__ == '__main__':
device = "/cpu:0" # use CPU for display tool
network_scope = TASK_TYPE
list_of_tasks = TASK_LIST
scene_scopes = list_of_tasks.keys()
global_network = ActorCriticFFNetwork(action_size=ACTION_SIZE,
device=device,
network_scope=network_scope,
scene_scopes=scene_scopes)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
saver = tf.train.Saver()
checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_DIR)
if checkpoint and checkpoint.model_checkpoint_path:
saver.restore(sess, checkpoint.model_checkpoint_path)
print("checkpoint loaded: {}".format(checkpoint.model_checkpoint_path))
else:
print("Could not find old checkpoint")
scene_stats = dict()
for scene_scope in scene_scopes:
scene_stats[scene_scope] = []
for task_scope in list_of_tasks[scene_scope]:
env = Environment({
'scene_name': scene_scope,
'terminal_state_id': int(task_scope)
})
ep_rewards = []
ep_lengths = []
ep_collisions = []
scopes = [network_scope, scene_scope, task_scope]
for i_episode in range(NUM_EVAL_EPISODES):
env.reset()
terminal = False
ep_reward = 0
ep_collision = 0
ep_t = 0
while not terminal:
pi_values = global_network.run_policy(sess, env.s_t, env.target, scopes)
action = sample_action(pi_values)
env.step(action)
env.update()
terminal = env.terminal
if ep_t == 10000: break
if env.collided: ep_collision += 1
ep_reward += env.reward
ep_t += 1
ep_lengths.append(ep_t)
ep_rewards.append(ep_reward)
ep_collisions.append(ep_collision)
if VERBOSE: print("episode #{} ends after {} steps".format(i_episode, ep_t))
print('evaluation: %s %s' % (scene_scope, task_scope))
print('mean episode reward: %.2f' % np.mean(ep_rewards))
print('mean episode length: %.2f' % np.mean(ep_lengths))
print('mean episode collision: %.2f' % np.mean(ep_collisions))
scene_stats[scene_scope].extend(ep_lengths)
break
break
print('\nResults (average trajectory length):')
for scene_scope in scene_stats:
print('%s: %.2f steps'%(scene_scope, np.mean(scene_stats[scene_scope])))