-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathhuman_player.py
92 lines (82 loc) · 2.35 KB
/
human_player.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import contextlib
import gymnasium as gym
import sys
import termios
import time
import utils
@contextlib.contextmanager
def raw_mode(file):
    """
    Temporarily switch a terminal to unechoed, non-canonical input
    :param file: open file object attached to a terminal (needs fileno())
    :return: context manager; the saved terminal attributes are put back on
        exit, including when the managed body raises
    """
    saved = termios.tcgetattr(file.fileno())
    # Work on a copy so the saved attribute list stays untouched for restoring.
    modified = list(saved)
    # Entry 3 of the attribute list holds the local-mode flags.
    modified[3] &= ~(termios.ECHO | termios.ICANON)
    try:
        termios.tcsetattr(file.fileno(), termios.TCSADRAIN, modified)
        yield
    finally:
        termios.tcsetattr(file.fileno(), termios.TCSADRAIN, saved)
def keyboard_code_to_action(keyboard_code: int) -> int:
    """
    Translate the code of a pressed key into an action id (for Taxi-v3)
    :param keyboard_code (int): code of the key that was detected
    :return (int): action id for Taxi-v3 (and -1 for ESC)
    :raises ValueError: if the code does not belong to a mapped key
    """
    # (key code, action id) pairs; the key each code stands for is noted
    # alongside.
    bindings = (
        (115, 0),   # S
        (119, 1),   # W
        (100, 2),   # D
        (97, 3),    # A
        (106, 4),   # J
        (107, 5),   # K
        (27, -1),   # ESC
    )
    for code, action in bindings:
        if keyboard_code == code:
            return action
    raise ValueError(f"Unknown keyboard code {keyboard_code}!")
def get_keyboard_code():
    """
    Read one character from stdin in raw mode and map it to an action id
    :return: the value keyboard_code_to_action gives for the pressed key, or
        None when nothing was read, the character was Ctrl-D (code 4), or
        EOFError was raised while reading
    """
    with raw_mode(sys.stdin), contextlib.suppress(EOFError):
        key = sys.stdin.read(1)
        if not key or key == chr(4):
            return None
        return keyboard_code_to_action(ord(key))
    # Only reached when EOFError was suppressed above.
    return None
def _render_frame(env):
    """Render env and print the frame when render() hands back text."""
    frame = env.render()
    # Text render modes return the frame instead of drawing it, so it has to
    # be printed here; other modes return something else and are left alone.
    if isinstance(frame, str):
        print(frame)


def human_player(env):
    """
    Play Taxi-v3 as a human player with WASD keys + J (Pickup) K (Dropoff)

    Episodes are played back to back: when one ends its return is reported,
    the environment is reset and play continues. The function returns when
    the player presses ESC or when no key can be read (end of input / Ctrl-D).

    :param env: environment to play; must provide reset(), render() and a
        step() returning (obs, reward, terminated, truncated, info)
    :raises ValueError: propagated from get_keyboard_code when a key without
        a mapped action is pressed
    """
    print("Use WASD to move in the environment, use J and K to Pickup or Dropoff the passenger and end game with ESC or keyboard interrupt (Ctrl-C)")
    env.reset()
    ep_return = 0
    _render_frame(env)
    while True:
        action = get_keyboard_code()
        # -1 is the ESC sentinel; None means no key was read (EOF / Ctrl-D).
        # Neither is a valid action, so both end the session instead of
        # being passed on to env.step().
        if action is None or action == -1:
            return
        _, rew, terminated, truncated, _ = env.step(action)
        ep_return += rew
        _render_frame(env)
        if terminated or truncated:
            outcome = "SOLVED" if ep_return > 0 else "FAILED"
            print(f"EPISODE FINISHED WITH RETURN {ep_return} - {outcome}")
            # Start the next episode straight away.
            env.reset()
            ep_return = 0
            _render_frame(env)


if __name__ == '__main__':
    # A render mode has to be chosen at creation time, otherwise render()
    # shows nothing; 'ansi' yields text frames that fit a terminal game.
    env = gym.make('Taxi-v3', render_mode='ansi')
    try:
        human_player(env)
    except KeyboardInterrupt:
        # Ctrl-C is an advertised way to quit, so leave without a traceback.
        pass
    finally:
        env.close()