# -*- coding: utf-8 -*-
"""
© Copyright 2014. Joon Yang & Jaemin Cheun. All rights reserved.
Finds an optimal policy using expectimax search
"""
import progress
class ExpectimaxAgent:
    """Finds an optimal policy (the best question to ask) via expectimax search.

    The computer (agent 0) is the maximizing player; the human's answers act
    as chance nodes, weighted by the state's probability estimate.  The search
    depth counts computer moves: depth is only decremented when the computer
    acts, and the search bottoms out when the budget reaches zero.
    """

    def __init__(self, depth=2):
        """depth: number of computer moves to look ahead.

        Accepts an int or an int-like string (e.g. '2') for backward
        compatibility with callers that passed the old string default.
        """
        self.index = 0  # Computer is agent 0
        self.depth = int(depth)

    def getPolicy(self, initialState):
        """Return the expectimax action from initialState using self.depth.

        Returns None when the computer has no legal actions at the root.
        """

        def getReward(state):
            # Heuristic value of a state.
            return state.getAverage()

        def terminalTest(state, depth):
            # The search stops only when the depth budget is exhausted.
            return depth == 0

        def playerNode(state, depth):
            """Chance node: expected value over the human's possible answers.

            Answer 0 ("no") is weighted by (1 - p) and any other answer by p,
            where p = state.getProbability().  The state's own reward is added
            as a baseline before accumulating the weighted successor values.
            """
            if terminalTest(state, depth):
                return getReward(state)
            QValue = getReward(state)
            for act in state.getLegalActions("human"):
                # Weight this branch by how likely the human's answer is.
                weight = (1 - state.getProbability()) if act == 0 else state.getProbability()
                QValue += weight * MaxValue(state.generateSuccessor("human", act), depth)
            return QValue

        def MaxValue(state, depth):
            """Max node: the computer picks the question with the best value."""
            max_value = -float('inf')
            for act in state.getLegalActions("computer"):
                # Depth is consumed by the computer's move, not the human's.
                new_value = playerNode(state.generateSuccessor("computer", act), depth - 1)
                if max_value < new_value:
                    max_value = new_value
            return max_value

        def ExpectimaxDecision(state):
            """Root max node: return the arg-max action (None if no actions)."""
            max_value, policy = -float('inf'), None
            for act in state.getLegalActions("computer"):
                new_value = playerNode(state.generateSuccessor("computer", act), self.depth - 1)
                if max_value < new_value:
                    max_value, policy = new_value, act
            return policy

        # Run the expectimax algorithm from the root.
        return ExpectimaxDecision(initialState)