-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStrategies.java
180 lines (149 loc) · 5.86 KB
/
Strategies.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import java.lang.Runtime;
import java.util.List;
import java.util.Map;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.*;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
public class Strategies {
private Strategies() {
// uninstantiable utility class
}
private static class IterState {
double expectedPayoffs[];
double maxRegret;
public IterState(int numPlayers) {
expectedPayoffs = new double[numPlayers];
maxRegret = Double.MAX_VALUE;
}
public void setExpectedPayoff(int player, double payoff) {
expectedPayoffs[player] = payoff;
}
public double[] getExpectedPayoffs() {
return expectedPayoffs;
}
public void setMaxRegret(double maxRegret) {
this.maxRegret = maxRegret;
}
public double getMaxRegret() {
return maxRegret;
}
}
public static double[] expectedPayoffs(GameTree gt, Strategy s) {
List<Strategy> strats = Lists.newArrayList();
for (int p = 0; p < gt.getGame().getNumPlayers(); p++) {
strats.add(s);
}
int numPlayers = gt.getGame().getNumPlayers();
double[] epayoffs = new double[numPlayers];
for (int p = 0; p < numPlayers; p++) {
gt.reset();
epayoffs[p] = payoff(gt, gt.getRoot(), strats, p, false);
}
return epayoffs;
}
public static Strategy nes(GameTree gt, double epsilon) {
Strategy s = new Strategy(gt);
PokerGame game = gt.getGame();
IterState state = new IterState(game.getNumPlayers());
int iter = 0;
while (state.getMaxRegret() >= epsilon) {
iter++;
s.average(response(gt, s, state), (1.0/(iter+1)));
System.out.println(state.getMaxRegret());
}
System.out.println(iter + " iterations");
return s;
}
private static Strategy response(GameTree gt, Strategy s, IterState state) {
List<Strategy> strats = Lists.newArrayList();
Strategy response = new Strategy(gt);
for (int p = 1; p < gt.getGame().getNumPlayers(); p++) {
strats.add(s);
}
double maxRegret = 0;
for (int p = 0; p < gt.getGame().getNumPlayers(); p++) {
gt.reset();
strats.add(p, s);
double prePayoff = payoff(gt, gt.getRoot(), strats, p, false);
strats.remove(p);
gt.reset();
strats.add(p, response);
double postPayoff = payoff(gt, gt.getRoot(), strats, p, true);
strats.remove(p);
assert postPayoff >= prePayoff;
maxRegret = Math.max(maxRegret, postPayoff - prePayoff);
state.setExpectedPayoff(p, prePayoff);
}
state.setMaxRegret(maxRegret);
return response;
}
private static double payoff(GameTree gt, GameNode n, List<Strategy> strats, int rplayer, boolean compBestResp) {
if (n.getPayoff() != null) {
return n.getPayoff();
}
List<GameNode> children = n.getChildren();
if (children.size() == 0) {
return n.setPayoff(terminalPayoff(n, strats, rplayer, compBestResp));
}
InfoSet iset = n.getInfoSet();
double[][] cpayoffs = new double[iset.size()][children.size()];
double[] tpayoffs = new double[children.size()];
for (int i = 0; i < iset.size(); i++) {
GameNode in = iset.get(i);
List<GameNode> nchildren = in.getChildren();
for (int cindex = 0; cindex < nchildren.size(); cindex++) {
GameNode child = nchildren.get(cindex);
double pf = payoff(gt, child, strats, rplayer, compBestResp);
cpayoffs[i][cindex] += pf;
tpayoffs[cindex] += pf;
}
}
int player = n.getPlayer();
// compute best action for a responding player
int aindex = -1;
if (player == rplayer && compBestResp) {
Preconditions.checkArgument(n instanceof ActionNode, "responding player must have actions");
ActionNode an = (ActionNode) n;
aindex = Arrays.maxarg(tpayoffs);
strats.get(player).setAction(iset, an.getBet(aindex));
}
// set payoffs for all nodes in the information set
for (int i = 0; i < iset.size(); i++) {
GameNode in = iset.get(i);
double payoff = (player == rplayer && compBestResp)
? cpayoffs[i][aindex] : Arrays.sum(cpayoffs[i]);
in.setPayoff(payoff);
}
return n.getPayoff();
}
public static double terminalPayoff(GameNode n, List<Strategy> strats, int player, boolean compBestResp) {
Preconditions.checkArgument(n instanceof PayoffNode, "terminal node must be a PayoffNode");
double payoff = ((PayoffNode) n).getPayoffForPlayer(player);
return freq(n, strats, player, compBestResp) * payoff;
}
private static double freq(GameNode n, List<Strategy> strats, int rplayer, boolean compBestResp) {
if (n.getFreq() != null) {
return n.getFreq();
}
GameNode p = n.getParent();
int cindex = n.getChildIndex();
if (p == null) {
return 1.0;
}
double f = 1.0;
if (p instanceof DealNode) {
DealNode dn = (DealNode) p;
f = dn.getFreq(cindex);
} else if (p instanceof ActionNode) {
ActionNode an = (ActionNode) p;
if (an.getPlayer() != rplayer || !compBestResp) {
InfoSet iset = an.getInfoSet();
f = strats.get(an.getPlayer()).getProb(iset, an.getBet(cindex));
}
}
return n.setFreq(freq(p, strats, rplayer, compBestResp) * f);
}
}