compute_explanations.py (forked from marcotcr/anchor-experiments)
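"""Compute LIME or anchor explanations for a model trained on one of the
benchmark datasets, checkpointing partial results along the way.

Example invocation (the output filename is only an illustration):

    python compute_explanations.py -d adult -e anchor -m xgboost -o out.pkl
"""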
from __future__ import print_function
import argparse
import pickle

import xgboost
import sklearn.linear_model
import sklearn.metrics
import sklearn.neural_network
import sklearn.svm

# Repo-local modules; plain imports so the script can run as __main__.
import utils
import anchor_tabular
def main():
    parser = argparse.ArgumentParser(description='Compute some explanations.')
    parser.add_argument('-d', dest='dataset', required=True,
                        choices=['adult', 'recidivism', 'lending', 'moons',
                                 'circles', 'linear'],
                        help='dataset to use')
    parser.add_argument('-e', dest='explainer', required=True,
                        choices=['lime', 'anchor'],
                        help='explainer, either anchor or lime')
    parser.add_argument('-m', dest='model', required=True,
                        choices=['xgboost', 'logistic', 'nn', 'svm'],
                        help='model: xgboost, logistic, nn or svm')
    parser.add_argument('-c', dest='checkpoint', required=False,
                        default=200, type=int,
                        help='checkpoint after this many explanations')
    parser.add_argument('-o', dest='output', required=True,
                        help='output pickle file')
    args = parser.parse_args()
    dataset = utils.load_dataset(args.dataset, balance=True)
    ret = {}
    ret['dataset'] = args.dataset
    for x in ['train_idx', 'test_idx', 'validation_idx']:
        ret[x] = getattr(dataset, x)
    # The explainer builds a discretizer and one-hot encoder over the data;
    # its .encoder is reused below to featurize inputs for the models.
    explainer = anchor_tabular.AnchorTabularExplainer(
        dataset.class_names, dataset.feature_names,
        dataset.data, dataset.categorical_names)
    explainer.fit(dataset.train, dataset.labels_train,
                  dataset.validation, dataset.labels_validation)
    # Fit the chosen black-box classifier on the one-hot encoded training
    # data. probability=True lets the SVM expose predict_proba for LIME.
    if args.model == 'xgboost':
        c = xgboost.XGBClassifier(n_estimators=400, nthread=10, seed=1)
    elif args.model == 'logistic':
        c = sklearn.linear_model.LogisticRegression()
    elif args.model == 'nn':
        c = sklearn.neural_network.MLPClassifier(hidden_layer_sizes=(50, 50))
    elif args.model == 'svm':
        c = sklearn.svm.SVC(gamma=2, C=1, probability=True)
    c.fit(explainer.encoder.transform(dataset.train), dataset.labels_train)
    ret['encoder'] = explainer.encoder
    ret['model'] = c
    ret['model_name'] = args.model

    # Wrap the model so callers see functions from raw data to predictions
    # (the one-hot encoding happens inside).
    def predict_fn(x):
        return c.predict(explainer.encoder.transform(x))

    def predict_proba_fn(x):
        return c.predict_proba(explainer.encoder.transform(x))
    print('Train', sklearn.metrics.accuracy_score(dataset.labels_train,
                                                  predict_fn(dataset.train)))
    print('Test', sklearn.metrics.accuracy_score(dataset.labels_test,
                                                 predict_fn(dataset.test)))
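    # Anchor search hyperparameters (roughly, per the Anchors paper's LUCB
    # beam search): target precision, precision-estimation tolerance,
    # confidence level, stopping tolerance, and samples drawn per round.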
    threshold = 0.95
    tau = 0.1
    delta = 0.05
    epsilon_stop = 0.05
    batch_size = 100
    if args.explainer == 'anchor':
        explain_fn = utils.get_reduced_explain_fn(
            explainer.explain_lucb_beam, c.predict, threshold=threshold,
            delta=delta, tau=tau, batch_size=batch_size // 2,
            sample_whole_instances=True,
            beam_size=10, epsilon_stop=epsilon_stop)
    elif args.explainer == 'lime':
        explain_fn = utils.get_reduced_explain_fn(
            explainer.explain_lime, c.predict_proba, num_features=5,
            use_same_dist=True)
    ret['exps'] = []
    for i, d in enumerate(dataset.validation, start=1):
        if i % 100 == 0:
            print(i)
        if i % args.checkpoint == 0:
            print('Checkpointing')
            # Pickle needs binary mode ('wb'), not text mode.
            with open(args.output + '.checkpoint', 'wb') as f:
                pickle.dump(ret, f)
        ret['exps'].append(explain_fn(d))
    with open(args.output, 'wb') as f:
        pickle.dump(ret, f)
if __name__ == '__main__':
    main()
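
# A minimal sketch of loading the saved results for later analysis (the
# filename 'out.pkl' is only an illustration, matching the -o flag above):
#
#     import pickle
#     with open('out.pkl', 'rb') as f:
#         ret = pickle.load(f)
#     print(ret['model_name'], len(ret['exps']))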