-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhyperparam_tuning_ffnn.py
81 lines (61 loc) · 1.97 KB
/
hyperparam_tuning_ffnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""
script to do hyperparameter tuning for ffnn on mcrae data
"""
import subprocess, os
from ray import tune
import classifier_main
from ray.tune.schedulers import ASHAScheduler
if __name__ == '__main__':
    # Hyperparameter sweep for the FFNN classifier, driven by Ray Tune.
    #
    # Alternative sweeps kept for reference (swap in to reproduce earlier runs):
    #   datasets = ['mc_rae_real', 'buchanan', 'binder']
    #   datasets = ['mc_rae_real', 'binder']
    #   clusters = [1]; embedding_type = ['bert', 'glove']   # 1k + glove run
    #   epochs = [30, 50]
    model_names = ['ffnn']
    dataset_names = ['binder']
    # 5k run; restricting to bert avoids repeating the glove trials.
    cluster_counts = [5]
    embedding_kinds = ['bert']
    epoch_options = [50]
    dropout_options = [0.5, 0.2, 0.0]
    lr_options = [1e-5, 1e-4, 1e-3]
    hidden_size_options = [50, 100, 300]

    search_space = {
        # Fixed settings.
        "seed": 42,
        "layer": 8,
        "batch_size": 1,
        "k_fold": 10,
        'TUNE_ORIG_WORKING_DIR': os.getcwd(),
        # Exhaustively enumerated dimensions.
        "clusters": tune.grid_search(cluster_counts),
        "embedding_type": tune.grid_search(embedding_kinds),
        "model": tune.grid_search(model_names),
        "train_data": tune.grid_search(dataset_names),
        # Randomly sampled dimensions (one draw per trial).
        "epochs": tune.choice(epoch_options),
        "dropout": tune.choice(dropout_options),
        "lr": tune.choice(lr_options),
        "hidden_size": tune.choice(hidden_size_options),
        # Pass-through flags; presumably consumed by classifier_main — confirm there.
        "print_dataset": False,
        "save_path": None,
        "do_dumb_thing": False,
        "dev_equals_train": False,
        "tuning": True,
        "allbuthomonyms": False,
        "zscore": False
    }

    analysis = tune.run(
        classifier_main.main,
        config=search_space,
        # ASHA early-stops underperforming trials on dev MAP@k (higher is better).
        scheduler=ASHAScheduler(metric="dev_MAP_at_k", mode="max"),
        num_samples=25,
        # Cap per-trial CPUs to avoid saturating a 16-core laptop;
        # uncomment or lower to use more firepower:
        #resources_per_trial={'cpu': 4},
        name="binder_ffnn_5k_tuning_kfold_10_14_2022",
        #trial_name_creator = tune.function(lambda trial: trial.config['embedding_type'] + str(trial.config['clusters']) + '_' + trial.trial_id),
        #resume="AUTO"
        resume=False
    )
    # Per-trial result dataframes from this tune.run call, keyed by trial logdir.
    dfs = analysis.trial_dataframes