-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun-experiment
executable file
·137 lines (124 loc) · 5.53 KB
/
run-experiment
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python3
import sys
import os
import datetime
import subprocess
import shutil
import fileinput
from threading import Thread
from config import ConfigHelper, ConfigKWArg
def get_dir(config):
    """Create (if needed) and return the directory for one experiment run.

    The directory is named
    ``<timestamp>-<config.name>-<hostname>-<devices>.exp`` and lives under
    ``config.exp_dir``.  ``timestamp`` is the local time formatted as
    ``yymmddHHMMSS``; ``devices`` is ``GPUs<CUDA_VISIBLE_DEVICES>`` when that
    environment variable is set and non-empty, otherwise ``CPU``.

    Args:
        config: Parsed configuration object; only its ``name`` and
            ``exp_dir`` attributes are read here.

    Returns:
        The path of the experiment directory.  It is created with
        ``exist_ok=True``, so an already existing directory is reused.
    """
    # Imported locally so the block does not depend on a new file-level
    # import.  Asking the OS directly avoids spawning the external
    # `hostname` program, which fails where that binary is not installed.
    import socket

    hostname = socket.gethostname().strip()

    visible = os.getenv("CUDA_VISIBLE_DEVICES", "")
    devices = "GPUs" + visible if visible else "CPU"

    now = datetime.datetime.now().strftime("%y%m%d%H%M%S")
    name = "%s.exp" % "-".join((now, config.name, hostname, devices))

    dirname = os.path.join(config.exp_dir, name)
    os.makedirs(dirname, exist_ok=True)
    return dirname
def make_config_helper():
    """Build the ConfigHelper that describes run-experiment's options.

    Registers the ``context``, ``output``, ``name``, ``desc``, ``exp_dir``
    and ``command`` arguments; only ``command`` is required.

    NOTE(review): the second positional argument passed to ConfigKWArg looks
    like an example value for the generated config stub -- confirm against
    the ``config`` module.

    Returns:
        The populated ConfigHelper instance.
    """
    # Imported locally so the block does not depend on a new file-level import.
    import getpass

    # $USER is frequently unset (containers, cron); without a fallback the
    # default directory would literally be "/tmp/None-experiments".
    try:
        user = os.getenv("USER") or getpass.getuser()
    except (ImportError, KeyError, OSError):
        user = "unknown"

    helper = ConfigHelper('''
Runs an experiment and saves the output and important contextual files to an experiments directory.
Default configuration args can be specified in a YAML or JSON configuration file as well and
then overridden with command line args.
The command is run with OUTPUT_DIR env variable set to the output directory for the experiment, if
files are to be written by the experiment to an experiment-specific context.
''')
    helper.add_argument(ConfigKWArg("context", "./src/train-model.py", type='filesr', required=False,
                                    short_name="c", nargs='+',
                                    help='Context files (or directories) to save with the results. They are saved before running the experiment. This is usually the code for the experiment.'))
    helper.add_argument(ConfigKWArg("output", "./tf-logdir", type='filesr', required=False,
                                    short_name="o", nargs='+',
                                    help='Output files (or directories) from the experiment. They are saved after running the experiment. NOTE that OUTPUT_DIR is also passed to the experiment where output files can be saved directly without using the -o flag.'))
    helper.add_argument(ConfigKWArg("name", "preliminary-experiment-lambda-1e-1", type=str, required=False,
                                    short_name="n", default='untitled',
                                    help='The name of the experiment'))
    helper.add_argument(ConfigKWArg("desc", "The first lambda setting tried.", type=str, required=False,
                                    short_name="d", default='',
                                    help='The description of the experiment'))
    helper.add_argument(ConfigKWArg("exp_dir", "~/scratch/gan-experiments",
                                    type='filer', required=False,
                                    default="/tmp/%s-experiments" % user,
                                    help='The base experiments directory in which to save all experiments'))
    helper.add_argument(ConfigKWArg("command", "python3 train-model.py",
                                    type=str, required=True,
                                    help='The command to run'))
    return helper
def tee(stream_in, stream_outs):
    """Copy everything read from *stream_in* to every stream in *stream_outs*.

    Each chunk produced by iterating *stream_in* is written to each output,
    which is flushed immediately so the copies stay current while the
    producer is still running.

    The function returns once *stream_in* reaches end-of-file.  (It used to
    wrap the read loop in ``while True``, which spun at full CPU after EOF
    and only ended when another thread closed the stream.)

    Args:
        stream_in: Iterable input stream (e.g. a subprocess pipe).
        stream_outs: Iterable of writable streams accepting the same data
            type that *stream_in* yields.
    """
    try:
        for data in stream_in:
            for stream_out in stream_outs:
                stream_out.write(data)
                stream_out.flush()
    except ValueError:
        # Raised for I/O on a closed file: a stream was closed (possibly by
        # another thread) while copying.  Treated as normal termination, as
        # in the original implementation.
        pass
def copy(x, y):
    """Copy the file or directory *x* to the path *y*.

    Does nothing when *x* does not exist (best-effort: a missing context or
    output path is silently skipped).  Missing parent directories of *y* are
    created.  Directories are copied recursively with symlinks preserved as
    symlinks; anything else is copied with ``shutil.copy``.

    Args:
        x: Source path (file or directory).
        y: Destination path (the copy itself, not its containing directory).

    Raises:
        FileExistsError: if *x* is a directory and *y* already exists
            (behaviour of ``shutil.copytree``).
    """
    if not os.path.exists(x):
        return

    # dirname() is '' for a bare relative name such as "out.txt";
    # os.makedirs('') would raise FileNotFoundError, so only create a parent
    # when there is one.
    parent = os.path.dirname(y)
    if parent:
        os.makedirs(parent, exist_ok=True)

    if os.path.isdir(x):
        shutil.copytree(x, y, symlinks=True)
    else:
        shutil.copy(x, y)
def _copy_all(filenames, target_dir):
    """Copy each path in *filenames* into *target_dir* under its base name."""
    for filename in filenames:
        source = os.path.abspath(filename)
        copy(source, os.path.join(target_dir, os.path.basename(source)))


def main():
    """Run the configured command and archive its context and outputs.

    Steps:
      1. Parse the configuration; on RuntimeError print the problem to
         stderr and exit with status 1.
      2. Create the experiment directory and write ``meta.yaml`` into it.
      3. Copy the ``context`` paths into ``<experiment>/context``.
      4. Run the command through a shell with ``OUTPUT_DIR`` and
         ``TF_LOGDIR`` set in its environment.
      5. Always (even after Ctrl-C or an error) write ``return-code.txt``
         and copy the ``output`` paths into ``<experiment>/output``.

    Returns:
        The recorded return code: the shell's exit status, or -1 when the
        run was interrupted before the command finished.
    """
    config_helper = make_config_helper()
    try:
        config = config_helper.parse_args()
    except RuntimeError as details:
        print("run-experiment: %s" % details, file=sys.stderr)
        print("run-experiment: Consult --help", file=sys.stderr)
        sys.exit(1)

    dirname = get_dir(config)
    with open(os.path.join(dirname, "meta.yaml"), "w") as f:
        config_helper.make_yaml_stub(f, config)

    context_dir = os.path.join(dirname, "context")
    os.makedirs(context_dir, exist_ok=True)
    output_dir = os.path.join(dirname, "output")
    os.makedirs(output_dir, exist_ok=True)

    # Snapshot the context before the command gets a chance to modify it.
    _copy_all(config.context, context_dir)

    # Stays -1 if the run is interrupted before wait() returns.
    return_code = -1
    try:
        tflogdir = os.path.join(output_dir, "logdir")
        os.makedirs(tflogdir, exist_ok=True)
        env = dict(os.environ)
        env.update({
            "OUTPUT_DIR": output_dir,
            "TF_LOGDIR": tflogdir,
        })
        print("Running %s" % config.command)
        # The child writes straight to our file descriptors; flush first so
        # the banner cannot end up after the child's output when redirected.
        sys.stdout.flush()

        # `shell=` is only a truthy flag: passing "/bin/bash" there still ran
        # the command under the default /bin/sh.  The shell is selected with
        # `executable=`; fall back to the default shell if bash is missing.
        bash = "/bin/bash" if os.path.exists("/bin/bash") else None

        # The trailing `sleep 5` only runs when the command succeeds (`&&`).
        # NOTE(review): its purpose is not evident from this file; kept as is.
        with subprocess.Popen("%s && sleep 5" % config.command, stdin=sys.stdin,
                              stdout=sys.stdout.buffer, stderr=sys.stderr.buffer,
                              env=env, shell=True, executable=bash) as p:
            # NOTE: the child's output goes directly to this process's
            # stdout/stderr, so these log files are created but stay empty.
            with open(os.path.join(dirname, "stdout.log"), "wb"), \
                    open(os.path.join(dirname, "stderr.log"), "wb"):
                return_code = p.wait()
    except KeyboardInterrupt:
        print("run-experiment: Keyboard interrupt.", file=sys.stderr)
    finally:
        with open(os.path.join(dirname, "return-code.txt"), "w") as f:
            f.write("%d" % return_code)
        _copy_all(config.output, output_dir)
    return return_code
# Script entry point: run the experiment only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()