-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathextract_cases.py
executable file
·33 lines (29 loc) · 1.3 KB
/
extract_cases.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/usr/bin/env python3
import os
import subprocess
import sys

import pandas as pd
def extract_cases(cases_file, output_folder):
    """Render every case listed in *cases_file* as gumtree HTML diffs.

    *cases_file* is read with ``pandas.read_csv`` and must provide the
    columns ``before`` and ``after``. For each row two files are written
    into *output_folder*, both named after the ``before`` value with every
    ``/`` replaced by ``_``:

    * ``<name>_opt.html``    -- output of the command built without a matcher
    * ``<name>_simple.html`` -- output of the command built with the
      ``gumtree-simple`` matcher

    The folder is created when it does not exist yet. A command that exits
    with a non-zero status does not abort the run; a warning is written to
    stderr so that an empty or partial HTML file does not go unnoticed.
    """
    # (output file suffix, matcher handed to build_command)
    variants = (
        ("_opt.html", None),
        ("_simple.html", "gumtree-simple"),
    )

    cases = pd.read_csv(cases_file)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    for before, after in zip(cases["before"], cases["after"]):
        # Flatten the path of the "before" file into a single file name.
        stem = before.replace("/", "_")
        # Progress output: only the default command is echoed to stdout.
        print(build_command(before, after))

        for suffix, matcher in variants:
            command = build_command(before, after, matcher=matcher)
            output_file = os.path.join(output_folder, stem + suffix)
            # The child process writes straight into the file descriptor.
            with open(output_file, "w") as output_file_handle:
                result = subprocess.run(command, stdout=output_file_handle)
            if result.returncode != 0:
                print(
                    f"warning: {' '.join(command)} exited with status "
                    f"{result.returncode}; {output_file} may be incomplete",
                    file=sys.stderr,
                )
def build_command(before, after, matcher=None):
    """Return the ``gumtree htmldiff`` argument list for one file pair.

    Args:
        before: Path of the original file.
        after: Path of the modified file.
        matcher: Optional matcher name; when it is not ``None`` it is passed
            to gumtree via ``-m``.

    Returns:
        A list of strings suitable for ``subprocess`` (no shell involved).
        When *before* starts with ``bugsinpy`` or ``gh-python`` the
        ``python-treesitter`` generator is selected via ``-g``.
    """
    command = ["gumtree", "htmldiff", before, after]
    if matcher is not None:
        command += ["-m", matcher]
    # The generator choice is driven purely by the prefix of the "before" path.
    if before.startswith(("bugsinpy", "gh-python")):
        command += ["-g", "python-treesitter"]
    return command
if __name__ == '__main__':
    # Expect exactly two positional arguments; exit with a usage message
    # instead of an IndexError traceback when they are missing.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} CASES_FILE OUTPUT_FOLDER")
    cases_file = sys.argv[1]
    output_folder = sys.argv[2]
    # Echo the inputs so a log of the run shows what was processed.
    print(cases_file)
    print(output_folder)
    extract_cases(cases_file, output_folder)