Skip to content

Commit

Permalink
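Fixed several harness bugs (pinned Sphinx dependencies, ran Django tests with --parallel=1, executed commands under /bin/bash)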
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinlu1248 committed Apr 6, 2024
1 parent 39f5fb8 commit 3430581
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 16 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,8 @@ analysis/**/scratch*
analysis/benchmark/plots/
analysis/evaluation/*.csv
analysis/evaluation/*.pdf

harness/logs/**
harness/predictions/**
harness/testbed/**
**/miniconda.sh
14 changes: 10 additions & 4 deletions harness/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,15 @@
["4.5", "5.0", "5.1", "5.2", "5.3", "6.0", "6.2", "7.0", "7.1", "7.2"]
}
for k in ["3.0", "3.1", "3.2", "3.3", "3.4", "3.5", "4.0"]:
MAP_VERSION_TO_INSTALL_SPHINX[k][
"pre_install"
].append("sed -i 's/Jinja2>=2.3/Jinja2<3.1/' setup.py")
MAP_VERSION_TO_INSTALL_SPHINX[k]["pre_install"].extend([
"sed -i 's/Jinja2>=2.3/Jinja2<3.1/' setup.py",
"sed -i 's/sphinxcontrib-applehelp/sphinxcontrib-applehelp==1.0.4/' setup.py",
"sed -i 's/sphinxcontrib-devhelp/sphinxcontrib-devhelp==1.0.2/' setup.py",
"sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp==2.0.1/' setup.py",
"sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml==1.1.5/' setup.py",
"sed -i 's/sphinxcontrib-qthelp/sphinxcontrib-qthelp==1.0.3/' setup.py",
"sed -i 's/alabaster>=0.7,<0.8/alabaster<0.7/' setup.py",
])

MAP_VERSION_TO_INSTALL_ASTROPY = {
k: {"python": "3.9", "install": "pip install -e .[test]"}
Expand Down Expand Up @@ -418,7 +424,7 @@
MAP_REPO_TO_TEST_FRAMEWORK = {
"astropy/astropy": TEST_PYTEST,
"dbt-labs/dbt-core": TEST_PYTEST,
"django/django": "./tests/runtests.py --verbosity 2",
"django/django": "./tests/runtests.py --verbosity 2 --parallel=1",
"huggingface/transformers": TEST_PYTEST,
"marshmallow-code/marshmallow": TEST_PYTEST,
"matplotlib/matplotlib": TEST_PYTEST,
Expand Down
30 changes: 26 additions & 4 deletions harness/context_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,14 @@ def __enter__(self):
logger_testbed.info(
f"[Testbed] Installing pip packages for {env_name}; Command: {cmd}"
)
self.exec(cmd, shell=True)
# breakpoint()
self.exec(
cmd,
shell=True,
executable="/bin/bash",
timeout=self.timeout,
env=None
)

return self

Expand Down Expand Up @@ -523,6 +530,8 @@ def run_install_task(self, instance: dict) -> bool:
# Get installation instructions by repo/version
specifications = MAP_VERSION_TO_INSTALL[instance["repo"]][instance["version"]]

# breakpoint()

# Run pre-install set up if provided
if "pre_install" in specifications:
for pre_install in specifications["pre_install"]:
Expand All @@ -531,7 +540,11 @@ def run_install_task(self, instance: dict) -> bool:
f"[{self.testbed_name}] [{instance[KEY_INSTANCE_ID]}] Running pre-install setup command: {cmd_pre_install}"
)
out_pre_install = self.exec(
cmd_pre_install, timeout=self.timeout, shell=True
cmd_pre_install,
timeout=self.timeout,
shell=True,
executable="/bin/bash",
env=None
)
with open(self.log_file, "a") as f:
f.write(f"Pre-installation Command: {cmd_pre_install}\n")
Expand All @@ -555,7 +568,8 @@ def run_install_task(self, instance: dict) -> bool:
)
try:
# Run installation command
out_install = self.exec(cmd_install, timeout=self.timeout, shell=True)
# breakpoint()
out_install = self.exec(cmd_install, timeout=self.timeout, shell=True, executable="/bin/bash", env=None)

# Write installation logs to log file
with open(self.log_file, "a") as f:
Expand Down Expand Up @@ -664,10 +678,18 @@ def run_tests_task(self, instance: dict):
try:
# Run test command for task instance
test_cmd = f"{self.cmd_activate} && {instance['test_cmd']}"
# test_cmd = test_cmd.replace("./tests/runtests.py ", "pip install -e . && ./tests/runtests.py --parallel=1 ") # Fix Django installs
with open(self.log_file, "a") as f:
f.write(f"Test Script: {test_cmd};\n")
breakpoint()
out_test = self.exec(
test_cmd, shell=True, timeout=self.timeout, check=False
test_cmd,
shell=True,
timeout=self.timeout,
check=False,
executable="/bin/bash",
text=True,
env=None
)

# Write test results to log file
Expand Down
20 changes: 12 additions & 8 deletions harness/run_evaluation.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
#!/bin/bash
python run_evaluation.py \
--predictions_path "<path to predictions (.json)>" \
--swe_bench_tasks "<path to `swe-bench.json`>" \
--log_dir "<path to folder>" \
--testbed "<path to folder>" \
--skip_existing \
--timeout 900 \
--verbose
# python run_evaluation.py \
# --predictions_path "<path to predictions (.json)>" \
# --swe_bench_tasks "<path to `swe-bench.json`>" \
# --log_dir "<path to folder>" \
# --testbed "<path to folder>" \
# --skip_existing \
# --timeout 900 \
# --verbose

# python run_evaluation.py --predictions_path=predictions/sweep-04-02__SWE-bench_unassisted__test.jsonl --log_dir=logs --swe_bench_tasks=test --testbed=testbed --num_processes=1 # I don't know if it's swe-bench-test
# python run_evaluation.py --predictions_path=predictions/ground_truth__SWE-bench_unassisted__test.jsonl --log_dir=logs --swe_bench_tasks=test --testbed=testbed --num_processes=1 # I don't know if it's swe-bench-test
python run_evaluation.py --predictions_path=predictions/ground_truth_subset__SWE-bench_unassisted__test.jsonl --log_dir=logs --swe_bench_tasks=test --testbed=testbed --num_processes=1 # I don't know if it's swe-bench-test

0 comments on commit 3430581

Please sign in to comment.