From 3430581821dbd5046369f88ffbd533527a290cdf Mon Sep 17 00:00:00 2001 From: Kevin Lu Date: Sat, 6 Apr 2024 22:49:08 +0000 Subject: [PATCH] Fixed hella bugs --- .gitignore | 5 +++++ harness/constants.py | 14 ++++++++++---- harness/context_manager.py | 30 ++++++++++++++++++++++++++---- harness/run_evaluation.sh | 20 ++++++++++++-------- 4 files changed, 53 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index a3d7955e..b5d0cb3a 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,8 @@ analysis/**/scratch* analysis/benchmark/plots/ analysis/evaluation/*.csv analysis/evaluation/*.pdf + +harness/logs/** +harness/predictions/** +harness/testbed/** +**/miniconda.sh \ No newline at end of file diff --git a/harness/constants.py b/harness/constants.py index 8635b05b..7b6215ab 100644 --- a/harness/constants.py +++ b/harness/constants.py @@ -214,9 +214,15 @@ ["4.5", "5.0", "5.1", "5.2", "5.3", "6.0", "6.2", "7.0", "7.1", "7.2"] } for k in ["3.0", "3.1", "3.2", "3.3", "3.4", "3.5", "4.0"]: - MAP_VERSION_TO_INSTALL_SPHINX[k][ - "pre_install" - ].append("sed -i 's/Jinja2>=2.3/Jinja2<3.1/' setup.py") + MAP_VERSION_TO_INSTALL_SPHINX[k]["pre_install"].extend([ + "sed -i 's/Jinja2>=2.3/Jinja2<3.1/' setup.py", + "sed -i 's/sphinxcontrib-applehelp/sphinxcontrib-applehelp==1.0.4/' setup.py", + "sed -i 's/sphinxcontrib-devhelp/sphinxcontrib-devhelp==1.0.2/' setup.py", + "sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp==2.0.1/' setup.py", + "sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml==1.1.5/' setup.py", + "sed -i 's/sphinxcontrib-qthelp/sphinxcontrib-qthelp==1.0.3/' setup.py", + "sed -i 's/alabaster>=0.7,<0.8/alabaster<0.7/' setup.py", + ]) MAP_VERSION_TO_INSTALL_ASTROPY = { k: {"python": "3.9", "install": "pip install -e .[test]"} @@ -418,7 +424,7 @@ MAP_REPO_TO_TEST_FRAMEWORK = { "astropy/astropy": TEST_PYTEST, "dbt-labs/dbt-core": TEST_PYTEST, - "django/django": "./tests/runtests.py --verbosity 2", + "django/django": "./tests/runtests.py --verbosity 2 --parallel=1", "huggingface/transformers": TEST_PYTEST, "marshmallow-code/marshmallow": TEST_PYTEST, "matplotlib/matplotlib": TEST_PYTEST, diff --git a/harness/context_manager.py b/harness/context_manager.py index 53a9c725..3e1565c4 100644 --- a/harness/context_manager.py +++ b/harness/context_manager.py @@ -339,7 +339,14 @@ def __enter__(self): logger_testbed.info( f"[Testbed] Installing pip packages for {env_name}; Command: {cmd}" ) - self.exec(cmd, shell=True) + # breakpoint() + self.exec( + cmd, + shell=True, + executable="/bin/bash", + timeout=self.timeout, + env=None + ) return self @@ -523,6 +530,8 @@ def run_install_task(self, instance: dict) -> bool: # Get installation instructions by repo/version specifications = MAP_VERSION_TO_INSTALL[instance["repo"]][instance["version"]] + # breakpoint() + # Run pre-install set up if provided if "pre_install" in specifications: for pre_install in specifications["pre_install"]: @@ -531,7 +540,11 @@ def run_install_task(self, instance: dict) -> bool: f"[{self.testbed_name}] [{instance[KEY_INSTANCE_ID]}] Running pre-install setup command: {cmd_pre_install}" ) out_pre_install = self.exec( - cmd_pre_install, timeout=self.timeout, shell=True + cmd_pre_install, + timeout=self.timeout, + shell=True, + executable="/bin/bash", + env=None ) with open(self.log_file, "a") as f: f.write(f"Pre-installation Command: {cmd_pre_install}\n") @@ -555,7 +568,8 @@ def run_install_task(self, instance: dict) -> bool: ) try: # Run installation command - out_install = self.exec(cmd_install, timeout=self.timeout, shell=True) + # breakpoint() + out_install = self.exec(cmd_install, timeout=self.timeout, shell=True, executable="/bin/bash", env=None) # Write installation logs to log file with open(self.log_file, "a") as f: @@ -664,10 +678,18 @@ def run_tests_task(self, instance: dict): try: # Run test command for task instance test_cmd = f"{self.cmd_activate} && {instance['test_cmd']}" + # test_cmd = test_cmd.replace("./tests/runtests.py ", "pip install -e . && ./tests/runtests.py --parallel=1 ") # Fix Django installs with open(self.log_file, "a") as f: f.write(f"Test Script: {test_cmd};\n") + breakpoint() out_test = self.exec( - test_cmd, shell=True, timeout=self.timeout, check=False + test_cmd, + shell=True, + timeout=self.timeout, + check=False, + executable="/bin/bash", + text=True, + env=None ) # Write test results to log file diff --git a/harness/run_evaluation.sh b/harness/run_evaluation.sh index d1091813..e7c394c9 100755 --- a/harness/run_evaluation.sh +++ b/harness/run_evaluation.sh @@ -1,9 +1,13 @@ #!/bin/bash -python run_evaluation.py \ - --predictions_path "" \ - --swe_bench_tasks "" \ - --log_dir "" \ - --testbed "" \ - --skip_existing \ - --timeout 900 \ - --verbose +# python run_evaluation.py \ +# --predictions_path "" \ +# --swe_bench_tasks "" \ +# --log_dir "" \ +# --testbed "" \ +# --skip_existing \ +# --timeout 900 \ +# --verbose + +# python run_evaluation.py --predictions_path=predictions/sweep-04-02__SWE-bench_unassisted__test.jsonl --log_dir=logs --swe_bench_tasks=test --testbed=testbed --num_processes=1 # i don't know if its swe-bench-test +# python run_evaluation.py --predictions_path=predictions/ground_truth__SWE-bench_unassisted__test.jsonl --log_dir=logs --swe_bench_tasks=test --testbed=testbed --num_processes=1 # i don't know if its swe-bench-test +python run_evaluation.py --predictions_path=predictions/ground_truth_subset__SWE-bench_unassisted__test.jsonl --log_dir=logs --swe_bench_tasks=test --testbed=testbed --num_processes=1 # i don't know if its swe-bench-test