From 1e2917990e9a0a587a33ad342652c4eb1d42144c Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Mon, 30 Dec 2024 14:01:14 -0500 Subject: [PATCH] SWEBench updates. --- scripts/generate_swebench_dataset.py | 243 +++++++++++++++++++++++---- 1 file changed, 210 insertions(+), 33 deletions(-) diff --git a/scripts/generate_swebench_dataset.py b/scripts/generate_swebench_dataset.py index 380e0ba..acc8f24 100755 --- a/scripts/generate_swebench_dataset.py +++ b/scripts/generate_swebench_dataset.py @@ -3,10 +3,32 @@ Script to generate predictions for SWE-bench Lite (princeton-nlp/SWE-bench_Lite). This script: - Loads the SWE-bench Lite dataset -- Clones each repo at the specified commit +- For each instance, clones the repo at the specified commit into a user-defined projects directory +- Creates a dedicated Python virtual environment in the cloned repo using 'uv venv' + (the default system Python is used unless overridden in the `PYTHON_VERSION_OVERRIDES` dictionary) +- Installs `ra-aid` (in editable mode) plus any project dependencies from: + - pyproject.toml (pip install .) + - requirements.txt + - requirements-dev.txt - Forms a prompt from the instance fields (problem_statement, FAIL_TO_PASS, PASS_TO_PASS) -- Calls ra-aid to create a patch +- Calls ra-aid (from the venv) to create a patch - Writes out predictions in the required JSON format + +Additionally, we provide an internal dictionary for per-project Python version overrides: + e.g.: + + PYTHON_VERSION_OVERRIDES = { + "org/repo": "3.9", + "some-other-org/another-repo": "3.10", + } + +If a repo name is not found in that dictionary, this script will just use the default system Python. + +Required parameters: + --projects-dir : Directory where all repos are cloned. + +Optional parameters: + --cleanup : If set, remove the cloned repos after processing. """ import argparse @@ -15,7 +37,6 @@ import logging import shutil import subprocess import sys -import tempfile from datetime import datetime from pathlib import Path from typing import Optional, Tuple, Dict, Any, List @@ -26,6 +47,14 @@ from rich.logging import RichHandler from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn +# If you'd like to override Python versions for specific repos: +# For example: "pandas-dev/pandas": "3.9" +PYTHON_VERSION_OVERRIDES = { + # "org/repo": "3.9", + # "another-org/another-repo": "3.10", +} + + def setup_logging(log_dir: Path, verbose: bool = False) -> None: """Configure logging with both file and console handlers.""" log_dir.mkdir(parents=True, exist_ok=True) @@ -71,14 +100,122 @@ def create_output_dirs() -> Tuple[Path, Path]: return base_dir, log_dir +def install_local_raaid(pip_path: Path) -> None: + """ + Install ra-aid (in editable mode) into the local environment. + We assume that this script lives in /scripts, so the + root directory is one level up from __file__. + """ + script_dir = Path(__file__).resolve().parent + repo_root = script_dir.parent # one level up + try: + subprocess.run( + [str(pip_path), "install", "-e", str(repo_root)], + cwd=str(repo_root), + check=True + ) + except Exception as e: + logging.error(f"Failed to install ra-aid in editable mode from {repo_root}: {e}") + + +def setup_repo_venv(repo_dir: Path, repo_name: str) -> Path: + """ + Create a Python virtual environment in `repo_dir/.venv` using `uv venv`. + Installs: + - local ra-aid (editable mode) + - pyproject.toml => pip install . + - requirements.txt => pip install -r ... + - requirements-dev.txt => pip install -r ... + + Steps to determine Python version: + 1) Check the PYTHON_VERSION_OVERRIDES dict for the given repo_name. + If found, use that as the --python= argument. + 2) Otherwise, let uv pick the default system Python. + + Returns: + Path to the .venv directory + """ + venv_dir = repo_dir / ".venv" + + # Check for Python version override + python_version = PYTHON_VERSION_OVERRIDES.get(repo_name, None) + + # Construct the uv command + uv_cmd = ["uv", "venv"] + if python_version: + uv_cmd.append(f"--python={python_version}") + uv_cmd.append(str(venv_dir)) + + try: + subprocess.run(uv_cmd, cwd=repo_dir, check=True) + except Exception as e: + logging.error(f"Failed to create venv in {repo_dir} using uv: {e}") + return venv_dir # Return anyway for partial info + + pip_path = venv_dir / "bin" / "pip" + + # Upgrade pip + try: + subprocess.run( + [str(pip_path), "install", "--upgrade", "pip"], + cwd=repo_dir, + check=False + ) + except Exception as e: + logging.error(f"Failed to upgrade pip in {venv_dir}: {e}") + + # 1) Install ra-aid in editable mode from our local repo + install_local_raaid(pip_path) + + # 2) If pyproject.toml is present, install local project + pyproject_path = repo_dir / "pyproject.toml" + if pyproject_path.is_file(): + try: + subprocess.run( + [str(pip_path), "install", "."], + cwd=repo_dir, + check=True + ) + except Exception as e: + logging.error(f"Failed to install project from pyproject.toml in {repo_dir}: {e}") + + # 3) If requirements.txt is present + req_path = repo_dir / "requirements.txt" + if req_path.is_file(): + try: + subprocess.run( + [str(pip_path), "install", "-r", str(req_path)], + cwd=repo_dir, + check=True + ) + except Exception as e: + logging.error(f"Failed to install from requirements.txt: {e}") + + # 4) If requirements-dev.txt is present + req_dev_path = repo_dir / "requirements-dev.txt" + if req_dev_path.is_file(): + try: + subprocess.run( + [str(pip_path), "install", "-r", str(req_dev_path)], + cwd=repo_dir, + check=True + ) + except Exception as e: + logging.error(f"Failed to install from requirements-dev.txt: {e}") + + return venv_dir + + def run_raaid( repo_dir: Path, + venv_dir: Path, problem_statement: str, fail_tests: List[str], pass_tests: List[str] ) -> Optional[str]: - """Run ra-aid on the problem statement, returning a generated patch if possible.""" - # Create prompt + """ + Run ra-aid on the problem statement (using the local venv), returning a generated patch if possible. + """ prompt = f"{problem_statement}\n\nTests that need to be fixed:\n```\n" for t in fail_tests: prompt += f"- {t}\n" @@ -89,13 +226,14 @@ def run_raaid( prompt += f"- {t}\n" prompt += "```\n\n" - # Implementation phase + # Use ra-aid from the local venv + raaid_exe = venv_dir / "bin" / "ra-aid" impl_cmd = [ - 'ra-aid', + str(raaid_exe), '--cowboy-mode', '-m', prompt, ] - + try: impl_result = subprocess.run( impl_cmd, @@ -116,7 +254,6 @@ def run_raaid( logging.error(f"ra-aid error: {e}") return None - # Collect patch repo = Repo(repo_dir) patch = get_git_patch(repo) return patch @@ -139,11 +276,17 @@ def get_git_patch(repo: Repo) -> Optional[str]: return None -def process_instance(instance: Dict[str, Any], output_repo_dir: Path) -> Dict[str, Any]: +def process_instance( + instance: Dict[str, Any], + projects_dir: Path, + cleanup: bool +) -> Dict[str, Any]: """ Process a single dataset instance: - - Clone the repo + - Clone the repo into projects_dir/ - Checkout commit + - Build a local Python venv in that repo (checking override dict) + - Install ra-aid + any project dependencies - Build prompt from problem_statement, FAIL_TO_PASS, PASS_TO_PASS - Return dict in required format: { @@ -151,6 +294,7 @@ def process_instance(instance: Dict[str, Any], output_repo_dir: Path) -> Dict[st "model_patch": ..., "model_name_or_path": ... } + - If cleanup is True, remove the cloned repo after generating a patch """ inst_id = instance.get("instance_id", "") repo_name = instance["repo"] @@ -159,37 +303,53 @@ def process_instance(instance: Dict[str, Any], output_repo_dir: Path) -> Dict[st fail_tests = instance.get("FAIL_TO_PASS", []) pass_tests = instance.get("PASS_TO_PASS", []) - # Convert to lists if they're strings if isinstance(fail_tests, str): fail_tests = [fail_tests] if isinstance(pass_tests, str): pass_tests = [pass_tests] - # Attempt to build a github url if not provided - # If 'repo' is "org/repo", create https://github.com/org/repo.git + # Build GitHub URL if "github.com" not in repo_name: repo_url = f"https://github.com/{repo_name}.git" else: repo_url = repo_name + checkout_dir = projects_dir / f"{inst_id}" patch_str = None - with tempfile.TemporaryDirectory() as tmp: - tmp_path = Path(tmp) - try: - # Clone & checkout - repo = Repo.clone_from(repo_url, tmp_path) - repo.git.checkout(commit) - except Exception as e: - logging.error(f"Failed to clone/check out {repo_url}:{commit} - {e}") - return { - "instance_id": inst_id, - "model_patch": "", - "model_name_or_path": "ra-aid" - } - # Run ra-aid - patch_str = run_raaid(tmp_path, problem_statement, fail_tests, pass_tests) - # Return required prediction structure + try: + if checkout_dir.exists(): + logging.info(f"Removing pre-existing directory: {checkout_dir}") + shutil.rmtree(checkout_dir) + + # Clone and checkout + repo = Repo.clone_from(repo_url, checkout_dir) + repo.git.checkout(commit) + + # Set up local Python venv & install dependencies + venv_dir = setup_repo_venv(checkout_dir, repo_name=repo_name) + + # Run ra-aid + patch_str = run_raaid( + checkout_dir, + venv_dir, + problem_statement, + fail_tests, + pass_tests + ) + + except Exception as e: + logging.error(f"Failed to process {repo_url}:{commit} - {e}") + return { + "instance_id": inst_id, + "model_patch": "", + "model_name_or_path": "ra-aid" + } + finally: + if cleanup: + logging.info(f"Cleaning up directory: {checkout_dir}") + shutil.rmtree(checkout_dir, ignore_errors=True) + return { "instance_id": inst_id, "model_patch": patch_str if patch_str else "", @@ -217,17 +377,30 @@ def main() -> None: default=None, help="Number of instances to process (default: all)" ) + parser.add_argument( + "--projects-dir", + type=Path, + required=True, + help="Directory where projects will be cloned. Must exist or can be created." + ) + parser.add_argument( + "--cleanup", + action="store_true", + help="If set, remove the cloned repos after generating the patch." + ) args = parser.parse_args() base_dir, log_dir = create_output_dirs() setup_logging(log_dir, args.verbose) logging.info("Starting script") + args.projects_dir.mkdir(parents=True, exist_ok=True) + dataset = load_dataset_safely() if dataset is None: sys.exit(1) - # Combine "dev" and "test" splits (no "train" in this dataset) + # Combine 'dev' and 'test' splits for this dataset (there is no 'train') all_data = list(dataset["dev"]) + list(dataset["test"]) args.output_dir.mkdir(parents=True, exist_ok=True) @@ -247,7 +420,11 @@ def main() -> None: if i >= limit: break try: - pred = process_instance(inst, args.output_dir) + pred = process_instance( + inst, + projects_dir=args.projects_dir, + cleanup=args.cleanup + ) predictions.append(pred) except Exception as e: logging.error(f"Error processing instance: {inst.get('instance_id', '')} - {e}") @@ -268,4 +445,4 @@ if __name__ == "__main__": sys.exit(1) except Exception as e: logging.exception("Unhandled error occurred.") - sys.exit(1) \ No newline at end of file + sys.exit(1)