diff --git a/scripts/generate_swebench_dataset.py b/scripts/generate_swebench_dataset.py index acc8f24..56ab201 100755 --- a/scripts/generate_swebench_dataset.py +++ b/scripts/generate_swebench_dataset.py @@ -1,34 +1,15 @@ #!/usr/bin/env python3 """ Script to generate predictions for SWE-bench Lite (princeton-nlp/SWE-bench_Lite). -This script: +This version uses 'uv venv' and 'uv pip' / 'uv run ra-aid' commands to manage everything in the environment. + +It: - Loads the SWE-bench Lite dataset -- For each instance, clones the repo at the specified commit into a user-defined projects directory -- Creates a dedicated Python virtual environment in the cloned repo using 'uv venv' - (the default system Python is used unless overridden in the `PYTHON_VERSION_OVERRIDES` dictionary) -- Installs `ra-aid` (in editable mode) plus any project dependencies from: - - pyproject.toml (pip install .) - - requirements.txt - - requirements-dev.txt -- Forms a prompt from the instance fields (problem_statement, FAIL_TO_PASS, PASS_TO_PASS) -- Calls ra-aid (from the venv) to create a patch -- Writes out predictions in the required JSON format - -Additionally, we provide an internal dictionary for per-project Python version overrides: - e.g.: - - PYTHON_VERSION_OVERRIDES = { - "org/repo": "3.9", - "some-other-org/another-repo": "3.10", - } - -If a repo name is not found in that dictionary, this script will just use the default system Python. - -Required parameters: - --projects-dir : Directory where all repos are cloned. - -Optional parameters: - --cleanup : If set, remove the cloned repos after processing. 
# Per-repository Python version overrides, keyed by the dataset's "org/repo"
# name. Repositories that are not listed get no --python flag, so uv picks
# the interpreter on its own.
PYTHON_VERSION_OVERRIDES: Dict[str, str] = {
    # "someorg/somerepo": "3.9",
}


def load_dataset_safely() -> Optional[Any]:
    """Load the SWE-bench Lite dataset, returning None on any failure.

    ``datasets`` is imported inside the ``try`` so that a missing or broken
    installation is logged like any other load failure instead of raising.

    Returns:
        The object returned by ``load_dataset``, or None if loading failed.
    """
    try:
        from datasets import load_dataset

        return load_dataset("princeton-nlp/SWE-bench_Lite")
    except Exception as e:
        logging.error(f"Failed to load dataset: {e}")
        return None


def uv_venv(repo_dir: Path, repo_name: str, force_venv: bool) -> None:
    """Create (or reuse) a ``.venv`` inside ``repo_dir`` using ``uv venv``.

    Args:
        repo_dir: Checked-out repository that hosts the environment.
        repo_name: Repo name used to look up an interpreter override in
            ``PYTHON_VERSION_OVERRIDES``.
        force_venv: When True, an existing ``.venv`` is deleted and rebuilt.
            When False, an existing ``.venv`` is left untouched and reused.

    A failing ``uv`` command is logged, not raised.

    Example command:
        uv venv --python=3.9 .venv
    """
    venv_dir = repo_dir / ".venv"
    if venv_dir.exists():
        if not force_venv:
            # Re-running `uv venv` over an existing environment does not reuse
            # it (depending on the uv version it is replaced or refused), so
            # reuse has to mean "do not invoke uv at all".
            logging.info(f"Reusing existing .venv at {venv_dir}")
            return
        logging.info(f"Removing existing .venv at {venv_dir}")
        shutil.rmtree(venv_dir)

    cmd = ["uv", "venv"]
    python_version = PYTHON_VERSION_OVERRIDES.get(repo_name)
    if python_version:
        cmd.append(f"--python={python_version}")
    # Relative path: the command runs with cwd=repo_dir.
    cmd.append(".venv")

    try:
        subprocess.run(cmd, cwd=repo_dir, check=True)
    except Exception as e:
        logging.error(f"Failed to create venv in {repo_dir}: {e}")


def uv_pip_install(repo_dir: Path, args: List[str]) -> None:
    """Run ``uv pip install <args>`` with ``repo_dir`` as working directory.

    Best-effort: a non-zero exit status or a missing ``uv`` executable is
    logged and swallowed so the remaining install steps can still run.

    Example: uv_pip_install(repo_dir, ["--upgrade", "pip"])
    """
    cmd = ["uv", "pip", "install", *args]
    try:
        subprocess.run(cmd, cwd=repo_dir, check=True)
    except Exception as e:
        logging.error(f"Failed to run uv pip install {args}: {e}")


def uv_run_raaid(repo_dir: Path, prompt: str, timeout: float = 300) -> Optional[str]:
    """Invoke ``uv run ra-aid`` on ``prompt`` and return the resulting patch.

    Args:
        repo_dir: Repository checkout; used as the working directory.
        prompt: Message passed to ra-aid through ``-m``.
        timeout: Seconds to wait for ra-aid before giving up (default 300,
            the value that used to be hard-coded).

    Returns:
        The patch produced by ``get_git_patch`` after a successful run, or
        None when ra-aid exits non-zero, times out, cannot be started, or
        leaves no usable diff behind.
    """
    cmd = [
        "uv", "run", "ra-aid",
        "--cowboy-mode",
        "-m", prompt,
    ]
    try:
        result = subprocess.run(
            cmd,
            cwd=repo_dir,
            text=True,
            capture_output=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        logging.error("ra-aid timed out")
        return None
    except Exception as e:
        logging.error(f"ra-aid error: {e}")
        return None

    if result.returncode != 0:
        logging.error("ra-aid returned non-zero exit code.")
        logging.debug(f"stdout: {result.stdout}")
        logging.debug(f"stderr: {result.stderr}")
        return None

    # ra-aid edits the working tree in place; the patch is whatever git now
    # reports as changed.
    return get_git_patch(repo_dir)


def get_git_patch(repo_dir: Path) -> Optional[str]:
    """Generate a git patch from the current changes in ``repo_dir``.

    Returns:
        The output of ``git diff`` with three lines of context, or None when
        the repository is not dirty, the diff is blank, no diff line starts
        with '+', or any error occurs (errors are logged).
    """
    try:
        repo = Repo(repo_dir)
        # NOTE(review): is_dirty() is called with its defaults, which appear
        # to ignore untracked files, and a plain `git diff` does not list them
        # either, so files newly created by ra-aid would be missing from the
        # patch. Confirm whether that is intended.
        if not repo.is_dirty():
            logging.info("No changes detected in repository.")
            return None
        patch_text = repo.git.diff(unified=3)
        if not patch_text.strip():
            return None
        # The "+++ b/<file>" header line also starts with '+', so this check
        # passes for any diff that carries a textual file header. It does not
        # prove that content lines were added.
        if not any(line.startswith('+') for line in patch_text.splitlines()):
            return None
        return patch_text
    except Exception as e:
        logging.error(f"Failed to generate patch: {e}")
        return None


def setup_venv_and_deps(repo_dir: Path, repo_name: str, force_venv: bool) -> None:
    """Prepare the repository's environment and install everything into it.

    Steps, in order:
        1. ``uv venv [--python=X] .venv`` via ``uv_venv``
        2. ``uv pip install --upgrade pip``
        3. ``uv pip install -e <ra-aid root>`` (the parent of this script's
           directory)
        4. ``uv pip install .`` if ``pyproject.toml`` exists
        5. ``uv pip install -r requirements.txt`` if that file exists
        6. ``uv pip install -r requirements-dev.txt`` if that file exists

    Failing ``uv`` commands are logged by the helpers and do not stop the
    remaining steps.
    """
    uv_venv(repo_dir, repo_name, force_venv)

    uv_pip_install(repo_dir, ["--upgrade", "pip"])

    # This script lives in <ra-aid root>/scripts, so the root is two levels
    # above the resolved file path.
    ra_aid_root = Path(__file__).resolve().parent.parent
    uv_pip_install(repo_dir, ["-e", str(ra_aid_root)])

    if (repo_dir / "pyproject.toml").is_file():
        uv_pip_install(repo_dir, ["."])

    # File names are passed relative to repo_dir, which is the command's cwd.
    for requirements_name in ("requirements.txt", "requirements-dev.txt"):
        if (repo_dir / requirements_name).is_file():
            uv_pip_install(repo_dir, ["-r", requirements_name])
+ Construct the prompt text from problem_statement, FAIL_TO_PASS, PASS_TO_PASS. """ prompt = f"{problem_statement}\n\nTests that need to be fixed:\n```\n" for t in fail_tests: @@ -225,77 +202,14 @@ def run_raaid( for t in pass_tests: prompt += f"- {t}\n" prompt += "```\n\n" - - # Use ra-aid from the local venv - raaid_exe = venv_dir / "bin" / "ra-aid" - impl_cmd = [ - str(raaid_exe), - '--cowboy-mode', - '-m', prompt, - ] - - try: - impl_result = subprocess.run( - impl_cmd, - cwd=repo_dir, - capture_output=True, - text=True, - timeout=300 - ) - if impl_result.returncode != 0: - logging.error("ra-aid returned non-zero exit code.") - logging.debug(f"stdout: {impl_result.stdout}") - logging.debug(f"stderr: {impl_result.stderr}") - return None - except subprocess.TimeoutExpired: - logging.error("ra-aid implementation phase timed out.") - return None - except Exception as e: - logging.error(f"ra-aid error: {e}") - return None - - repo = Repo(repo_dir) - patch = get_git_patch(repo) - return patch - - -def get_git_patch(repo: Repo) -> Optional[str]: - """Generate a git patch for current changes.""" - if not repo.is_dirty(): - logging.info("No repo changes detected.") - return None - try: - patch = repo.git.diff(unified=3) - if not patch or not patch.strip(): - return None - if not any(line.startswith('+') for line in patch.splitlines()): - return None - return patch - except Exception as e: - logging.error(f"Failed to generate patch: {e}") - return None - + return prompt def process_instance( instance: Dict[str, Any], projects_dir: Path, - cleanup: bool + reuse_repo: bool, + force_venv: bool ) -> Dict[str, Any]: - """ - Process a single dataset instance: - - Clone the repo into projects_dir/ - - Checkout commit - - Build a local Python venv in that repo (checking override dict) - - Install ra-aid + any project dependencies - - Build prompt from problem_statement, FAIL_TO_PASS, PASS_TO_PASS - - Return dict in required format: - { - "instance_id": ..., - "model_patch": ..., 
- "model_name_or_path": ... - } - - If cleanup is True, remove the cloned repo after generating a patch - """ inst_id = instance.get("instance_id", "") repo_name = instance["repo"] commit = instance["base_commit"] @@ -308,35 +222,44 @@ def process_instance( if isinstance(pass_tests, str): pass_tests = [pass_tests] - # Build GitHub URL + # Build GH URL if "github.com" not in repo_name: repo_url = f"https://github.com/{repo_name}.git" else: repo_url = repo_name checkout_dir = projects_dir / f"{inst_id}" - patch_str = None + # Clone or reuse try: - if checkout_dir.exists(): - logging.info(f"Removing pre-existing directory: {checkout_dir}") - shutil.rmtree(checkout_dir) + if not checkout_dir.exists(): + logging.info(f"Cloning {repo_url} -> {checkout_dir}") + repo = Repo.clone_from(repo_url, checkout_dir) + else: + # if reuse_repo + if reuse_repo: + logging.info(f"Reusing existing directory: {checkout_dir}") + repo = Repo(checkout_dir) + else: + logging.info(f"Deleting existing directory: {checkout_dir}") + shutil.rmtree(checkout_dir) + repo = Repo.clone_from(repo_url, checkout_dir) - # Clone and checkout - repo = Repo.clone_from(repo_url, checkout_dir) + # checkout commit repo.git.checkout(commit) - # Set up local Python venv & install dependencies - venv_dir = setup_repo_venv(checkout_dir, repo_name=repo_name) + # set up venv + deps + setup_venv_and_deps(checkout_dir, repo_name, force_venv) - # Run ra-aid - patch_str = run_raaid( - checkout_dir, - venv_dir, - problem_statement, - fail_tests, - pass_tests - ) + # build prompt, run ra-aid + prompt_text = build_prompt(problem_statement, fail_tests, pass_tests) + patch = uv_run_raaid(checkout_dir, prompt_text) + + return { + "instance_id": inst_id, + "model_patch": patch if patch else "", + "model_name_or_path": "ra-aid" + } except Exception as e: logging.error(f"Failed to process {repo_url}:{commit} - {e}") @@ -345,31 +268,19 @@ def process_instance( "model_patch": "", "model_name_or_path": "ra-aid" } - finally: - if 
cleanup: - logging.info(f"Cleaning up directory: {checkout_dir}") - shutil.rmtree(checkout_dir, ignore_errors=True) - - return { - "instance_id": inst_id, - "model_patch": patch_str if patch_str else "", - "model_name_or_path": "ra-aid" - } - def main() -> None: - parser = argparse.ArgumentParser( - description="Generate predictions for SWE-bench Lite using ra-aid." - ) + parser = argparse.ArgumentParser(description="Generate predictions for SWE-bench Lite using uv + ra-aid.") parser.add_argument( "output_dir", type=Path, help="Directory to store prediction file" ) parser.add_argument( - "--verbose", - action="store_true", - help="Enable verbose logging" + "--projects-dir", + type=Path, + required=True, + help="Directory where projects will be cloned." ) parser.add_argument( "--num-instances", @@ -378,18 +289,24 @@ def main() -> None: help="Number of instances to process (default: all)" ) parser.add_argument( - "--projects-dir", - type=Path, - required=True, - help="Directory where projects will be cloned. Must exist or can be created." + "--reuse-repo", + action="store_true", + help="If set, do not delete an existing repo directory. We'll reuse it." ) parser.add_argument( - "--cleanup", + "--force-venv", action="store_true", - help="If set, remove the cloned repos after generating the patch." + help="If set, recreate the .venv even if it exists." 
+ ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable verbose logging" ) args = parser.parse_args() + from datasets import load_dataset + base_dir, log_dir = create_output_dirs() setup_logging(log_dir, args.verbose) logging.info("Starting script") @@ -400,12 +317,11 @@ def main() -> None: if dataset is None: sys.exit(1) - # Combine 'dev' and 'test' splits for this dataset (there is no 'train') all_data = list(dataset["dev"]) + list(dataset["test"]) args.output_dir.mkdir(parents=True, exist_ok=True) predictions_file = args.output_dir / "predictions.json" - predictions = [] + predictions: List[Dict[str, str]] = [] limit = args.num_instances if args.num_instances else len(all_data) @@ -419,24 +335,15 @@ def main() -> None: for i, inst in enumerate(all_data): if i >= limit: break - try: - pred = process_instance( - inst, - projects_dir=args.projects_dir, - cleanup=args.cleanup - ) - predictions.append(pred) - except Exception as e: - logging.error(f"Error processing instance: {inst.get('instance_id', '')} - {e}") - finally: - progress.advance(task) + pred = process_instance(inst, args.projects_dir, args.reuse_repo, args.force_venv) + predictions.append(pred) + progress.advance(task) with open(predictions_file, "w", encoding="utf-8") as f: json.dump(predictions, f, indent=2) logging.info("Done generating predictions.") - if __name__ == "__main__": try: main()