SWEBench updates.

2024-12-30 17:54:21 -05:00 · 2024-12-30 17:54:21 -05:00 · bf86de2597
parent 1e2917990e
commit bf86de2597
1 changed files with 157 additions and 250 deletions
--- a/scripts/generate_swebench_dataset.py
+++ b/scripts/generate_swebench_dataset.py
@ -1,34 +1,15 @@
 #!/usr/bin/env python3
 """
 Script to generate predictions for SWE-bench Lite (princeton-nlp/SWE-bench_Lite).
-This script:
+This version uses 'uv venv' and 'uv pip' / 'uv run ra-aid' commands to manage everything in the environment.
+
+It:
 - Loads the SWE-bench Lite dataset
- For each instance, clones the repo at the specified commit into a user-defined projects directory
- Creates a dedicated Python virtual environment in the cloned repo using 'uv venv'
-  (the default system Python is used unless overridden in the `PYTHON_VERSION_OVERRIDES` dictionary)
- Installs `ra-aid` (in editable mode) plus any project dependencies from:
-    - pyproject.toml (pip install .)
-    - requirements.txt
-    - requirements-dev.txt
- Forms a prompt from the instance fields (problem_statement, FAIL_TO_PASS, PASS_TO_PASS)
- Calls ra-aid (from the venv) to create a patch
- Writes out predictions in the required JSON format
-
-Additionally, we provide an internal dictionary for per-project Python version overrides:
-  e.g.:
-
-    PYTHON_VERSION_OVERRIDES = {
-        "org/repo": "3.9",
-        "some-other-org/another-repo": "3.10",
-    }
-
-If a repo name is not found in that dictionary, this script will just use the default system Python.
-
-Required parameters:
-  --projects-dir : Directory where all repos are cloned.
-
-Optional parameters:
-  --cleanup      : If set, remove the cloned repos after processing.
+- For each instance, clones (or reuses) the repo at the specified commit
+- Creates or reuses a dedicated Python virtual environment via `uv venv`
+- Installs `ra-aid` in editable mode + any project dependencies via `uv pip`
+- Calls `uv run ra-aid` to generate a patch
+- Writes out predictions in JSON format
 """

 import argparse
@ -41,20 +22,15 @@ from datetime import datetime
 from pathlib import Path
 from typing import Optional, Tuple, Dict, Any, List

-from datasets import load_dataset
 from git import Repo
 from rich.logging import RichHandler
 from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn

-
 # If you'd like to override Python versions for specific repos:
-# For example: "pandas-dev/pandas": "3.9"
 PYTHON_VERSION_OVERRIDES = {
-    # "org/repo": "3.9",
-    # "another-org/another-repo": "3.10",
+    # "someorg/somerepo": "3.9",
 }

-
 def setup_logging(log_dir: Path, verbose: bool = False) -> None:
    """Configure logging with both file and console handlers."""
    log_dir.mkdir(parents=True, exist_ok=True)
@ -79,17 +55,16 @@ def setup_logging(log_dir: Path, verbose: bool = False) -> None:
    console_handler.setLevel(logging.DEBUG if verbose else logging.INFO)
    root_logger.addHandler(console_handler)

-
 def load_dataset_safely() -> Optional[Any]:
    """Load SWE-bench Lite dataset with error handling."""
    try:
+        from datasets import load_dataset
        dataset = load_dataset("princeton-nlp/SWE-bench_Lite")
        return dataset
    except Exception as e:
        logging.error(f"Failed to load dataset: {e}")
        return None

-
 def create_output_dirs() -> Tuple[Path, Path]:
    """Create base/log directory structure."""
    date_str = datetime.now().strftime("%Y%m%d")
@ -99,122 +74,124 @@ def create_output_dirs() -> Tuple[Path, Path]:
    log_dir.mkdir(parents=True, exist_ok=True)
    return base_dir, log_dir

-
-def install_local_raaid(pip_path: Path) -> None:
+def uv_venv(repo_dir: Path, repo_name: str, force_venv: bool) -> None:
    """
-    Install ra-aid (in editable mode) into the local environment.
-    We assume that this script lives in <repo_root>/scripts, so the
-    root directory is one level up from __file__.
-    """
-    script_dir = Path(__file__).resolve().parent
-    repo_root = script_dir.parent  # one level up
-    try:
-        subprocess.run(
-            [str(pip_path), "install", "-e", str(repo_root)],
-            cwd=str(repo_root),
-            check=True
-        )
-    except Exception as e:
-        logging.error(f"Failed to install ra-aid in editable mode from {repo_root}: {e}")
+    Create (or reuse) a .venv in 'repo_dir' using 'uv venv'.
+    If force_venv is True, we remove .venv first.

-
-def setup_repo_venv(repo_dir: Path, repo_name: str) -> Path:
-    """
-    Create a Python virtual environment in `repo_dir/.venv` using `uv venv`.
-    Installs:
-      - local ra-aid (editable mode)
-      - pyproject.toml => pip install .
-      - requirements.txt => pip install -r ...
-      - requirements-dev.txt => pip install -r ...
-
-    Steps to determine Python version:
-      1) Check the PYTHON_VERSION_OVERRIDES dict for the given repo_name.
-         If found, use that as the --python=<version> argument.
-      2) Otherwise, let uv pick the default system Python.
-
-    Returns:
-        Path to the .venv directory
+    Example command:
+      uv venv .venv --python=3.9
    """
    venv_dir = repo_dir / ".venv"
+    if venv_dir.exists() and force_venv:
+        logging.info(f"Removing existing .venv at {venv_dir}")
+        shutil.rmtree(venv_dir)

-    # Check for Python version override
    python_version = PYTHON_VERSION_OVERRIDES.get(repo_name, None)
-
-    # Construct the uv command
-    uv_cmd = ["uv", "venv"]
+    cmd = ["uv", "venv"]
    if python_version:
-        uv_cmd.append(f"--python={python_version}")
-    uv_cmd.append(str(venv_dir))
+        cmd.append(f"--python={python_version}")
+    cmd.append(".venv")

    try:
-        subprocess.run(uv_cmd, cwd=repo_dir, check=True)
+        subprocess.run(cmd, cwd=repo_dir, check=True)
    except Exception as e:
-        logging.error(f"Failed to create venv in {repo_dir} using uv: {e}")
-        return venv_dir  # Return anyway for partial info
+        logging.error(f"Failed to create venv in {repo_dir}: {e}")

-    pip_path = venv_dir / "bin" / "pip"
-
-    # Upgrade pip
+def uv_pip_install(repo_dir: Path, args: List[str]) -> None:
+    """
+    Run 'uv pip install ...' in the specified repo_dir.
+    Example: uv_pip_install(repo_dir, ["--upgrade", "pip"])
+    """
+    cmd = ["uv", "pip", "install"] + args
    try:
-        subprocess.run(
-            [str(pip_path), "install", "--upgrade", "pip"],
-            cwd=repo_dir,
-            check=False
-        )
+        subprocess.run(cmd, cwd=repo_dir, check=True)
    except Exception as e:
-        logging.error(f"Failed to upgrade pip in {venv_dir}: {e}")
+        logging.error(f"Failed to run uv pip install {args}: {e}")

-    # 1) Install ra-aid in editable mode from our local repo
-    install_local_raaid(pip_path)
+def uv_run_raaid(repo_dir: Path, prompt: str) -> Optional[str]:
+    """
+    Call 'uv run ra-aid' with the given prompt in the environment.
+    Returns the patch if successful, else None.
+    """
+    cmd = [
+        "uv", "run", "ra-aid",
+        "--cowboy-mode",
+        "-m", prompt
+    ]
+    try:
+        result = subprocess.run(cmd, cwd=repo_dir, text=True, capture_output=True, timeout=300)
+        if result.returncode != 0:
+            logging.error("ra-aid returned non-zero exit code.")
+            logging.debug(f"stdout: {result.stdout}")
+            logging.debug(f"stderr: {result.stderr}")
+            return None
+    except subprocess.TimeoutExpired:
+        logging.error("ra-aid timed out")
+        return None
+    except Exception as e:
+        logging.error(f"ra-aid error: {e}")
+        return None

-    # 2) If pyproject.toml is present, install local project
+    # Collect patch
+    patch = get_git_patch(repo_dir)
+    return patch
+
+def get_git_patch(repo_dir: Path) -> Optional[str]:
+    """Generate a git patch from the current changes in `repo_dir`."""
+    try:
+        repo = Repo(repo_dir)
+        if not repo.is_dirty():
+            logging.info("No changes detected in repository.")
+            return None
+        patch_text = repo.git.diff(unified=3)
+        if not patch_text.strip():
+            return None
+        if not any(line.startswith('+') for line in patch_text.splitlines()):
+            return None
+        return patch_text
+    except Exception as e:
+        logging.error(f"Failed to generate patch: {e}")
+        return None
+
+def setup_venv_and_deps(repo_dir: Path, repo_name: str, force_venv: bool) -> None:
+    """
+    - uv venv .venv --python=xxx (optional)
+    - uv pip install --upgrade pip
+    - uv pip install -e <ra-aid local path>
+    - If pyproject.toml -> uv pip install .
+    - If requirements.txt -> uv pip install -r requirements.txt
+    - If requirements-dev.txt -> uv pip install -r requirements-dev.txt
+    """
+    uv_venv(repo_dir, repo_name, force_venv)
+
+    # Now uv pip install ...
+    # 1) upgrade pip
+    uv_pip_install(repo_dir, ["--upgrade", "pip"])
+
+    # 2) install ra-aid from local path
+    script_dir = Path(__file__).resolve().parent
+    ra_aid_root = script_dir.parent  # one level up from scripts
+    uv_pip_install(repo_dir, ["-e", str(ra_aid_root)])
+
+    # 3) optional pyproject
    pyproject_path = repo_dir / "pyproject.toml"
    if pyproject_path.is_file():
-        try:
-            subprocess.run(
-                [str(pip_path), "install", "."],
-                cwd=repo_dir,
-                check=True
-            )
-        except Exception as e:
-            logging.error(f"Failed to install project from pyproject.toml in {repo_dir}: {e}")
+        uv_pip_install(repo_dir, ["."])

-    # 3) If requirements.txt is present
-    req_path = repo_dir / "requirements.txt"
-    if req_path.is_file():
-        try:
-            subprocess.run(
-                [str(pip_path), "install", "-r", str(req_path)],
-                cwd=repo_dir,
-                check=True
-            )
-        except Exception as e:
-            logging.error(f"Failed to install from requirements.txt: {e}")
+    # 4) optional requirements.txt
+    req_file = repo_dir / "requirements.txt"
+    if req_file.is_file():
+        uv_pip_install(repo_dir, ["-r", "requirements.txt"])

-    # 4) If requirements-dev.txt is present
-    req_dev_path = repo_dir / "requirements-dev.txt"
-    if req_dev_path.is_file():
-        try:
-            subprocess.run(
-                [str(pip_path), "install", "-r", str(req_dev_path)],
-                cwd=repo_dir,
-                check=True
-            )
-        except Exception as e:
-            logging.error(f"Failed to install from requirements-dev.txt: {e}")
+    # 5) optional requirements-dev.txt
+    req_dev_file = repo_dir / "requirements-dev.txt"
+    if req_dev_file.is_file():
+        uv_pip_install(repo_dir, ["-r", "requirements-dev.txt"])

-    return venv_dir
-
-
-def run_raaid(
-    repo_dir: Path,
-    venv_dir: Path,
-    problem_statement: str,
-    fail_tests: List[str],
-    pass_tests: List[str]
-) -> Optional[str]:
+def build_prompt(problem_statement: str, fail_tests: List[str], pass_tests: List[str]) -> str:
    """
-    Run ra-aid on the problem statement (using the local venv), returning a generated patch if possible.
+    Construct the prompt text from problem_statement, FAIL_TO_PASS, PASS_TO_PASS.
    """
    prompt = f"{problem_statement}\n\nTests that need to be fixed:\n```\n"
    for t in fail_tests:
@ -225,77 +202,14 @@ def run_raaid(
        for t in pass_tests:
            prompt += f"- {t}\n"
        prompt += "```\n\n"
-
-    # Use ra-aid from the local venv
-    raaid_exe = venv_dir / "bin" / "ra-aid"
-    impl_cmd = [
-        str(raaid_exe),
-        '--cowboy-mode',
-        '-m', prompt,
-    ]
-
-    try:
-        impl_result = subprocess.run(
-            impl_cmd,
-            cwd=repo_dir,
-            capture_output=True,
-            text=True,
-            timeout=300
-        )
-        if impl_result.returncode != 0:
-            logging.error("ra-aid returned non-zero exit code.")
-            logging.debug(f"stdout: {impl_result.stdout}")
-            logging.debug(f"stderr: {impl_result.stderr}")
-            return None
-    except subprocess.TimeoutExpired:
-        logging.error("ra-aid implementation phase timed out.")
-        return None
-    except Exception as e:
-        logging.error(f"ra-aid error: {e}")
-        return None
-
-    repo = Repo(repo_dir)
-    patch = get_git_patch(repo)
-    return patch
-
-
-def get_git_patch(repo: Repo) -> Optional[str]:
-    """Generate a git patch for current changes."""
-    if not repo.is_dirty():
-        logging.info("No repo changes detected.")
-        return None
-    try:
-        patch = repo.git.diff(unified=3)
-        if not patch or not patch.strip():
-            return None
-        if not any(line.startswith('+') for line in patch.splitlines()):
-            return None
-        return patch
-    except Exception as e:
-        logging.error(f"Failed to generate patch: {e}")
-        return None
-
+    return prompt

 def process_instance(
    instance: Dict[str, Any],
    projects_dir: Path,
-    cleanup: bool
+    reuse_repo: bool,
+    force_venv: bool
 ) -> Dict[str, Any]:
-    """
-    Process a single dataset instance:
-    - Clone the repo into projects_dir/<instance_id>
-    - Checkout commit
-    - Build a local Python venv in that repo (checking override dict)
-    - Install ra-aid + any project dependencies
-    - Build prompt from problem_statement, FAIL_TO_PASS, PASS_TO_PASS
-    - Return dict in required format:
-        {
-            "instance_id": ...,
-            "model_patch": ...,
-            "model_name_or_path": ...
-        }
-    - If cleanup is True, remove the cloned repo after generating a patch
-    """
    inst_id = instance.get("instance_id", "<unknown>")
    repo_name = instance["repo"]
    commit = instance["base_commit"]
@ -308,35 +222,44 @@ def process_instance(
    if isinstance(pass_tests, str):
        pass_tests = [pass_tests]

-    # Build GitHub URL
+    # Build GH URL
    if "github.com" not in repo_name:
        repo_url = f"https://github.com/{repo_name}.git"
    else:
        repo_url = repo_name

    checkout_dir = projects_dir / f"{inst_id}"
-    patch_str = None

+    # Clone or reuse
    try:
-        if checkout_dir.exists():
-            logging.info(f"Removing pre-existing directory: {checkout_dir}")
-            shutil.rmtree(checkout_dir)
+        if not checkout_dir.exists():
+            logging.info(f"Cloning {repo_url} -> {checkout_dir}")
+            repo = Repo.clone_from(repo_url, checkout_dir)
+        else:
+            # if reuse_repo
+            if reuse_repo:
+                logging.info(f"Reusing existing directory: {checkout_dir}")
+                repo = Repo(checkout_dir)
+            else:
+                logging.info(f"Deleting existing directory: {checkout_dir}")
+                shutil.rmtree(checkout_dir)
+                repo = Repo.clone_from(repo_url, checkout_dir)

-        # Clone and checkout
-        repo = Repo.clone_from(repo_url, checkout_dir)
+        # checkout commit
        repo.git.checkout(commit)

-        # Set up local Python venv & install dependencies
-        venv_dir = setup_repo_venv(checkout_dir, repo_name=repo_name)
+        # set up venv + deps
+        setup_venv_and_deps(checkout_dir, repo_name, force_venv)

-        # Run ra-aid
-        patch_str = run_raaid(
-            checkout_dir,
-            venv_dir,
-            problem_statement,
-            fail_tests,
-            pass_tests
-        )
+        # build prompt, run ra-aid
+        prompt_text = build_prompt(problem_statement, fail_tests, pass_tests)
+        patch = uv_run_raaid(checkout_dir, prompt_text)
+
+        return {
+            "instance_id": inst_id,
+            "model_patch": patch if patch else "",
+            "model_name_or_path": "ra-aid"
+        }

    except Exception as e:
        logging.error(f"Failed to process {repo_url}:{commit} - {e}")
@ -345,31 +268,19 @@ def process_instance(
            "model_patch": "",
            "model_name_or_path": "ra-aid"
        }
-    finally:
-        if cleanup:
-            logging.info(f"Cleaning up directory: {checkout_dir}")
-            shutil.rmtree(checkout_dir, ignore_errors=True)
-
-    return {
-        "instance_id": inst_id,
-        "model_patch": patch_str if patch_str else "",
-        "model_name_or_path": "ra-aid"
-    }
-

 def main() -> None:
-    parser = argparse.ArgumentParser(
-        description="Generate predictions for SWE-bench Lite using ra-aid."
-    )
+    parser = argparse.ArgumentParser(description="Generate predictions for SWE-bench Lite using uv + ra-aid.")
    parser.add_argument(
        "output_dir",
        type=Path,
        help="Directory to store prediction file"
    )
    parser.add_argument(
-        "--verbose",
-        action="store_true",
-        help="Enable verbose logging"
+        "--projects-dir",
+        type=Path,
+        required=True,
+        help="Directory where projects will be cloned."
    )
    parser.add_argument(
        "--num-instances",
@ -378,18 +289,24 @@ def main() -> None:
        help="Number of instances to process (default: all)"
    )
    parser.add_argument(
-        "--projects-dir",
-        type=Path,
-        required=True,
-        help="Directory where projects will be cloned. Must exist or can be created."
+        "--reuse-repo",
+        action="store_true",
+        help="If set, do not delete an existing repo directory. We'll reuse it."
    )
    parser.add_argument(
-        "--cleanup",
+        "--force-venv",
        action="store_true",
-        help="If set, remove the cloned repos after generating the patch."
+        help="If set, recreate the .venv even if it exists."
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Enable verbose logging"
    )
    args = parser.parse_args()

+    from datasets import load_dataset
+
    base_dir, log_dir = create_output_dirs()
    setup_logging(log_dir, args.verbose)
    logging.info("Starting script")
@ -400,12 +317,11 @@ def main() -> None:
    if dataset is None:
        sys.exit(1)

-    # Combine 'dev' and 'test' splits for this dataset (there is no 'train')
    all_data = list(dataset["dev"]) + list(dataset["test"])

    args.output_dir.mkdir(parents=True, exist_ok=True)
    predictions_file = args.output_dir / "predictions.json"
-    predictions = []
+    predictions: List[Dict[str, str]] = []

    limit = args.num_instances if args.num_instances else len(all_data)

@ -419,24 +335,15 @@ def main() -> None:
        for i, inst in enumerate(all_data):
            if i >= limit:
                break
-            try:
-                pred = process_instance(
-                    inst,
-                    projects_dir=args.projects_dir,
-                    cleanup=args.cleanup
-                )
-                predictions.append(pred)
-            except Exception as e:
-                logging.error(f"Error processing instance: {inst.get('instance_id', '')} - {e}")
-            finally:
-                progress.advance(task)
+            pred = process_instance(inst, args.projects_dir, args.reuse_repo, args.force_venv)
+            predictions.append(pred)
+            progress.advance(task)

    with open(predictions_file, "w", encoding="utf-8") as f:
        json.dump(predictions, f, indent=2)

    logging.info("Done generating predictions.")

-
 if __name__ == "__main__":
    try:
        main()