From 1e2917990e9a0a587a33ad342652c4eb1d42144c Mon Sep 17 00:00:00 2001
From: AI Christianson <ai.christianson@christianson.ai>
Date: Mon, 30 Dec 2024 14:01:14 -0500
Subject: [PATCH] SWEBench updates.

---
 scripts/generate_swebench_dataset.py | 243 +++++++++++++++++++++++----
 1 file changed, 210 insertions(+), 33 deletions(-)
diff --git a/scripts/generate_swebench_dataset.py b/scripts/generate_swebench_dataset.py
index 380e0ba..acc8f24 100755
--- a/scripts/generate_swebench_dataset.py
+++ b/scripts/generate_swebench_dataset.py
@@ -3,10 +3,32 @@
 Script to generate predictions for SWE-bench Lite (princeton-nlp/SWE-bench_Lite).
 This script:
 - Loads the SWE-bench Lite dataset
-- Clones each repo at the specified commit
+- For each instance, clones the repo at the specified commit into a user-defined projects directory
+- Creates a dedicated Python virtual environment in the cloned repo using 'uv venv'
+  (the default system Python is used unless overridden in the `PYTHON_VERSION_OVERRIDES` dictionary)
+- Installs `ra-aid` (in editable mode) plus any project dependencies from:
+    - pyproject.toml (pip install .)
+    - requirements.txt
+    - requirements-dev.txt
 - Forms a prompt from the instance fields (problem_statement, FAIL_TO_PASS, PASS_TO_PASS)
-- Calls ra-aid to create a patch
+- Calls ra-aid (from the venv) to create a patch
 - Writes out predictions in the required JSON format
+
+Additionally, we provide an internal dictionary for per-project Python version overrides:
+  e.g.:
+
+    PYTHON_VERSION_OVERRIDES = {
+        "org/repo": "3.9",
+        "some-other-org/another-repo": "3.10",
+    }
+
+If a repo name is not found in that dictionary, this script will just use the default system Python.
+
+Required parameters:
+  --projects-dir : Directory where all repos are cloned.
+
+Optional parameters:
+  --cleanup      : If set, remove the cloned repos after processing.
 """
 
 import argparse
@@ -15,7 +37,6 @@ import logging
 import shutil
 import subprocess
 import sys
-import tempfile
 from datetime import datetime
 from pathlib import Path
 from typing import Optional, Tuple, Dict, Any, List
@@ -26,6 +47,14 @@ from rich.logging import RichHandler
 from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
 
 
+# Per-repo Python version overrides, keyed by the dataset's "org/repo" name;
+# the value is passed to `uv venv --python=<version>`, e.g. "pandas-dev/pandas": "3.9".
+PYTHON_VERSION_OVERRIDES = {
+    # "org/repo": "3.9",
+    # "another-org/another-repo": "3.10",
+}
+
+
 def setup_logging(log_dir: Path, verbose: bool = False) -> None:
     """Configure logging with both file and console handlers."""
     log_dir.mkdir(parents=True, exist_ok=True)
@@ -71,14 +100,122 @@ def create_output_dirs() -> Tuple[Path, Path]:
     return base_dir, log_dir
 
 
+def install_local_raaid(pip_path: Path) -> None:
+    """
+    Editable-install ra-aid using the given pip executable.
+
+    The ra-aid checkout is taken to be the parent of the directory holding
+    this script (<repo_root>/scripts). Any Exception raised by the install
+    is logged instead of propagated.
+    """
+    repo_root = Path(__file__).resolve().parent.parent
+    try:
+        install_cmd = [str(pip_path), "install", "-e", str(repo_root)]
+        subprocess.run(install_cmd, cwd=str(repo_root), check=True)
+    except Exception as e:
+        logging.error(f"Failed to install ra-aid in editable mode from {repo_root}: {e}")
+        # Best effort: no re-raise, so execution continues after the log.
+    return None
+
+
+def setup_repo_venv(repo_dir: Path, repo_name: str) -> Path:
+    """
+    Create `repo_dir/.venv` with `uv venv --seed` and install dependencies into it.
+
+    `--seed` matters: by default uv creates the venv without pip, so every
+    `.venv/bin/pip` call below would fail. Best-effort installs, in order:
+      - local ra-aid (editable mode)
+      - pyproject.toml => pip install .
+      - requirements.txt => pip install -r ...
+      - requirements-dev.txt => pip install -r ...
+
+    Args:
+        repo_dir: Root of the cloned repository.
+        repo_name: Key looked up in PYTHON_VERSION_OVERRIDES; a hit is passed to
+            uv as --python=<version>, otherwise uv picks the interpreter.
+
+    Returns:
+        Path to the .venv directory
+    """
+    venv_dir = repo_dir / ".venv"
+    python_version = PYTHON_VERSION_OVERRIDES.get(repo_name)
+
+    # --seed installs pip into the venv; plain `uv venv` leaves bin/pip missing.
+    uv_cmd = ["uv", "venv", "--seed"]
+    if python_version:
+        uv_cmd.append(f"--python={python_version}")
+    uv_cmd.append(str(venv_dir))
+
+    try:
+        subprocess.run(uv_cmd, cwd=repo_dir, check=True)
+    except Exception as e:
+        logging.error(f"Failed to create venv in {repo_dir} using uv: {e}")
+        return venv_dir  # Return anyway for partial info
+
+    pip_path = venv_dir / "bin" / "pip"
+
+    # Upgrade pip
+    try:
+        subprocess.run(
+            [str(pip_path), "install", "--upgrade", "pip"],
+            cwd=repo_dir,
+            check=False
+        )
+    except Exception as e:
+        logging.error(f"Failed to upgrade pip in {venv_dir}: {e}")
+
+    # 1) Install ra-aid in editable mode from our local repo
+    install_local_raaid(pip_path)
+
+    # 2) If pyproject.toml is present, install local project
+    pyproject_path = repo_dir / "pyproject.toml"
+    if pyproject_path.is_file():
+        try:
+            subprocess.run(
+                [str(pip_path), "install", "."],
+                cwd=repo_dir,
+                check=True
+            )
+        except Exception as e:
+            logging.error(f"Failed to install project from pyproject.toml in {repo_dir}: {e}")
+
+    # 3) If requirements.txt is present
+    req_path = repo_dir / "requirements.txt"
+    if req_path.is_file():
+        try:
+            subprocess.run(
+                [str(pip_path), "install", "-r", str(req_path)],
+                cwd=repo_dir,
+                check=True
+            )
+        except Exception as e:
+            logging.error(f"Failed to install from requirements.txt: {e}")
+
+    # 4) If requirements-dev.txt is present
+    req_dev_path = repo_dir / "requirements-dev.txt"
+    if req_dev_path.is_file():
+        try:
+            subprocess.run(
+                [str(pip_path), "install", "-r", str(req_dev_path)],
+                cwd=repo_dir,
+                check=True
+            )
+        except Exception as e:
+            logging.error(f"Failed to install from requirements-dev.txt: {e}")
+
+    return venv_dir
+
+
 def run_raaid(
     repo_dir: Path,
+    venv_dir: Path,
     problem_statement: str,
     fail_tests: List[str],
     pass_tests: List[str]
 ) -> Optional[str]:
-    """Run ra-aid on the problem statement, returning a generated patch if possible."""
-    # Create prompt
+    """
+    Run ra-aid on the problem statement (using the local venv), returning a generated patch if possible.
+    """
     prompt = f"{problem_statement}\n\nTests that need to be fixed:\n```\n"
     for t in fail_tests:
         prompt += f"- {t}\n"
@@ -89,13 +226,14 @@ def run_raaid(
             prompt += f"- {t}\n"
         prompt += "```\n\n"
 
-    # Implementation phase
+    # Use ra-aid from the local venv
+    raaid_exe = venv_dir / "bin" / "ra-aid"
     impl_cmd = [
-        'ra-aid',
+        str(raaid_exe),
         '--cowboy-mode',
         '-m', prompt,
     ]
-    
+
     try:
         impl_result = subprocess.run(
             impl_cmd,
@@ -116,7 +254,6 @@ def run_raaid(
         logging.error(f"ra-aid error: {e}")
         return None
 
-    # Collect patch
     repo = Repo(repo_dir)
     patch = get_git_patch(repo)
     return patch
@@ -139,11 +276,17 @@ def get_git_patch(repo: Repo) -> Optional[str]:
         return None
 
 
-def process_instance(instance: Dict[str, Any], output_repo_dir: Path) -> Dict[str, Any]:
+def process_instance(
+    instance: Dict[str, Any],
+    projects_dir: Path,
+    cleanup: bool
+) -> Dict[str, Any]:
     """
     Process a single dataset instance:
-    - Clone the repo
+    - Clone the repo into projects_dir/<instance_id>
     - Checkout commit
+    - Build a local Python venv in that repo (checking override dict)
+    - Install ra-aid + any project dependencies
     - Build prompt from problem_statement, FAIL_TO_PASS, PASS_TO_PASS
     - Return dict in required format:
         {
@@ -151,6 +294,7 @@ def process_instance(instance: Dict[str, Any], output_repo_dir: Path) -> Dict[st
             "model_patch": ...,
             "model_name_or_path": ...
         }
+    - If cleanup is True, remove the cloned repo after generating a patch
     """
     inst_id = instance.get("instance_id", "<unknown>")
     repo_name = instance["repo"]
@@ -159,37 +303,53 @@ def process_instance(instance: Dict[str, Any], output_repo_dir: Path) -> Dict[st
     fail_tests = instance.get("FAIL_TO_PASS", [])
     pass_tests = instance.get("PASS_TO_PASS", [])
 
-    # Convert to lists if they're strings
     if isinstance(fail_tests, str):
         fail_tests = [fail_tests]
     if isinstance(pass_tests, str):
         pass_tests = [pass_tests]
 
-    # Attempt to build a github url if not provided
-    # If 'repo' is "org/repo", create https://github.com/org/repo.git
+    # Build GitHub URL
     if "github.com" not in repo_name:
         repo_url = f"https://github.com/{repo_name}.git"
     else:
         repo_url = repo_name
 
+    checkout_dir = projects_dir / f"{inst_id}"
     patch_str = None
-    with tempfile.TemporaryDirectory() as tmp:
-        tmp_path = Path(tmp)
-        try:
-            # Clone & checkout
-            repo = Repo.clone_from(repo_url, tmp_path)
-            repo.git.checkout(commit)
-        except Exception as e:
-            logging.error(f"Failed to clone/check out {repo_url}:{commit} - {e}")
-            return {
-                "instance_id": inst_id,
-                "model_patch": "",
-                "model_name_or_path": "ra-aid"
-            }
-        # Run ra-aid
-        patch_str = run_raaid(tmp_path, problem_statement, fail_tests, pass_tests)
 
-    # Return required prediction structure
+    try:
+        if checkout_dir.exists():
+            logging.info(f"Removing pre-existing directory: {checkout_dir}")
+            shutil.rmtree(checkout_dir)
+
+        # Clone and checkout
+        repo = Repo.clone_from(repo_url, checkout_dir)
+        repo.git.checkout(commit)
+
+        # Set up local Python venv & install dependencies
+        venv_dir = setup_repo_venv(checkout_dir, repo_name=repo_name)
+
+        # Run ra-aid
+        patch_str = run_raaid(
+            checkout_dir,
+            venv_dir,
+            problem_statement,
+            fail_tests,
+            pass_tests
+        )
+
+    except Exception as e:
+        logging.error(f"Failed to process {repo_url}:{commit} - {e}")
+        return {
+            "instance_id": inst_id,
+            "model_patch": "",
+            "model_name_or_path": "ra-aid"
+        }
+    finally:
+        if cleanup:
+            logging.info(f"Cleaning up directory: {checkout_dir}")
+            shutil.rmtree(checkout_dir, ignore_errors=True)
+
     return {
         "instance_id": inst_id,
         "model_patch": patch_str if patch_str else "",
@@ -217,17 +377,30 @@ def main() -> None:
         default=None,
         help="Number of instances to process (default: all)"
     )
+    parser.add_argument(
+        "--projects-dir",
+        type=Path,
+        required=True,
+        help="Directory where projects will be cloned. Must exist or can be created."
+    )
+    parser.add_argument(
+        "--cleanup",
+        action="store_true",
+        help="If set, remove the cloned repos after generating the patch."
+    )
     args = parser.parse_args()
 
     base_dir, log_dir = create_output_dirs()
     setup_logging(log_dir, args.verbose)
     logging.info("Starting script")
 
+    args.projects_dir.mkdir(parents=True, exist_ok=True)
+
     dataset = load_dataset_safely()
     if dataset is None:
         sys.exit(1)
 
-    # Combine "dev" and "test" splits (no "train" in this dataset)
+    # Combine 'dev' and 'test' splits for this dataset (there is no 'train')
     all_data = list(dataset["dev"]) + list(dataset["test"])
 
     args.output_dir.mkdir(parents=True, exist_ok=True)
@@ -247,7 +420,11 @@ def main() -> None:
             if i >= limit:
                 break
             try:
-                pred = process_instance(inst, args.output_dir)
+                pred = process_instance(
+                    inst,
+                    projects_dir=args.projects_dir,
+                    cleanup=args.cleanup
+                )
                 predictions.append(pred)
             except Exception as e:
                 logging.error(f"Error processing instance: {inst.get('instance_id', '')} - {e}")
@@ -268,4 +445,4 @@ if __name__ == "__main__":
         sys.exit(1)
     except Exception as e:
         logging.exception("Unhandled error occurred.")
-        sys.exit(1)
\ No newline at end of file
+        sys.exit(1)