import os
import pytest
from ra_aid.text.processing import extract_think_tag
def test_basic_extraction():
    """Test basic extraction of think tag content.

    A leading ``<think>...</think>`` block should be split off from the text
    that follows it, with the tag markers themselves removed.
    """
    # The input must actually contain the tag pair; without it there is
    # nothing for extract_think_tag to split on.
    content = "<think>This is a test</think>Remaining content"
    expected_extracted = "This is a test"
    expected_remaining = "Remaining content"

    extracted, remaining = extract_think_tag(content)

    assert extracted == expected_extracted
    assert remaining == expected_remaining
def test_multiline_extraction():
    """Test extraction of multiline think tag content.

    Newlines inside the ``<think>`` block must be preserved in the extracted
    text and must not prevent the closing tag from being found.
    """
    content = "<think>Line 1\nLine 2\nLine 3</think>Remaining content"
    expected_extracted = "Line 1\nLine 2\nLine 3"
    expected_remaining = "Remaining content"

    extracted, remaining = extract_think_tag(content)

    assert extracted == expected_extracted
    assert remaining == expected_remaining
def test_multiple_think_tags():
    """Test that only the first think tag is extracted.

    When the text contains two ``<think>`` blocks, only the leading one is
    pulled out; the second block stays verbatim (tags included) in the
    remaining text.
    """
    content = "<think>First tag</think>Middle<think>Second tag</think>End"
    expected_extracted = "First tag"
    # The second tag pair is deliberately left untouched in the remainder.
    expected_remaining = "Middle<think>Second tag</think>End"

    extracted, remaining = extract_think_tag(content)

    assert extracted == expected_extracted
    assert remaining == expected_remaining
def test_no_think_tag():
    """Verify that text without any think tag is passed through untouched."""
    plain_text = "This is a string without a think tag"

    result = extract_think_tag(plain_text)
    thought, rest = result

    # Nothing to extract: the first element is None and the text is unchanged.
    assert thought is None
    assert rest == plain_text
def test_empty_think_tag():
    """Test extraction of an empty think tag.

    An empty ``<think></think>`` pair should yield an empty string (not
    ``None``) as the extracted content, which distinguishes it from the
    "no tag present" case.
    """
    content = "<think></think>Remaining content"
    expected_extracted = ""
    expected_remaining = "Remaining content"

    extracted, remaining = extract_think_tag(content)

    assert extracted == expected_extracted
    assert remaining == expected_remaining
def test_whitespace_handling():
    """Test whitespace handling in think tag extraction.

    Whitespace and newlines inside the ``<think>`` block are expected to be
    returned exactly as written, without stripping.
    """
    content = "<think> \n Content with whitespace \n </think>Remaining content"
    # Leading/trailing whitespace inside the tag is part of the expected value.
    expected_extracted = " \n Content with whitespace \n "
    expected_remaining = "Remaining content"

    extracted, remaining = extract_think_tag(content)

    assert extracted == expected_extracted
    assert remaining == expected_remaining
def test_tag_not_at_start():
    """Test behavior when think tag is not at the start of the string.

    A ``<think>`` block that is preceded by other text must be ignored:
    nothing is extracted and the input is returned unchanged.
    """
    # The tag pair is present but not leading, which is the case under test.
    content = "Some content before <think>Think content</think>Remaining content"

    extracted, remaining = extract_think_tag(content)

    assert extracted is None
    assert remaining == content
def test_sample_data():
    """Test extraction using sample data from tests/data/think-tag/sample_1.txt.

    Reads the sample file, runs ``extract_think_tag`` on its contents and
    checks that the extracted text holds the expected reasoning snippets
    while neither result still carries the literal tag markers.
    """
    # Resolve the sample file relative to this module so the test does not
    # depend on the current working directory.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    sample_file_path = os.path.join(
        current_dir, '..', '..', 'data', 'think-tag', 'sample_1.txt'
    )

    with open(sample_file_path, 'r', encoding='utf-8') as f:
        sample_data = f.read()

    extracted, remaining = extract_think_tag(sample_data)

    # Check that extraction worked and captured the expected content.
    assert extracted is not None
    assert "Okay, the user wants me to write a" in extracted
    assert "return 0;" in extracted

    # The extracted text must be the tag's content without the tag markers.
    # The literal markers are required here: every string starts and ends
    # with "", so an empty-string check could never pass.
    assert not extracted.startswith("<think>")
    assert not extracted.endswith("</think>")

    # The remaining content must no longer contain the think tag.
    assert "<think>" not in remaining
    assert "</think>" not in remaining