"""Tests for ``extract_think_tag``.

The assertions below pin the following contract: the function returns a
``(extracted, remaining)`` pair, where ``extracted`` is the text between a
leading ``<think>`` and its matching ``</think>`` (or ``None`` when there is
no leading tag) and ``remaining`` is the input with that first tag removed.
"""

import os

import pytest

from ra_aid.text.processing import extract_think_tag


def test_basic_extraction():
    """A single leading think tag is split from the text that follows it."""
    content = "<think>This is a test</think>Remaining content"
    expected_extracted = "This is a test"
    expected_remaining = "Remaining content"

    extracted, remaining = extract_think_tag(content)

    assert extracted == expected_extracted
    assert remaining == expected_remaining


def test_multiline_extraction():
    """Newlines inside the think tag are kept in the extracted text."""
    content = "<think>Line 1\nLine 2\nLine 3</think>Remaining content"
    expected_extracted = "Line 1\nLine 2\nLine 3"
    expected_remaining = "Remaining content"

    extracted, remaining = extract_think_tag(content)

    assert extracted == expected_extracted
    assert remaining == expected_remaining


def test_multiple_think_tags():
    """Only the first think tag is extracted; later tags stay in the remainder."""
    content = "<think>First tag</think>Middle<think>Second tag</think>End"
    expected_extracted = "First tag"
    # The second tag must survive untouched, markup included.
    expected_remaining = "Middle<think>Second tag</think>End"

    extracted, remaining = extract_think_tag(content)

    assert extracted == expected_extracted
    assert remaining == expected_remaining


def test_no_think_tag():
    """Input without a think tag yields ``None`` and the unchanged input."""
    content = "This is a string without a think tag"

    extracted, remaining = extract_think_tag(content)

    assert extracted is None
    assert remaining == content


def test_empty_think_tag():
    """An empty think tag yields an empty string, not ``None``."""
    content = "<think></think>Remaining content"
    expected_extracted = ""
    expected_remaining = "Remaining content"

    extracted, remaining = extract_think_tag(content)

    assert extracted == expected_extracted
    assert remaining == expected_remaining


def test_whitespace_handling():
    """Whitespace inside the think tag is preserved verbatim."""
    content = "<think> \n Content with whitespace \n </think>Remaining content"
    expected_extracted = " \n Content with whitespace \n "
    expected_remaining = "Remaining content"

    extracted, remaining = extract_think_tag(content)

    assert extracted == expected_extracted
    assert remaining == expected_remaining


def test_tag_not_at_start():
    """A think tag preceded by other text is not extracted."""
    content = "Some content before <think>Think content</think>Remaining content"

    extracted, remaining = extract_think_tag(content)

    assert extracted is None
    assert remaining == content


def test_sample_data():
    """Extraction works on the sample in tests/data/think-tag/sample_1.txt."""
    # Resolve the sample relative to this test file so the test does not
    # depend on the current working directory.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    sample_file_path = os.path.join(
        current_dir, "..", "..", "data", "think-tag", "sample_1.txt"
    )

    with open(sample_file_path, "r", encoding="utf-8") as f:
        sample_data = f.read()

    extracted, remaining = extract_think_tag(sample_data)

    # Extraction succeeded and captured text from both ends of the sample.
    assert extracted is not None
    assert "Okay, the user wants me to write a" in extracted
    assert "return 0;" in extracted

    # The extracted text is the tag's content only, without the markup.
    # (The sentinels must be the literal tags: an empty-string sentinel would
    # make startswith/endswith true for every input.)
    assert not extracted.startswith("<think>")
    assert not extracted.endswith("</think>")

    # The tag itself has been removed from the remainder.
    assert "<think>" not in remaining
    assert "</think>" not in remaining