# Source repository: jack/r2rpy
# type: ignore
import re
from typing import AsyncGenerator

from core.base.parsers.base_parser import AsyncParser
from core.base.providers import (
    CompletionProvider,
    DatabaseProvider,
    IngestionConfig,
)


class CSSParser(AsyncParser[str | bytes]):
    """A parser for CSS files.

    Turns a stylesheet into a plain-text outline: first the comments, then
    every rule as a ``Selector:`` line followed by its indented
    ``property: value`` declarations.
    """

    # Matches one /* ... */ comment; DOTALL lets a comment span several lines.
    # The pattern is purely textual, so "/*" inside a quoted string is also
    # treated as the start of a comment.
    _COMMENT_RE = re.compile(r"/\*(.*?)\*/", re.DOTALL)

    def __init__(
        self,
        config: IngestionConfig,
        database_provider: DatabaseProvider,
        llm_provider: CompletionProvider,
    ):
        """Store the collaborators handed to the parser.

        :param config: Ingestion configuration, kept on ``self.config``
        :param database_provider: Kept on ``self.database_provider``
        :param llm_provider: Kept on ``self.llm_provider``
        """
        self.database_provider = database_provider
        self.llm_provider = llm_provider
        self.config = config

    async def ingest(
        self, data: str | bytes, *args, **kwargs
    ) -> AsyncGenerator[str, None]:
        """Ingest CSS data and yield structured text representation.

        Extracts selectors, properties, values, and comments from CSS while
        preserving the structure in a text format suitable for analysis.

        :param data: The CSS content to parse; ``bytes`` are decoded as UTF-8
            and undecodable bytes are dropped
        :param args: Additional positional arguments (unused)
        :param kwargs: Additional keyword arguments (unused)
        :return: An async generator yielding exactly one string
        """
        if isinstance(data, bytes):
            data = data.decode("utf-8", errors="ignore")

        yield self._process_css_content(data)

    def _process_css_content(self, css: str) -> str:
        """Process CSS content into a structured text representation.

        The result has an optional ``COMMENTS:`` section followed by an
        optional ``CSS RULES:`` section; a section is omitted when it would
        be empty, so input without comments or rules yields ``""``.

        :param css: The stylesheet text
        :return: The sections joined with newlines
        """
        comments = self._extract_comments(css)
        rules = self._extract_rules(css)

        result: list[str] = []

        if comments:
            result.append("COMMENTS:")
            result.extend(comments)
            result.append("")  # Blank line between the two sections

        if rules:
            result.append("CSS RULES:")
            result.extend(rules)

        return "\n".join(result)

    def _extract_comments(self, css: str) -> list[str]:
        """Extract comments from CSS content.

        :param css: The stylesheet text
        :return: The stripped text of every non-blank ``/* ... */`` comment,
            in source order
        """
        comments = self._COMMENT_RE.findall(css)
        return [comment.strip() for comment in comments if comment.strip()]

    def _extract_rules(self, css: str) -> list[str]:
        """Extract selectors and their declarations from CSS content.

        The stylesheet is scanned once with an explicit stack of open blocks,
        so blocks nested inside ``@media``/``@supports``/``@keyframes`` (or
        CSS nesting) are reported as rules of their own instead of being
        folded into the parent's declarations. Quoted strings and
        parentheses are tracked so that ``;``, ``{`` and ``}`` inside
        ``"..."`` and ``;`` inside ``url(...)`` do not split anything.

        Output lines, per rule: ``Selector: <selector>`` (nested rules get a
        `` (in <outer> > <inner>)`` suffix), then one ``  property: value``
        line per declaration, then an empty line. Nested rules follow their
        parent. Top-level statements starting with ``@`` and ending in ``;``
        (e.g. ``@import``) are emitted as ``At-rule: <text>`` plus an empty
        line. Blocks with an empty selector are dropped together with their
        content, stray ``}`` are ignored, and blocks left open at the end of
        the input are closed implicitly.

        :param css: The stylesheet text
        :return: The output lines described above
        """
        # Remove comments to simplify parsing
        text = self._COMMENT_RE.sub("", css)

        # One frame per open block: (selector, declaration lines, lines of
        # already-closed nested rules). Frame 0 stands for the stylesheet.
        frames: list[tuple[str, list[str], list[str]]] = [("", [], [])]
        start = 0  # Where the current statement / selector begins
        parens = 0  # Open "(" count; ";" only separates when this is 0
        quote = ""  # Active quote character, "" outside of strings
        pos = 0
        length = len(text)

        while pos < length:
            char = text[pos]
            if quote:
                if char == "\\":
                    pos += 1  # Skip the escaped character
                elif char in (quote, "\n"):
                    # An unescaped newline ends a (broken) string, so one
                    # stray quote cannot swallow the rest of the file.
                    quote = ""
            elif char in "\"'":
                quote = char
            elif char == "{":
                frames.append((text[start:pos].strip(), [], []))
                start, parens = pos + 1, 0
            elif char == "}":
                if len(frames) > 1:
                    # The last declaration may lack a trailing ";"
                    self._add_statement(frames, text[start:pos])
                    self._close_rule(frames)
                start, parens = pos + 1, 0
            elif char == "(":
                parens += 1
            elif char == ")":
                parens = max(parens - 1, 0)
            elif char == ";" and parens == 0:
                self._add_statement(frames, text[start:pos])
                start = pos + 1
            pos += 1

        # End of input: flush the pending statement and close open blocks.
        self._add_statement(frames, text[start:])
        while len(frames) > 1:
            self._close_rule(frames)

        return frames[0][2]

    @staticmethod
    def _add_statement(
        frames: list[tuple[str, list[str], list[str]]], statement: str
    ) -> None:
        """Record one ``;``-delimited statement in the innermost open frame.

        :param frames: The stack of open blocks; only the last one is updated
        :param statement: Raw statement text, without the terminator
        """
        statement = statement.strip()
        _, declarations, nested = frames[-1]

        if len(frames) == 1:
            # Stylesheet level: only statement at-rules are meaningful here.
            if statement.startswith("@"):
                nested.extend((f"At-rule: {statement}", ""))
            return

        # Inside a block: keep "name: value" pairs, drop anything else.
        name, colon, value = statement.partition(":")
        if colon:
            declarations.append(f"  {name.strip()}: {value.strip()}")

    @staticmethod
    def _close_rule(frames: list[tuple[str, list[str], list[str]]]) -> None:
        """Pop the innermost block and append its lines to its parent.

        :param frames: The stack of open blocks; must hold at least one
            frame besides the stylesheet frame
        """
        selector, declarations, nested = frames.pop()
        if not selector:
            return  # No selector: discard the block and everything in it

        ancestors = [frame[0] for frame in frames[1:]]
        label = selector
        if ancestors:
            label = f"{selector} (in {' > '.join(ancestors)})"

        output = frames[-1][2]
        output.append(f"Selector: {label}")
        output.extend(declarations)
        output.append("")  # Empty line for readability
        output.extend(nested)