utils.py

import base64
import io
import logging
from typing import Tuple

from PIL import Image

logger = logging.getLogger(__name__)


def resize_base64_image(
    base64_string: str,
    max_size: Tuple[int, int] = (512, 512),
    max_megapixels: float = 0.25,
) -> str:
    """Aggressively resize a base64-encoded image, with error handling and debug output."""
    logger.debug(f"Resizing image; original base64 length: {len(base64_string)} chars")

    # Decode the base64 string to bytes and open it as an image
    try:
        image_data = base64.b64decode(base64_string)
        image = Image.open(io.BytesIO(image_data))
        logger.debug(f"Image opened successfully: {image.format} {image.size}")
    except Exception as e:
        logger.debug(f"Failed to decode/open image: {e}")
        # Emergency fallback - truncate the base64 string to reduce tokens
        if len(base64_string) > 50000:
            return base64_string[:50000]
        return base64_string

    try:
        width, height = image.size
        current_megapixels = (width * height) / 1_000_000
        logger.debug(
            f"Original dimensions: {width}x{height} ({current_megapixels:.2f} MP)"
        )

        # Use much more aggressive limits for large images
        if current_megapixels > 0.5:
            max_size = (384, 384)
            max_megapixels = 0.15
            logger.debug("Large image detected; using more aggressive limits")

        # Calculate new dimensions with strict enforcement:
        # always resize if the image exceeds any of the limits
        scale_factor = min(
            max_size[0] / width,
            max_size[1] / height,
            (max_megapixels / current_megapixels) ** 0.5,
        )
        if scale_factor >= 1.0:
            # No resize needed, but still recompress below
            new_width, new_height = width, height
        else:
            # Apply scaling, with a 64px floor on each dimension
            new_width = max(int(width * scale_factor), 64)
            new_height = max(int(height * scale_factor), 64)

        # Always resize/recompress the image
        logger.debug(f"Resizing to: {new_width}x{new_height}")
        resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

        # Convert back to base64 with strong compression
        buffer = io.BytesIO()
        if image.format == "JPEG" or image.format is None:
            # JPEG cannot store an alpha channel or palette; flatten to RGB first
            if resized_image.mode not in ("RGB", "L"):
                resized_image = resized_image.convert("RGB")
            # Apply very aggressive JPEG compression
            quality = 50  # Very low quality to reduce size
            resized_image.save(buffer, format="JPEG", quality=quality, optimize=True)
        else:
            # Keep the original format for everything else
            resized_image.save(buffer, format=image.format or "PNG", optimize=True)

        resized_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
        logger.debug(
            f"Resized base64 length: {len(resized_base64)} chars "
            f"(reduction: {100 * (1 - len(resized_base64) / len(base64_string)):.1f}%)"
        )
        return resized_base64
    except Exception as e:
        logger.debug(f"Error during resize: {e}")
        # If anything goes wrong, truncate the base64 to a reasonable size
        if len(base64_string) > 50000:
            return base64_string[:50000]
        return base64_string


def estimate_image_tokens(width: int, height: int) -> int:
    """
    Estimate the number of tokens an image will use, based on Anthropic's
    formula: tokens ≈ (width * height) / 750.

    Args:
        width: Image width in pixels
        height: Image height in pixels

    Returns:
        Estimated number of tokens
    """
    return int((width * height) / 750)
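

if __name__ == "__main__":
    # Usage sketch: generate a synthetic 2048x1536 JPEG, run it through
    # resize_base64_image, and compare the estimated token cost before and
    # after. The dimensions, quality, and color here are arbitrary example
    # values, not requirements of the functions above.
    logging.basicConfig(level=logging.DEBUG)

    test_image = Image.new("RGB", (2048, 1536), color=(70, 130, 180))
    buf = io.BytesIO()
    test_image.save(buf, format="JPEG", quality=90)
    encoded = base64.b64encode(buf.getvalue()).decode("utf-8")

    resized = resize_base64_image(encoded)
    small = Image.open(io.BytesIO(base64.b64decode(resized)))

    print(f"Base64 length: {len(encoded)} -> {len(resized)} chars")
    print(
        f"Token estimate: {estimate_image_tokens(2048, 1536)} -> "
        f"{estimate_image_tokens(*small.size)}"
    )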