utils.py

import base64
import io
import logging
from typing import Tuple

from PIL import Image

logger = logging.getLogger(__name__)


def resize_base64_image(
    base64_string: str,
    max_size: Tuple[int, int] = (512, 512),
    max_megapixels: float = 0.25,
) -> str:
    """Aggressively resize a base64-encoded image, with error handling and debug output."""
    logger.debug(f"Resizing image; original base64 length: {len(base64_string)} chars")

    # Decode the base64 string to bytes and open it as an image
    try:
        image_data = base64.b64decode(base64_string)
        image = Image.open(io.BytesIO(image_data))
        logger.debug(f"Image opened successfully: {image.format} {image.size}")
    except Exception as e:
        logger.debug(f"Failed to decode/open image: {e}")
        # Emergency fallback - truncate the base64 string to reduce tokens
        if len(base64_string) > 50000:
            return base64_string[:50000]
        return base64_string

    try:
        width, height = image.size
        current_megapixels = (width * height) / 1_000_000
        logger.debug(
            f"Original dimensions: {width}x{height} ({current_megapixels:.2f} MP)"
        )

        # Use much more aggressive limits for large images
        if current_megapixels > 0.5:
            max_size = (384, 384)
            max_megapixels = 0.15
            logger.debug("Large image detected; using more aggressive limits")

        # Calculate new dimensions with strict enforcement:
        # always resize if the image exceeds any of the limits
        scale_factor = min(
            max_size[0] / width,
            max_size[1] / height,
            (max_megapixels / current_megapixels) ** 0.5,
        )
        if scale_factor >= 1.0:
            # No resize needed, but still recompress below
            new_width, new_height = width, height
        else:
            # Apply scaling, with a 64px floor on each dimension
            new_width = max(int(width * scale_factor), 64)
            new_height = max(int(height * scale_factor), 64)

        # Always resize/recompress the image
        logger.debug(f"Resizing to: {new_width}x{new_height}")
        resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

        # Convert back to base64 with strong compression
        buffer = io.BytesIO()
        if image.format == "JPEG" or image.format is None:
            # JPEG cannot store an alpha channel or palette; flatten to RGB first
            if resized_image.mode not in ("RGB", "L"):
                resized_image = resized_image.convert("RGB")
            # Apply very aggressive JPEG compression
            quality = 50  # Very low quality to reduce size
            resized_image.save(buffer, format="JPEG", quality=quality, optimize=True)
        else:
            # Keep the original format for everything else
            resized_image.save(buffer, format=image.format or "PNG", optimize=True)

        resized_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
        logger.debug(
            f"Resized base64 length: {len(resized_base64)} chars "
            f"(reduction: {100 * (1 - len(resized_base64) / len(base64_string)):.1f}%)"
        )
        return resized_base64
    except Exception as e:
        logger.debug(f"Error during resize: {e}")
        # If anything goes wrong, truncate the base64 to a reasonable size
        if len(base64_string) > 50000:
            return base64_string[:50000]
        return base64_string


def estimate_image_tokens(width: int, height: int) -> int:
    """
    Estimate the number of tokens an image will use, based on Anthropic's
    formula: tokens ≈ (width * height) / 750.

    Args:
        width: Image width in pixels
        height: Image height in pixels

    Returns:
        Estimated number of tokens
    """
    return int((width * height) / 750)
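

if __name__ == "__main__":
    # Usage sketch: generate a synthetic 2048x1536 JPEG, run it through
    # resize_base64_image, and compare the estimated token cost before and
    # after. The dimensions, quality, and color here are arbitrary example
    # values, not requirements of the functions above.
    logging.basicConfig(level=logging.DEBUG)

    test_image = Image.new("RGB", (2048, 1536), color=(70, 130, 180))
    buf = io.BytesIO()
    test_image.save(buf, format="JPEG", quality=90)
    encoded = base64.b64encode(buf.getvalue()).decode("utf-8")

    resized = resize_base64_image(encoded)
    small = Image.open(io.BytesIO(base64.b64decode(resized)))

    print(f"Base64 length: {len(encoded)} -> {len(resized)} chars")
    print(
        f"Token estimate: {estimate_image_tokens(2048, 1536)} -> "
        f"{estimate_image_tokens(*small.size)}"
    )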