# Image for the Unstructured integration service: a FastAPI app (main.py)
# served by uvicorn on port 7275.
FROM python:3.12-slim AS builder

# System dependencies (including those needed for Unstructured and OpenCV).
# `libgl1` replaces the transitional `libgl1-mesa-glx` package, which newer
# Debian releases no longer ship; `libgl1` is the library it pulled in.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        g++ \
        gcc \
        gfortran \
        libffi-dev \
        libgl1 \
        libglib2.0-0 \
        libleptonica-dev \
        libmagic1 \
        libopenblas-dev \
        libreoffice \
        libtesseract-dev \
        musl-dev \
        pandoc \
        pkg-config \
        poppler-utils \
        tesseract-ocr \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# TESSDATA_PREFIX points Tesseract at its language data directory.
# The PYTHON* settings disable .pyc files and stdout/stderr buffering.
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

WORKDIR /app

# The extras requirement already installs the base `unstructured` package.
RUN pip install --no-cache-dir "unstructured[all-docs]"

# Run Unstructured's model initialisation at build time.
# NOTE(review): presumably this pre-fetches model/NLP assets so the first
# request does not pay the download cost - confirm where they are cached.
RUN python -c "from unstructured.partition.model_init import initialize; initialize()"

# Web-serving stack, kept in its own layer so changing it does not
# invalidate the heavy Unstructured layers above.
RUN pip install --no-cache-dir gunicorn uvicorn fastapi httpx

# The source path is relative to the build context.
COPY core/integrations/unstructured/main.py .

# NOTE(review): the container runs as root. Before adding a USER directive,
# confirm that any caches written by the initialize() step above remain
# readable by the new user.

EXPOSE 7275

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7275", "--workers", "8"]