c45a9cf6a8a4_add_user_and_document_count_to_.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. """Add user and document count to collection
  2. Revision ID: c45a9cf6a8a4
  3. Revises:
  4. Create Date: 2024-12-10 13:28:07.798167
  5. """
  6. import os
  7. from typing import Sequence, Union
  8. import sqlalchemy as sa
  9. from alembic import op
  10. # revision identifiers, used by Alembic.
  11. revision: str = "c45a9cf6a8a4"
  12. down_revision: Union[str, None] = None
  13. branch_labels: Union[str, Sequence[str], None] = None
  14. depends_on: Union[str, Sequence[str], None] = None
  15. project_name = os.getenv("R2R_PROJECT_NAME")
  16. if not project_name:
  17. raise ValueError(
  18. "Environment variable `R2R_PROJECT_NAME` must be provided migrate, it should be set equal to the value of `project_name` in your `r2r.toml`."
  19. )
  20. def upgrade():
  21. # Add the new columns with default value of 0
  22. op.add_column(
  23. "collections",
  24. sa.Column(
  25. "user_count", sa.Integer(), nullable=False, server_default="0"
  26. ),
  27. schema=project_name,
  28. )
  29. op.add_column(
  30. "collections",
  31. sa.Column(
  32. "document_count", sa.Integer(), nullable=False, server_default="0"
  33. ),
  34. schema=project_name,
  35. )
  36. # Initialize the counts based on existing relationships
  37. op.execute(
  38. f"""
  39. WITH collection_counts AS (
  40. SELECT c.id,
  41. COUNT(DISTINCT u.id) as user_count,
  42. COUNT(DISTINCT d.id) as document_count
  43. FROM {project_name}.collections c
  44. LEFT JOIN {project_name}.users u ON c.id = ANY(u.collection_ids)
  45. LEFT JOIN {project_name}.documents d ON c.id = ANY(d.collection_ids)
  46. GROUP BY c.id
  47. )
  48. UPDATE {project_name}.collections c
  49. SET user_count = COALESCE(cc.user_count, 0),
  50. document_count = COALESCE(cc.document_count, 0)
  51. FROM collection_counts cc
  52. WHERE c.id = cc.id
  53. """
  54. )
  55. def downgrade():
  56. op.drop_column("collections", "document_count", schema=project_name)
  57. op.drop_column("collections", "user_count", schema=project_name)