8077140e1e99_v3_api_database_revision.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. """v3_api_database_revision.
  2. Revision ID: 8077140e1e99
  3. Revises: 2fac23e4d91b
  4. Create Date: 2024-12-03 12:10:10.878485
  5. """
  6. import os
  7. from typing import Sequence, Union
  8. import sqlalchemy as sa
  9. from alembic import op
  10. from sqlalchemy import inspect
  11. # revision identifiers, used by Alembic.
  12. revision: str = "8077140e1e99"
  13. down_revision: Union[str, None] = "2fac23e4d91b"
  14. branch_labels: Union[str, Sequence[str], None] = None
  15. depends_on: Union[str, Sequence[str], None] = None
  16. project_name = os.getenv("R2R_PROJECT_NAME")
  17. if not project_name:
  18. raise ValueError(
  19. "Environment variable `R2R_PROJECT_NAME` must be provided migrate, it should be set equal to the value of `project_name` in your `r2r.toml`."
  20. )
  21. def check_if_upgrade_needed():
  22. """Check if the upgrade has already been applied or is needed."""
  23. connection = op.get_bind()
  24. inspector = inspect(connection)
  25. # Check collections table column names
  26. collections_columns = {
  27. col["name"]
  28. for col in inspector.get_columns("collections", schema=project_name)
  29. }
  30. # If we find a new column name, we don't need to migrate
  31. # If we find an old column name, we do need to migrate
  32. if "id" in collections_columns:
  33. print(
  34. "Migration not needed: collections table already has 'id' column"
  35. )
  36. return False
  37. elif "collection_id" in collections_columns:
  38. print("Migration needed: collections table has old column names")
  39. return True
  40. else:
  41. print(
  42. "Migration not needed: collections table doesn't exist or has different structure"
  43. )
  44. return False
  45. def upgrade() -> None:
  46. if not check_if_upgrade_needed():
  47. return
  48. # Collections table migration
  49. op.alter_column(
  50. "collections",
  51. "collection_id",
  52. new_column_name="id",
  53. schema=project_name,
  54. )
  55. op.drop_column(
  56. "collections",
  57. "graph_search_results_enrichment_status",
  58. schema=project_name,
  59. )
  60. op.add_column(
  61. "collections",
  62. sa.Column(
  63. "owner_id",
  64. sa.UUID,
  65. server_default=sa.text("'2acb499e-8428-543b-bd85-0d9098718220'"),
  66. ),
  67. schema=project_name,
  68. )
  69. op.add_column(
  70. "collections",
  71. sa.Column(
  72. "graph_sync_status", sa.Text, server_default=sa.text("'pending'")
  73. ),
  74. schema=project_name,
  75. )
  76. op.add_column(
  77. "collections",
  78. sa.Column(
  79. "graph_cluster_status",
  80. sa.Text,
  81. server_default=sa.text("'pending'"),
  82. ),
  83. schema=project_name,
  84. )
  85. # Documents table migration
  86. op.rename_table(
  87. "document_info",
  88. "documents",
  89. schema=project_name,
  90. )
  91. op.alter_column(
  92. "documents",
  93. "document_id",
  94. new_column_name="id",
  95. schema=project_name,
  96. )
  97. op.alter_column(
  98. "documents",
  99. "user_id",
  100. new_column_name="owner_id",
  101. schema=project_name,
  102. )
  103. op.drop_column(
  104. "documents",
  105. "graph_search_results_extraction_status",
  106. schema=project_name,
  107. )
  108. op.add_column(
  109. "documents",
  110. sa.Column(
  111. "extraction_status",
  112. sa.Text,
  113. server_default=sa.text("'pending'"),
  114. ),
  115. schema=project_name,
  116. )
  117. op.alter_column(
  118. "documents",
  119. "doc_search_vector",
  120. new_column_name="raw_tsvector",
  121. schema=project_name,
  122. )
  123. # Files table migration
  124. op.rename_table(
  125. "file_storage",
  126. "files",
  127. schema=project_name,
  128. )
  129. op.alter_column(
  130. "files",
  131. "file_name",
  132. new_column_name="name",
  133. schema=project_name,
  134. )
  135. op.alter_column(
  136. "files",
  137. "file_oid",
  138. new_column_name="oid",
  139. schema=project_name,
  140. )
  141. op.alter_column(
  142. "files",
  143. "file_size",
  144. new_column_name="size",
  145. schema=project_name,
  146. )
  147. op.alter_column(
  148. "files",
  149. "file_type",
  150. new_column_name="type",
  151. schema=project_name,
  152. )
  153. # Prompts table migration
  154. op.alter_column(
  155. "prompts",
  156. "prompt_id",
  157. new_column_name="id",
  158. schema=project_name,
  159. )
  160. # Users table migration
  161. op.alter_column(
  162. "users",
  163. "user_id",
  164. new_column_name="id",
  165. schema=project_name,
  166. )
  167. # Chunks table migration
  168. op.rename_table(
  169. "vectors",
  170. "chunks",
  171. schema=project_name,
  172. )
  173. op.alter_column(
  174. "chunks",
  175. "extraction_id",
  176. new_column_name="id",
  177. schema=project_name,
  178. )
  179. op.alter_column(
  180. "chunks",
  181. "user_id",
  182. new_column_name="owner_id",
  183. schema=project_name,
  184. )
  185. def downgrade() -> None:
  186. # Collections table migration
  187. op.alter_column(
  188. "collections",
  189. "id",
  190. new_column_name="collection_id",
  191. schema=project_name,
  192. )
  193. op.add_column(
  194. "collections",
  195. sa.Column(
  196. "graph_search_results_enrichment_status",
  197. sa.Text,
  198. server_default=sa.text("'pending'"),
  199. ),
  200. schema=project_name,
  201. )
  202. op.drop_column(
  203. "collections",
  204. "owner_id",
  205. schema=project_name,
  206. )
  207. op.drop_column(
  208. "collections",
  209. "graph_sync_status",
  210. schema=project_name,
  211. )
  212. op.drop_column(
  213. "collections",
  214. "graph_cluster_status",
  215. schema=project_name,
  216. )
  217. # Documents table migration
  218. op.rename_table(
  219. "documents",
  220. "document_info",
  221. schema=project_name,
  222. )
  223. op.alter_column(
  224. "document_info",
  225. "id",
  226. new_column_name="document_id",
  227. schema=project_name,
  228. )
  229. op.alter_column(
  230. "document_info",
  231. "owner_id",
  232. new_column_name="user_id",
  233. schema=project_name,
  234. )
  235. op.add_column(
  236. "document_info",
  237. sa.Column(
  238. "graph_search_results_extraction_status",
  239. sa.Text,
  240. server_default=sa.text("'pending'"),
  241. ),
  242. schema=project_name,
  243. )
  244. op.drop_column(
  245. "document_info",
  246. "extraction_status",
  247. schema=project_name,
  248. )
  249. op.alter_column(
  250. "document_info",
  251. "raw_tsvector",
  252. new_column_name="doc_search_vector",
  253. schema=project_name,
  254. )
  255. # Files table migration
  256. op.rename_table(
  257. "files",
  258. "file_storage",
  259. schema=project_name,
  260. )
  261. op.alter_column(
  262. "file_storage",
  263. "name",
  264. new_column_name="file_name",
  265. schema=project_name,
  266. )
  267. op.alter_column(
  268. "file_storage",
  269. "oid",
  270. new_column_name="file_oid",
  271. schema=project_name,
  272. )
  273. op.alter_column(
  274. "file_storage",
  275. "size",
  276. new_column_name="file_size",
  277. schema=project_name,
  278. )
  279. op.alter_column(
  280. "file_storage",
  281. "type",
  282. new_column_name="file_type",
  283. schema=project_name,
  284. )
  285. # Prompts table migration
  286. op.alter_column(
  287. "prompts",
  288. "id",
  289. new_column_name="prompt_id",
  290. schema=project_name,
  291. )
  292. # Users table migration
  293. op.alter_column(
  294. "users",
  295. "id",
  296. new_column_name="user_id",
  297. schema=project_name,
  298. )
  299. # Chunks table migration
  300. op.rename_table(
  301. "chunks",
  302. "vectors",
  303. schema=project_name,
  304. )
  305. op.alter_column(
  306. "vectors",
  307. "id",
  308. new_column_name="extraction_id",
  309. schema=project_name,
  310. )
  311. op.alter_column(
  312. "vectors",
  313. "owner_id",
  314. new_column_name="user_id",
  315. schema=project_name,
  316. )