test_documents.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. import json
  2. import uuid
  3. from uuid import UUID
  4. import pytest
  5. from core.base import (
  6. DocumentResponse,
  7. DocumentType,
  8. IngestionStatus,
  9. KGExtractionStatus,
  10. R2RException,
  11. SearchSettings,
  12. )
  13. def make_db_entry(doc: DocumentResponse):
  14. # This simulates what your real code should do:
  15. return {
  16. "id": doc.id,
  17. "collection_ids": doc.collection_ids,
  18. "owner_id": doc.owner_id,
  19. "document_type": doc.document_type.value,
  20. "metadata": json.dumps(doc.metadata),
  21. "title": doc.title,
  22. "version": doc.version,
  23. "size_in_bytes": doc.size_in_bytes,
  24. "ingestion_status": doc.ingestion_status.value,
  25. "extraction_status": doc.extraction_status.value,
  26. "created_at": doc.created_at,
  27. "updated_at": doc.updated_at,
  28. "ingestion_attempt_number": 0,
  29. "summary": doc.summary,
  30. # If summary_embedding is a list, we can store it as a string here if needed
  31. "summary_embedding": (
  32. str(doc.summary_embedding)
  33. if doc.summary_embedding is not None
  34. else None
  35. ),
  36. }
  37. @pytest.mark.asyncio
  38. async def test_upsert_documents_overview_insert(documents_handler):
  39. doc_id = uuid.uuid4()
  40. doc = DocumentResponse(
  41. id=doc_id,
  42. collection_ids=[],
  43. owner_id=uuid.uuid4(),
  44. document_type=DocumentType.TXT,
  45. metadata={"description": "A test document"},
  46. title="Test Doc",
  47. version="v1",
  48. size_in_bytes=1234,
  49. ingestion_status=IngestionStatus.PENDING,
  50. extraction_status=KGExtractionStatus.PENDING,
  51. created_at=None,
  52. updated_at=None,
  53. summary=None,
  54. summary_embedding=None,
  55. )
  56. # Simulate the handler call
  57. await documents_handler.upsert_documents_overview(
  58. [doc]
  59. ) # adjust your handler to accept list or doc
  60. # If your handler expects a db entry dict, you may need to patch handler or adapt your code
  61. # Verify
  62. res = await documents_handler.get_documents_overview(
  63. offset=0, limit=10, filter_document_ids=[doc_id]
  64. )
  65. assert res["total_entries"] == 1
  66. fetched_doc = res["results"][0]
  67. assert fetched_doc.id == doc_id
  68. assert fetched_doc.title == "Test Doc"
  69. assert fetched_doc.metadata["description"] == "A test document"
  70. @pytest.mark.asyncio
  71. async def test_upsert_documents_overview_update(documents_handler):
  72. doc_id = uuid.uuid4()
  73. owner_id = uuid.uuid4()
  74. doc = DocumentResponse(
  75. id=doc_id,
  76. collection_ids=[],
  77. owner_id=owner_id,
  78. document_type=DocumentType.TXT,
  79. metadata={"note": "initial"},
  80. title="Initial Title",
  81. version="v1",
  82. size_in_bytes=100,
  83. ingestion_status=IngestionStatus.PENDING,
  84. extraction_status=KGExtractionStatus.PENDING,
  85. created_at=None,
  86. updated_at=None,
  87. summary=None,
  88. summary_embedding=None,
  89. )
  90. await documents_handler.upsert_documents_overview([doc])
  91. # Update document
  92. doc.title = "Updated Title"
  93. doc.metadata["note"] = "updated"
  94. await documents_handler.upsert_documents_overview([doc])
  95. # Verify update
  96. res = await documents_handler.get_documents_overview(
  97. offset=0, limit=10, filter_document_ids=[doc_id]
  98. )
  99. fetched_doc = res["results"][0]
  100. assert fetched_doc.title == "Updated Title"
  101. assert fetched_doc.metadata["note"] == "updated"
  102. @pytest.mark.asyncio
  103. async def test_delete_document(documents_handler):
  104. doc_id = uuid.uuid4()
  105. doc = DocumentResponse(
  106. id=doc_id,
  107. collection_ids=[],
  108. owner_id=uuid.uuid4(),
  109. document_type=DocumentType.TXT,
  110. metadata={},
  111. title="ToDelete",
  112. version="v1",
  113. size_in_bytes=100,
  114. ingestion_status=IngestionStatus.PENDING,
  115. extraction_status=KGExtractionStatus.PENDING,
  116. created_at=None,
  117. updated_at=None,
  118. summary=None,
  119. summary_embedding=None,
  120. )
  121. await documents_handler.upsert_documents_overview([doc])
  122. await documents_handler.delete(doc_id)
  123. res = await documents_handler.get_documents_overview(
  124. offset=0, limit=10, filter_document_ids=[doc_id]
  125. )
  126. assert res["total_entries"] == 0