# conftest.py — common test fixtures for the retrieval tests.
  1. """
  2. Common test fixtures for retrieval tests.
  3. """
  4. import pytest
  5. from unittest.mock import AsyncMock, MagicMock, patch
  6. from typing import Any, Optional
  7. class MockSearchSettings:
  8. """Mock class for SearchSettings to avoid dependency issues."""
  9. def __init__(self, **kwargs):
  10. self.__dict__.update(kwargs)
  11. # Set defaults for commonly used attributes
  12. for attr in ['use_semantic_search', 'use_hybrid_search', 'use_full_text_search',
  13. 'use_graph_search', 'filters', 'limit', 'offset', 'search_strategy',
  14. 'num_sub_queries', 'use_citation_search', 'hybrid_settings']:
  15. if not hasattr(self, attr):
  16. setattr(self, attr, None)
  17. # Default values
  18. if self.search_strategy is None:
  19. self.search_strategy = "basic"
  20. if self.limit is None:
  21. self.limit = 10
  22. if self.filters is None:
  23. self.filters = {}
  24. if self.offset is None:
  25. self.offset = 0
  26. if self.num_sub_queries is None:
  27. self.num_sub_queries = 3
  28. if self.hybrid_settings is None:
  29. self.hybrid_settings = {
  30. "semantic_weight": 0.5,
  31. "full_text_weight": 0.5
  32. }
  33. class MockDocument:
  34. """Mock Document class for testing."""
  35. def __init__(self, document_id, raw_text, metadata=None, chunks=None):
  36. self.document_id = document_id
  37. self.raw_text = raw_text
  38. self.metadata = metadata or {}
  39. self.chunks = chunks or []
  40. class MockChunk:
  41. """Mock Chunk class for testing."""
  42. def __init__(self, chunk_id, document_id, text, metadata=None):
  43. self.chunk_id = chunk_id
  44. self.document_id = document_id
  45. self.text = text
  46. self.metadata = metadata or {}
  47. self.embedding = None
  48. class MockCitation:
  49. """Mock Citation class for testing."""
  50. def __init__(self, citation_id, text, metadata=None, source=None):
  51. self.citation_id = citation_id
  52. self.text = text
  53. self.metadata = metadata or {}
  54. self.source = source or "unknown"
  55. @pytest.fixture
  56. def mock_providers():
  57. """Return a mocked providers object for testing."""
  58. class MockProviders:
  59. def __init__(self):
  60. # Mock the embedding provider
  61. self.completion_embedding = AsyncMock()
  62. self.completion_embedding.async_get_embedding = AsyncMock(
  63. return_value=[0.123] * 768 # pretend vector
  64. )
  65. # Mock the database chunks handler
  66. self.database = AsyncMock()
  67. self.database.chunks_handler = AsyncMock()
  68. self.database.chunks_handler.semantic_search = AsyncMock(
  69. return_value=[
  70. {
  71. "chunk_id": f"chunk-{i}",
  72. "document_id": f"doc-{i//2}",
  73. "text": f"This is search result {i} about philosophy.",
  74. "metadata": {"source": f"source-{i}"},
  75. "score": 0.95 - (i * 0.05),
  76. }
  77. for i in range(5)
  78. ]
  79. )
  80. self.database.chunks_handler.full_text_search = AsyncMock(
  81. return_value=[
  82. {
  83. "chunk_id": f"chunk-ft-{i}",
  84. "document_id": f"doc-ft-{i//2}",
  85. "text": f"Full-text search result {i} about philosophy.",
  86. "metadata": {"source": f"ft-source-{i}"},
  87. "score": 0.9 - (i * 0.05),
  88. }
  89. for i in range(5)
  90. ]
  91. )
  92. self.database.chunks_handler.hybrid_search = AsyncMock(
  93. return_value=[
  94. {
  95. "chunk_id": f"chunk-hybrid-{i}",
  96. "document_id": f"doc-hybrid-{i//2}",
  97. "text": f"Hybrid search result {i} about philosophy.",
  98. "metadata": {"source": f"hybrid-source-{i}"},
  99. "score": 0.92 - (i * 0.05),
  100. }
  101. for i in range(5)
  102. ]
  103. )
  104. # Mock graphs handler
  105. self.database.graphs_handler = AsyncMock()
  106. self.database.graphs_handler.graph_search = AsyncMock(
  107. return_value=iter([
  108. {
  109. "node_id": f"node-{i}",
  110. "document_id": f"doc-{i}",
  111. "text": f"Graph search result {i}.",
  112. "score": 0.85 - (i * 0.05),
  113. }
  114. for i in range(3)
  115. ])
  116. )
  117. # Mock citation handler
  118. self.database.citations_handler = AsyncMock()
  119. self.database.citations_handler.get_citations = AsyncMock(
  120. return_value=[
  121. MockCitation(
  122. citation_id=f"cite-{i}",
  123. text=f"Citation {i} from an important source.",
  124. metadata={"author": f"Author {i}", "year": 2020 + i},
  125. source=f"Book {i}"
  126. )
  127. for i in range(3)
  128. ]
  129. )
  130. # Mock LLM
  131. self.llm = AsyncMock()
  132. self.llm.aget_completion = AsyncMock(
  133. return_value={"choices": [{"message": {"content": "LLM generated response about philosophy"}}]}
  134. )
  135. self.llm.aget_completion_stream = AsyncMock(
  136. return_value=iter([
  137. {"choices": [{"delta": {"content": "Streamed "}}]},
  138. {"choices": [{"delta": {"content": "response "}}]},
  139. {"choices": [{"delta": {"content": "about "}}]},
  140. {"choices": [{"delta": {"content": "philosophy"}}]}
  141. ])
  142. )
  143. # Mock prompts handler
  144. self.database.prompts_handler = AsyncMock()
  145. self.database.prompts_handler.get_cached_prompt = AsyncMock(
  146. return_value="System prompt with {{context}} and {{query}} placeholders"
  147. )
  148. # Set up different prompt templates
  149. self.prompts = {
  150. "default": "Answer based on the following context: {{context}}\n\nQuery: {{query}}",
  151. "hyde_template": "Generate a hypothetical document about: {{query}}",
  152. "rag_fusion": "Generate {num_queries} search queries related to: {{query}}",
  153. "citation_format": "Format citation for {{source}}: {{text}}"
  154. }
  155. # Update get_cached_prompt to use different templates
  156. async def get_cached_prompt(prompt_id):
  157. return self.prompts.get(prompt_id, self.prompts["default"])
  158. self.database.prompts_handler.get_cached_prompt.side_effect = get_cached_prompt
  159. return MockProviders()
  160. @pytest.fixture
  161. def sample_chunk_results():
  162. """Sample chunk results for testing."""
  163. return [
  164. {
  165. "chunk_id": f"chunk-{i}",
  166. "document_id": f"doc-{i//2}",
  167. "text": f"This is chunk {i} about philosophy.",
  168. "metadata": {"source": f"source-{i}", "page": i + 1},
  169. "score": 0.95 - (i * 0.05),
  170. }
  171. for i in range(5)
  172. ]
  173. @pytest.fixture
  174. def sample_documents():
  175. """Sample documents for testing."""
  176. return [
  177. MockDocument(
  178. document_id=f"doc-{i}",
  179. raw_text=f"This is document {i} about philosophy with multiple paragraphs.\n\n"
  180. f"It contains information from various sources and perspectives.",
  181. metadata={"title": f"Philosophy Text {i}", "author": f"Author {i}"}
  182. )
  183. for i in range(3)
  184. ]