serper.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. # TODO - relocate to a dedicated module
  2. import http.client
  3. import json
  4. import os
  5. # TODO - Move process json to dedicated data processing module
  6. def process_json(json_object, indent=0):
  7. """
  8. Recursively traverses the JSON object (dicts and lists) to create an unstructured text blob.
  9. """
  10. text_blob = ""
  11. if isinstance(json_object, dict):
  12. for key, value in json_object.items():
  13. padding = " " * indent
  14. if isinstance(value, (dict, list)):
  15. text_blob += (
  16. f"{padding}{key}:\n{process_json(value, indent + 1)}"
  17. )
  18. else:
  19. text_blob += f"{padding}{key}: {value}\n"
  20. elif isinstance(json_object, list):
  21. for index, item in enumerate(json_object):
  22. padding = " " * indent
  23. if isinstance(item, (dict, list)):
  24. text_blob += f"{padding}Item {index + 1}:\n{process_json(item, indent + 1)}"
  25. else:
  26. text_blob += f"{padding}Item {index + 1}: {item}\n"
  27. return text_blob
  28. # TODO - Introduce abstract "Integration" ABC.
  29. class SerperClient:
  30. def __init__(self, api_base: str = "google.serper.dev") -> None:
  31. api_key = os.getenv("SERPER_API_KEY")
  32. if not api_key:
  33. raise ValueError(
  34. "Please set the `SERPER_API_KEY` environment variable to use `SerperClient`."
  35. )
  36. self.api_base = api_base
  37. self.headers = {
  38. "X-API-KEY": api_key,
  39. "Content-Type": "application/json",
  40. }
  41. @staticmethod
  42. def _extract_results(result_data: dict) -> list:
  43. formatted_results = []
  44. for key, value in result_data.items():
  45. # Skip searchParameters as it's not a result entry
  46. if key == "searchParameters":
  47. continue
  48. # Handle 'answerBox' as a single item
  49. if key == "answerBox":
  50. value["type"] = key # Add the type key to the dictionary
  51. formatted_results.append(value)
  52. # Handle lists of results
  53. elif isinstance(value, list):
  54. for item in value:
  55. item["type"] = key # Add the type key to the dictionary
  56. formatted_results.append(item)
  57. # Handle 'peopleAlsoAsk' and potentially other single item formats
  58. elif isinstance(value, dict):
  59. value["type"] = key # Add the type key to the dictionary
  60. formatted_results.append(value)
  61. return formatted_results
  62. # TODO - Add explicit typing for the return value
  63. def get_raw(self, query: str, limit: int = 10) -> list:
  64. connection = http.client.HTTPSConnection(self.api_base)
  65. payload = json.dumps({"q": query, "num_outputs": limit})
  66. connection.request("POST", "/search", payload, self.headers)
  67. response = connection.getresponse()
  68. data = response.read()
  69. json_data = json.loads(data.decode("utf-8"))
  70. return SerperClient._extract_results(json_data)
  71. @staticmethod
  72. def construct_context(results: list) -> str:
  73. # Organize results by type
  74. organized_results = {}
  75. for result in results:
  76. result_type = result.metadata.pop(
  77. "type", "Unknown"
  78. ) # Pop the type and use as key
  79. if result_type not in organized_results:
  80. organized_results[result_type] = [result.metadata]
  81. else:
  82. organized_results[result_type].append(result.metadata)
  83. context = ""
  84. # Iterate over each result type
  85. for result_type, items in organized_results.items():
  86. context += f"# {result_type} Results:\n"
  87. for index, item in enumerate(items, start=1):
  88. # Process each item under the current type
  89. context += f"Item {index}:\n"
  90. context += process_json(item) + "\n"
  91. return context