serper.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. # TODO - relocate to a dedicated module
  2. import http.client
  3. import json
  4. import logging
  5. import os
  6. logger = logging.getLogger(__name__)
  7. # TODO - Move process json to dedicated data processing module
  8. def process_json(json_object, indent=0):
  9. """Recursively traverses the JSON object (dicts and lists) to create an
  10. unstructured text blob."""
  11. text_blob = ""
  12. if isinstance(json_object, dict):
  13. for key, value in json_object.items():
  14. padding = " " * indent
  15. if isinstance(value, (dict, list)):
  16. text_blob += (
  17. f"{padding}{key}:\n{process_json(value, indent + 1)}"
  18. )
  19. else:
  20. text_blob += f"{padding}{key}: {value}\n"
  21. elif isinstance(json_object, list):
  22. for index, item in enumerate(json_object):
  23. padding = " " * indent
  24. if isinstance(item, (dict, list)):
  25. text_blob += f"{padding}Item {index + 1}:\n{process_json(item, indent + 1)}"
  26. else:
  27. text_blob += f"{padding}Item {index + 1}: {item}\n"
  28. return text_blob
  29. # TODO - Introduce abstract "Integration" ABC.
  30. class SerperClient:
  31. def __init__(self, api_base: str = "google.serper.dev") -> None:
  32. api_key = os.getenv("SERPER_API_KEY")
  33. if not api_key:
  34. raise ValueError(
  35. "Please set the `SERPER_API_KEY` environment variable to use `SerperClient`."
  36. )
  37. self.api_base = api_base
  38. self.headers = {
  39. "X-API-KEY": api_key,
  40. "Content-Type": "application/json",
  41. }
  42. @staticmethod
  43. def _extract_results(result_data: dict) -> list:
  44. formatted_results = []
  45. for key, value in result_data.items():
  46. # Skip searchParameters as it's not a result entry
  47. if key == "searchParameters":
  48. continue
  49. # Handle 'answerBox' as a single item
  50. if key == "answerBox":
  51. value["type"] = key # Add the type key to the dictionary
  52. formatted_results.append(value)
  53. # Handle lists of results
  54. elif isinstance(value, list):
  55. for item in value:
  56. item["type"] = key # Add the type key to the dictionary
  57. formatted_results.append(item)
  58. # Handle 'peopleAlsoAsk' and potentially other single item formats
  59. elif isinstance(value, dict):
  60. value["type"] = key # Add the type key to the dictionary
  61. formatted_results.append(value)
  62. return formatted_results
  63. # TODO - Add explicit typing for the return value
  64. def get_raw(self, query: str, limit: int = 10) -> list:
  65. connection = http.client.HTTPSConnection(self.api_base)
  66. payload = json.dumps({"q": query, "num_outputs": limit})
  67. connection.request("POST", "/search", payload, self.headers)
  68. response = connection.getresponse()
  69. logger.debug("Received response {response} from Serper API.")
  70. data = response.read()
  71. json_data = json.loads(data.decode("utf-8"))
  72. return SerperClient._extract_results(json_data)