1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- # TODO - relocate to a dedicated module
- import http.client
- import json
- import logging
- import os
- logger = logging.getLogger(__name__)
# TODO - Move `process_json` to a dedicated data-processing module.
def process_json(json_object, indent=0):
    """Flatten a parsed JSON value into an indented, unstructured text blob.

    Dictionaries contribute one ``key: value`` line per entry and lists
    contribute one ``Item N: value`` line per element (``N`` starts at 1).
    A nested dict or list is rendered beneath its label, one indentation
    level deeper.

    Args:
        json_object: The value to render; only dicts and lists produce text.
        indent: Current nesting depth; each level prefixes lines with one
            more padding character.

    Returns:
        The rendered text, or an empty string when ``json_object`` is
        neither a dict nor a list.
    """
    # Reduce both container kinds to (label, value) pairs so a single loop
    # can render them.
    if isinstance(json_object, dict):
        labelled = list(json_object.items())
    elif isinstance(json_object, list):
        labelled = [
            (f"Item {position}", entry)
            for position, entry in enumerate(json_object, start=1)
        ]
    else:
        # A bare scalar has no label to print, so it yields nothing.
        return ""

    prefix = " " * indent
    pieces = []
    for label, entry in labelled:
        if isinstance(entry, (dict, list)):
            # Containers get a header line followed by their own rendering.
            pieces.append(
                f"{prefix}{label}:\n{process_json(entry, indent + 1)}"
            )
        else:
            pieces.append(f"{prefix}{label}: {entry}\n")
    return "".join(pieces)
- # TODO - Introduce abstract "Integration" ABC.
class SerperClient:
    """Minimal client for the Serper search API built on ``http.client``.

    The API key is read from the ``SERPER_API_KEY`` environment variable
    when the client is constructed and is sent with every request in the
    ``X-API-KEY`` header.
    """

    def __init__(self, api_base: str = "google.serper.dev") -> None:
        """Create a client bound to ``api_base``.

        Args:
            api_base: Host name the HTTPS connection is opened against.

        Raises:
            ValueError: If ``SERPER_API_KEY`` is unset or empty.
        """
        api_key = os.getenv("SERPER_API_KEY")
        if not api_key:
            raise ValueError(
                "Please set the `SERPER_API_KEY` environment variable to use `SerperClient`."
            )

        self.api_base = api_base
        self.headers = {
            "X-API-KEY": api_key,
            "Content-Type": "application/json",
        }

    @staticmethod
    def _extract_results(result_data: dict) -> list:
        """Flatten a decoded response into a list of type-tagged result dicts.

        Every top-level key except ``searchParameters`` is inspected. A dict
        value (such as ``answerBox``) becomes one result; a list value
        contributes one result per dict element. Each result carries a
        ``"type"`` entry set to the top-level key it came from. Scalar values
        and non-dict list elements are ignored.

        The input is not modified: results are shallow copies.

        Args:
            result_data: The decoded JSON object returned by the API.

        Returns:
            The tagged results, in the order they appear in ``result_data``.
        """
        formatted_results = []
        for key, value in result_data.items():
            # searchParameters echoes the request; it is not a result entry.
            if key == "searchParameters":
                continue

            if isinstance(value, dict):
                # Single-item sections: copy so the caller's dict is untouched.
                formatted_results.append({**value, "type": key})
            elif isinstance(value, list):
                # Only dict entries can carry a "type" tag; anything else
                # would previously have raised TypeError on item assignment.
                formatted_results.extend(
                    {**item, "type": key}
                    for item in value
                    if isinstance(item, dict)
                )
        return formatted_results

    # TODO - Add explicit typing for the return value
    def get_raw(self, query: str, limit: int = 10) -> list:
        """POST a search query to ``/search`` and return the tagged results.

        Args:
            query: The search string, sent as ``q``.
            limit: Requested number of results, sent as ``num_outputs``.

        Returns:
            The list produced by ``_extract_results`` for the decoded body.

        Raises:
            json.JSONDecodeError: If the response body is not valid JSON.
        """
        connection = http.client.HTTPSConnection(self.api_base)
        try:
            # NOTE(review): Serper's public examples appear to use `num` for
            # the result count - confirm `num_outputs` is honoured by the API.
            payload = json.dumps({"q": query, "num_outputs": limit})
            connection.request("POST", "/search", payload, self.headers)
            response = connection.getresponse()
            # Lazy %-formatting: the old message was a plain string and
            # logged the literal text "{response}".
            logger.debug(
                "Received response %s %s from Serper API.",
                response.status,
                response.reason,
            )
            data = response.read()
        finally:
            # Release the socket even when the request or read fails.
            connection.close()

        json_data = json.loads(data.decode("utf-8"))
        return SerperClient._extract_results(json_data)
|