123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- # TODO - relocate to a dedicated module
- import http.client
- import json
- import os
- # TODO - Move process json to dedicated data processing module
- def process_json(json_object, indent=0):
- """
- Recursively traverses the JSON object (dicts and lists) to create an unstructured text blob.
- """
- text_blob = ""
- if isinstance(json_object, dict):
- for key, value in json_object.items():
- padding = " " * indent
- if isinstance(value, (dict, list)):
- text_blob += (
- f"{padding}{key}:\n{process_json(value, indent + 1)}"
- )
- else:
- text_blob += f"{padding}{key}: {value}\n"
- elif isinstance(json_object, list):
- for index, item in enumerate(json_object):
- padding = " " * indent
- if isinstance(item, (dict, list)):
- text_blob += f"{padding}Item {index + 1}:\n{process_json(item, indent + 1)}"
- else:
- text_blob += f"{padding}Item {index + 1}: {item}\n"
- return text_blob
- # TODO - Introduce abstract "Integration" ABC.
- class SerperClient:
- def __init__(self, api_base: str = "google.serper.dev") -> None:
- api_key = os.getenv("SERPER_API_KEY")
- if not api_key:
- raise ValueError(
- "Please set the `SERPER_API_KEY` environment variable to use `SerperClient`."
- )
- self.api_base = api_base
- self.headers = {
- "X-API-KEY": api_key,
- "Content-Type": "application/json",
- }
- @staticmethod
- def _extract_results(result_data: dict) -> list:
- formatted_results = []
- for key, value in result_data.items():
- # Skip searchParameters as it's not a result entry
- if key == "searchParameters":
- continue
- # Handle 'answerBox' as a single item
- if key == "answerBox":
- value["type"] = key # Add the type key to the dictionary
- formatted_results.append(value)
- # Handle lists of results
- elif isinstance(value, list):
- for item in value:
- item["type"] = key # Add the type key to the dictionary
- formatted_results.append(item)
- # Handle 'peopleAlsoAsk' and potentially other single item formats
- elif isinstance(value, dict):
- value["type"] = key # Add the type key to the dictionary
- formatted_results.append(value)
- return formatted_results
- # TODO - Add explicit typing for the return value
- def get_raw(self, query: str, limit: int = 10) -> list:
- connection = http.client.HTTPSConnection(self.api_base)
- payload = json.dumps({"q": query, "num_outputs": limit})
- connection.request("POST", "/search", payload, self.headers)
- response = connection.getresponse()
- data = response.read()
- json_data = json.loads(data.decode("utf-8"))
- return SerperClient._extract_results(json_data)
- @staticmethod
- def construct_context(results: list) -> str:
- # Organize results by type
- organized_results = {}
- for result in results:
- result_type = result.metadata.pop(
- "type", "Unknown"
- ) # Pop the type and use as key
- if result_type not in organized_results:
- organized_results[result_type] = [result.metadata]
- else:
- organized_results[result_type].append(result.metadata)
- context = ""
- # Iterate over each result type
- for result_type, items in organized_results.items():
- context += f"# {result_type} Results:\n"
- for index, item in enumerate(items, start=1):
- # Process each item under the current type
- context += f"Item {index}:\n"
- context += process_json(item) + "\n"
- return context
|