|
|
@@ -76,7 +76,7 @@ class UnstructuredIngestionConfig(IngestionConfig):
|
|
|
|
|
|
x = json.loads(self.json())
|
|
|
x.pop("extra_fields", None)
|
|
|
- #x.pop("provider", None)
|
|
|
+ x.pop("provider", None)
|
|
|
x.pop("excluded_parsers", None)
|
|
|
|
|
|
x = {k: v for k, v in x.items() if v is not None}
|
|
|
@@ -142,6 +142,7 @@ class UnstructuredIngestionProvider(IngestionProvider):
|
|
|
self.ocr_provider: MistralOCRProvider = ocr_provider
|
|
|
|
|
|
self.client: UnstructuredClient | httpx.AsyncClient
|
|
|
+ config.provider = "unstructured_api"
|
|
|
if config.provider == "unstructured_api":
|
|
|
try:
|
|
|
self.unstructured_api_auth = os.environ["UNSTRUCTURED_API_KEY"]
|
|
|
@@ -455,4 +456,4 @@ class UnstructuredIngestionProvider(IngestionProvider):
|
|
|
)
|
|
|
|
|
|
def get_parser_for_document_type(self, doc_type: DocumentType) -> str:
    """Return the name of the parser used for ``doc_type``.

    The same parser name is returned for every document type; the
    ``doc_type`` argument is accepted but not inspected.

    Args:
        doc_type: The document type being ingested (unused).

    Returns:
        The constant parser name ``"unstructured_api"``.
    """
    return "unstructured_api"
|