| @@ -21,7 +21,6 @@ from core.rag.extractor.unstructured.unstructured_eml_extractor import Unstructu | |||
| from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor | |||
| from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor | |||
| from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor | |||
| from core.rag.extractor.unstructured.unstructured_pdf_extractor import UnstructuredPDFExtractor | |||
| from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor | |||
| from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor | |||
| from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor | |||
| @@ -103,7 +102,7 @@ class ExtractProcessor: | |||
| if file_extension in {".xlsx", ".xls"}: | |||
| extractor = ExcelExtractor(file_path) | |||
| elif file_extension == ".pdf": | |||
| extractor = UnstructuredPDFExtractor(file_path, unstructured_api_url, unstructured_api_key) | |||
| extractor = PdfExtractor(file_path) | |||
| elif file_extension in {".md", ".markdown"}: | |||
| extractor = ( | |||
| UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key) | |||