| from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor | from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor | ||||
| from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor | from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor | ||||
| from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor | from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor | ||||
| from core.rag.extractor.unstructured.unstructured_pdf_extractor import UnstructuredPDFExtractor | |||||
| from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor | from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor | ||||
| from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor | from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor | ||||
| from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor | from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor | ||||
| if file_extension in {".xlsx", ".xls"}: | if file_extension in {".xlsx", ".xls"}: | ||||
| extractor = ExcelExtractor(file_path) | extractor = ExcelExtractor(file_path) | ||||
| elif file_extension == ".pdf": | elif file_extension == ".pdf": | ||||
| extractor = UnstructuredPDFExtractor(file_path, unstructured_api_url, unstructured_api_key) | |||||
| extractor = PdfExtractor(file_path) | |||||
| elif file_extension in {".md", ".markdown"}: | elif file_extension in {".md", ".markdown"}: | ||||
| extractor = ( | extractor = ( | ||||
| UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key) | UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key) |