Bläddra i källkod

remove unstructured pdf extract (#9794)

tags/0.10.2
Jyong 1 år sedan
förälder
incheckning
5f11fe521d
Inget konto är kopplat till bidragsgivarens mejladress
1 ändrade filer med 1 tillägg och 2 borttagningar
  1. 1
    2
      api/core/rag/extractor/extract_processor.py

+ 1
- 2
api/core/rag/extractor/extract_processor.py Visa fil

from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor
from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor
from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor
from core.rag.extractor.unstructured.unstructured_pdf_extractor import UnstructuredPDFExtractor
from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor
from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor
from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor
if file_extension in {".xlsx", ".xls"}: if file_extension in {".xlsx", ".xls"}:
extractor = ExcelExtractor(file_path) extractor = ExcelExtractor(file_path)
elif file_extension == ".pdf": elif file_extension == ".pdf":
extractor = UnstructuredPDFExtractor(file_path, unstructured_api_url, unstructured_api_key)
extractor = PdfExtractor(file_path)
elif file_extension in {".md", ".markdown"}: elif file_extension in {".md", ".markdown"}:
extractor = ( extractor = (
UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key) UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)

Laddar…
Avbryt
Spara