Parcourir la source

del doc support (#2494)

Co-authored-by: jyong <jyong@dify.ai>
tags/0.5.7
Jyong il y a 1 an
Parent
révision
20b932da97
Aucun compte lié à l'adresse e-mail de l'auteur
2 fichiers modifiés avec 5 ajouts et 5 suppressions
  1. 2
    2
      api/core/data_loader/file_extractor.py
  2. 3
    3
      api/services/file_service.py

+ 2
- 2
api/core/data_loader/file_extractor.py Voir le fichier

@@ -69,7 +69,7 @@ class FileExtractor:
else MarkdownLoader(file_path, autodetect_encoding=True)
elif file_extension in ['.htm', '.html']:
loader = HTMLLoader(file_path)
elif file_extension in ['.docx', '.doc']:
elif file_extension in ['.docx']:
loader = Docx2txtLoader(file_path)
elif file_extension == '.csv':
loader = CSVLoader(file_path, autodetect_encoding=True)
@@ -96,7 +96,7 @@ class FileExtractor:
loader = MarkdownLoader(file_path, autodetect_encoding=True)
elif file_extension in ['.htm', '.html']:
loader = HTMLLoader(file_path)
elif file_extension in ['.docx', '.doc']:
elif file_extension in ['.docx']:
loader = Docx2txtLoader(file_path)
elif file_extension == '.csv':
loader = CSVLoader(file_path, autodetect_encoding=True)

+ 3
- 3
api/services/file_service.py Voir le fichier

@@ -20,9 +20,9 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])

ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'doc', 'csv'] + IMAGE_EXTENSIONS
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS
UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
'docx', 'doc', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
PREVIEW_WORDS_LIMIT = 3000


@@ -162,7 +162,7 @@ class FileService:
generator = storage.load(upload_file.key, stream=True)

return generator, upload_file.mime_type
@staticmethod
def get_public_image_preview(file_id: str) -> str:
upload_file = db.session.query(UploadFile) \

Chargement…
Annuler
Enregistrer