Signed-off-by: yihong0618 <zouzou0208@gmail.com>
tags/0.14.0
| @@ -14,11 +14,11 @@ AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS]) | |||
| if dify_config.ETL_TYPE == "Unstructured": | |||
| DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "pdf", "html", "htm", "xlsx", "xls"] | |||
| DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls"] | |||
| DOCUMENT_EXTENSIONS.extend(("docx", "csv", "eml", "msg", "pptx", "xml", "epub")) | |||
| if dify_config.UNSTRUCTURED_API_URL: | |||
| DOCUMENT_EXTENSIONS.append("ppt") | |||
| DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS]) | |||
| else: | |||
| DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"] | |||
| DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"] | |||
| DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS]) | |||
| @@ -103,7 +103,7 @@ class ExtractProcessor: | |||
| extractor = ExcelExtractor(file_path) | |||
| elif file_extension == ".pdf": | |||
| extractor = PdfExtractor(file_path) | |||
| elif file_extension in {".md", ".markdown"}: | |||
| elif file_extension in {".md", ".markdown", ".mdx"}: | |||
| extractor = ( | |||
| UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key) | |||
| if is_automatic | |||
| @@ -141,7 +141,7 @@ class ExtractProcessor: | |||
| extractor = ExcelExtractor(file_path) | |||
| elif file_extension == ".pdf": | |||
| extractor = PdfExtractor(file_path) | |||
| elif file_extension in {".md", ".markdown"}: | |||
| elif file_extension in {".md", ".markdown", ".mdx"}: | |||
| extractor = MarkdownExtractor(file_path, autodetect_encoding=True) | |||
| elif file_extension in {".htm", ".html"}: | |||
| extractor = HtmlExtractor(file_path) | |||
| @@ -36,6 +36,7 @@ const FileIcon: FC<FileIconProps> = ({ | |||
| return <Json className={className} /> | |||
| case 'md': | |||
| case 'markdown': | |||
| case 'mdx': | |||
| return <Md className={className} /> | |||
| case 'pdf': | |||
| return <Pdf className={className} /> | |||
| @@ -84,7 +84,7 @@ export const getFileAppearanceType = (fileName: string, fileMimetype: string) => | |||
| if (extension === 'pdf') | |||
| return FileAppearanceTypeEnum.pdf | |||
| if (extension === 'md' || extension === 'markdown') | |||
| if (extension === 'md' || extension === 'markdown' || extension === 'mdx') | |||
| return FileAppearanceTypeEnum.markdown | |||
| if (extension === 'xlsx' || extension === 'xls') | |||
| @@ -52,7 +52,7 @@ export const getInputVars = (text: string): ValueSelector[] => { | |||
| export const FILE_EXTS: Record<string, string[]> = { | |||
| [SupportUploadFileTypes.image]: ['JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG'], | |||
| [SupportUploadFileTypes.document]: ['TXT', 'MD', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB'], | |||
| [SupportUploadFileTypes.document]: ['TXT', 'MD', 'MDX', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB'], | |||
| [SupportUploadFileTypes.audio]: ['MP3', 'M4A', 'WAV', 'WEBM', 'AMR', 'MPGA'], | |||
| [SupportUploadFileTypes.video]: ['MP4', 'MOV', 'MPEG', 'MPGA'], | |||
| } | |||