2 anni fa · d33a269548
--- a/api/controllers/console/datasets/file.py
+++ b/api/controllers/console/datasets/file.py
            raise FileTooLargeError(message)
        extension = file.filename.split('.')[-1]
        if extension not in ALLOWED_EXTENSIONS:
        if extension.lower() not in ALLOWED_EXTENSIONS:
            raise UnsupportedFileTypeError()
        # user uuid as file name
        # extract text from file
        extension = upload_file.extension
        if extension not in ALLOWED_EXTENSIONS:
        if extension.lower() not in ALLOWED_EXTENSIONS:
            raise UnsupportedFileTypeError()
        text = FileExtractor.load(upload_file, return_text=True)
--- a/api/core/data_loader/file_extractor.py
+++ b/api/core/data_loader/file_extractor.py
                       upload_file: Optional[UploadFile] = None) -> Union[List[Document] | str]:
        input_file = Path(file_path)
        delimiter = '\n'
        if input_file.suffix == '.xlsx':
        file_extension = input_file.suffix.lower()
        if file_extension == '.xlsx':
            loader = ExcelLoader(file_path)
        elif input_file.suffix == '.pdf':
        elif file_extension == '.pdf':
            loader = PdfLoader(file_path, upload_file=upload_file)
        elif input_file.suffix in ['.md', '.markdown']:
        elif file_extension in ['.md', '.markdown']:
            loader = MarkdownLoader(file_path, autodetect_encoding=True)
        elif input_file.suffix in ['.htm', '.html']:
        elif file_extension in ['.htm', '.html']:
            loader = HTMLLoader(file_path)
        elif input_file.suffix == '.docx':
        elif file_extension == '.docx':
            loader = Docx2txtLoader(file_path)
        elif input_file.suffix == '.csv':
        elif file_extension == '.csv':
            loader = CSVLoader(file_path, autodetect_encoding=True)
        else:
            # txt
--- a/web/app/components/datasets/create/file-uploader/index.tsx
+++ b/web/app/components/datasets/create/file-uploader/index.tsx
  const isValid = useCallback((file: File) => {
    const { size } = file
    const ext = `.${getFileType(file)}`
    const isValidType = ACCEPTS.includes(ext)
    const isValidType = ACCEPTS.includes(ext.toLowerCase())
    if (!isValidType)
      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.typeError') })
--- a/web/app/components/datasets/create/index.tsx
+++ b/web/app/components/datasets/create/index.tsx
  )
 }
 export default DatasetUpdateForm
 export default DatasetUpdateForm