| raise FileTooLargeError(message) | raise FileTooLargeError(message) | ||||
| extension = file.filename.split('.')[-1] | extension = file.filename.split('.')[-1] | ||||
| if extension not in ALLOWED_EXTENSIONS: | |||||
| if extension.lower() not in ALLOWED_EXTENSIONS: | |||||
| raise UnsupportedFileTypeError() | raise UnsupportedFileTypeError() | ||||
| # user uuid as file name | # user uuid as file name | ||||
| # extract text from file | # extract text from file | ||||
| extension = upload_file.extension | extension = upload_file.extension | ||||
| if extension not in ALLOWED_EXTENSIONS: | |||||
| if extension.lower() not in ALLOWED_EXTENSIONS: | |||||
| raise UnsupportedFileTypeError() | raise UnsupportedFileTypeError() | ||||
| text = FileExtractor.load(upload_file, return_text=True) | text = FileExtractor.load(upload_file, return_text=True) | 
| upload_file: Optional[UploadFile] = None) -> Union[List[Document] | str]: | upload_file: Optional[UploadFile] = None) -> Union[List[Document] | str]: | ||||
| input_file = Path(file_path) | input_file = Path(file_path) | ||||
| delimiter = '\n' | delimiter = '\n' | ||||
| if input_file.suffix == '.xlsx': | |||||
| file_extension = input_file.suffix.lower() | |||||
| if file_extension == '.xlsx': | |||||
| loader = ExcelLoader(file_path) | loader = ExcelLoader(file_path) | ||||
| elif input_file.suffix == '.pdf': | |||||
| elif file_extension == '.pdf': | |||||
| loader = PdfLoader(file_path, upload_file=upload_file) | loader = PdfLoader(file_path, upload_file=upload_file) | ||||
| elif input_file.suffix in ['.md', '.markdown']: | |||||
| elif file_extension in ['.md', '.markdown']: | |||||
| loader = MarkdownLoader(file_path, autodetect_encoding=True) | loader = MarkdownLoader(file_path, autodetect_encoding=True) | ||||
| elif input_file.suffix in ['.htm', '.html']: | |||||
| elif file_extension in ['.htm', '.html']: | |||||
| loader = HTMLLoader(file_path) | loader = HTMLLoader(file_path) | ||||
| elif input_file.suffix == '.docx': | |||||
| elif file_extension == '.docx': | |||||
| loader = Docx2txtLoader(file_path) | loader = Docx2txtLoader(file_path) | ||||
| elif input_file.suffix == '.csv': | |||||
| elif file_extension == '.csv': | |||||
| loader = CSVLoader(file_path, autodetect_encoding=True) | loader = CSVLoader(file_path, autodetect_encoding=True) | ||||
| else: | else: | ||||
| # txt | # txt | 
| const isValid = useCallback((file: File) => { | const isValid = useCallback((file: File) => { | ||||
| const { size } = file | const { size } = file | ||||
| const ext = `.${getFileType(file)}` | const ext = `.${getFileType(file)}` | ||||
| const isValidType = ACCEPTS.includes(ext) | |||||
| const isValidType = ACCEPTS.includes(ext.toLowerCase()) | |||||
| if (!isValidType) | if (!isValidType) | ||||
| notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.typeError') }) | notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.typeError') }) | ||||
| ) | ) | ||||
| } | } | ||||
| export default DatasetUpdateForm | |||||
| export default DatasetUpdateForm |