|
|
|
@@ -21,7 +21,7 @@ IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg'] |
|
|
|
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS]) |
|
|
|
|
|
|
|
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'xls', 'docx', 'csv'] |
|
|
|
UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'xls', |
|
|
|
UNSTRUCTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'xls', |
|
|
|
'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml', 'epub'] |
|
|
|
|
|
|
|
PREVIEW_WORDS_LIMIT = 3000 |
|
|
|
@@ -36,7 +36,7 @@ class FileService: |
|
|
|
if len(filename) > 200: |
|
|
|
filename = filename.split('.')[0][:200] + '.' + extension |
|
|
|
etl_type = current_app.config['ETL_TYPE'] |
|
|
|
allowed_extensions = UNSTRUSTURED_ALLOWED_EXTENSIONS + IMAGE_EXTENSIONS if etl_type == 'Unstructured' \ |
|
|
|
allowed_extensions = UNSTRUCTURED_ALLOWED_EXTENSIONS + IMAGE_EXTENSIONS if etl_type == 'Unstructured' \ |
|
|
|
else ALLOWED_EXTENSIONS + IMAGE_EXTENSIONS |
|
|
|
if extension.lower() not in allowed_extensions: |
|
|
|
raise UnsupportedFileTypeError() |
|
|
|
@@ -139,7 +139,7 @@ class FileService: |
|
|
|
# extract text from file |
|
|
|
extension = upload_file.extension |
|
|
|
etl_type = current_app.config['ETL_TYPE'] |
|
|
|
allowed_extensions = UNSTRUSTURED_ALLOWED_EXTENSIONS if etl_type == 'Unstructured' else ALLOWED_EXTENSIONS |
|
|
|
allowed_extensions = UNSTRUCTURED_ALLOWED_EXTENSIONS if etl_type == 'Unstructured' else ALLOWED_EXTENSIONS |
|
|
|
if extension.lower() not in allowed_extensions: |
|
|
|
raise UnsupportedFileTypeError() |
|
|
|
|