Co-authored-by: jyong <jyong@dify.ai>
Co-authored-by: StyleZhang <jasonapring2015@outlook.com>
@@ -130,3 +130,5 @@ UNSTRUCTURED_API_URL=
SSRF_PROXY_HTTP_URL=
SSRF_PROXY_HTTPS_URL=
BATCH_UPLOAD_LIMIT=10
@@ -56,6 +56,7 @@ DEFAULTS = {
    'BILLING_ENABLED': 'False',
    'CAN_REPLACE_LOGO': 'False',
    'ETL_TYPE': 'dify',
    'BATCH_UPLOAD_LIMIT': 20
}
@@ -285,6 +286,8 @@ class Config:
        self.BILLING_ENABLED = get_bool_env('BILLING_ENABLED')
        self.CAN_REPLACE_LOGO = get_bool_env('CAN_REPLACE_LOGO')
        self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT')
class CloudEditionConfig(Config):
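For context on how the three hunks above fit together: .env.example exposes BATCH_UPLOAD_LIMIT=10 as a deployment override, DEFAULTS supplies the in-code fallback of 20, and Config stores the raw value. A minimal sketch of that resolution, assuming get_env falls back to the DEFAULTS table when the variable is unset (an assumption based on the hunks, not the full config module):

import os

DEFAULTS = {
    'BATCH_UPLOAD_LIMIT': 20,
}

def get_env(key):
    # Assumed behaviour: environment wins, DEFAULTS fills the gap.
    return os.environ.get(key, DEFAULTS.get(key))

limit = int(get_env('BATCH_UPLOAD_LIMIT'))  # 10 from .env, else 20

Note the value is stored unparsed, which is why every consumer below casts it with int(current_app.config['BATCH_UPLOAD_LIMIT']).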
@@ -32,6 +32,7 @@ from models.dataset import Dataset, DatasetProcessRule, DocumentSegment
from models.dataset import Document as DatasetDocument
from models.model import UploadFile
from models.source import DataSourceBinding
from services.feature_service import FeatureService
class IndexingRunner:
@@ -244,6 +245,14 @@ class IndexingRunner:
        """
        Estimate the indexing for the document.
        """
        # check document limit
        features = FeatureService.get_features(tenant_id)
        if features.billing.enabled:
            count = len(file_details)
            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
            if count > batch_upload_limit:
                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
        embedding_model_instance = None
        if dataset_id:
            dataset = Dataset.query.filter_by(
@@ -361,6 +370,14 @@ class IndexingRunner:
        """
        Estimate the indexing for the document.
        """
        # check document limit
        features = FeatureService.get_features(tenant_id)
        if features.billing.enabled:
            count = len(notion_info_list)
            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
            if count > batch_upload_limit:
                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
        embedding_model_instance = None
        if dataset_id:
            dataset = Dataset.query.filter_by(
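The billing-gated limit check is duplicated verbatim across both estimate paths above. A hedged sketch of a shared helper (the helper name is hypothetical; FeatureService and current_app are taken from the imports shown in the hunks):

from flask import current_app

from services.feature_service import FeatureService

def check_batch_upload_limit(tenant_id: str, count: int) -> None:
    # Hypothetical consolidation of the duplicated check; enforced only
    # when billing is enabled for the tenant.
    features = FeatureService.get_features(tenant_id)
    if not features.billing.enabled:
        return
    batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
    if count > batch_upload_limit:
        raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")

Each path would then call check_batch_upload_limit(tenant_id, len(file_details)) or check_batch_upload_limit(tenant_id, len(notion_info_list)) before estimating.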
@@ -10,6 +10,7 @@ from werkzeug.exceptions import NotFound
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.model import App, AppAnnotationHitHistory, AppAnnotationSetting, Message, MessageAnnotation
from services.feature_service import FeatureService
from tasks.annotation.add_annotation_to_index_task import add_annotation_to_index_task
from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task
from tasks.annotation.delete_annotation_index_task import delete_annotation_index_task
@@ -284,6 +285,12 @@ class AppAnnotationService:
            result.append(content)
        if len(result) == 0:
            raise ValueError("The CSV file is empty.")
        # check annotation limit
        features = FeatureService.get_features(current_user.current_tenant_id)
        if features.billing.enabled:
            annotation_quota_limit = features.annotation_quota_limit
            if annotation_quota_limit.limit < len(result) + annotation_quota_limit.size:
                raise ValueError("The number of annotations exceeds the limit of your subscription.")
        # async job
        job_id = str(uuid.uuid4())
        indexing_cache_key = 'app_annotation_batch_import_{}'.format(str(job_id))
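To make the quota condition concrete, a small worked example with illustrative numbers (not taken from the source):

# The import is rejected when limit < new_rows + size, i.e. when the
# CSV would push the tenant past its plan ceiling.
limit, size = 10, 8   # plan ceiling, annotations already stored
new_rows = 3          # rows parsed from the uploaded CSV
assert limit < new_rows + size   # 10 < 11, so the ValueError is raised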
@@ -36,6 +36,7 @@ from services.errors.account import NoPermissionError
from services.errors.dataset import DatasetNameDuplicateError
from services.errors.document import DocumentIndexingError
from services.errors.file import FileNotExistsError
from services.feature_service import FeatureService
from services.vector_service import VectorService
from tasks.clean_notion_document_task import clean_notion_document_task
from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
@@ -452,7 +453,9 @@ class DocumentService:
                                      created_from: str = 'web'):
        # check document limit
        if current_app.config['EDITION'] == 'CLOUD':
        features = FeatureService.get_features(current_user.current_tenant_id)
        if features.billing.enabled:
            if 'original_document_id' not in document_data or not document_data['original_document_id']:
                count = 0
                if document_data["data_source"]["type"] == "upload_file":
@@ -462,6 +465,9 @@ class DocumentService:
                    notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
                    for notion_info in notion_info_list:
                        count = count + len(notion_info['pages'])
                batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
                if count > batch_upload_limit:
                    raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
        # if dataset is empty, update dataset data_source_type
        if not dataset.data_source_type:
            dataset.data_source_type = document_data["data_source"]["type"]
@@ -741,14 +747,20 @@ class DocumentService:
    @staticmethod
    def save_document_without_dataset_id(tenant_id: str, document_data: dict, account: Account):
        count = 0
        if document_data["data_source"]["type"] == "upload_file":
            upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
            count = len(upload_file_list)
        elif document_data["data_source"]["type"] == "notion_import":
            notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
            for notion_info in notion_info_list:
                count = count + len(notion_info['pages'])
        features = FeatureService.get_features(current_user.current_tenant_id)
        if features.billing.enabled:
            count = 0
            if document_data["data_source"]["type"] == "upload_file":
                upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
                count = len(upload_file_list)
            elif document_data["data_source"]["type"] == "notion_import":
                notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
                for notion_info in notion_info_list:
                    count = count + len(notion_info['pages'])
            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
            if count > batch_upload_limit:
                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
        embedding_model = None
        dataset_collection_binding_id = None
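Both save paths count incoming documents the same way before comparing against BATCH_UPLOAD_LIMIT. A sketch of that counting logic, using the document_data shape visible in the hunks (the helper name is hypothetical):

def count_incoming_documents(document_data: dict) -> int:
    # Upload batches are counted by file id; Notion imports by the total
    # number of pages across every selected workspace.
    source = document_data["data_source"]
    if source["type"] == "upload_file":
        return len(source["info_list"]["file_info_list"]["file_ids"])
    if source["type"] == "notion_import":
        return sum(len(info["pages"]) for info in source["info_list"]["notion_info_list"])
    return 0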
@@ -1139,7 +1151,7 @@ class SegmentService:
            segment.answer = args['answer']
        if 'keywords' in args and args['keywords']:
            segment.keywords = args['keywords']
        if'enabled' in args and args['enabled'] is not None:
        if 'enabled' in args and args['enabled'] is not None:
            segment.enabled = args['enabled']
        db.session.add(segment)
        db.session.commit()
@@ -20,9 +20,9 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']
UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml']
PREVIEW_WORDS_LIMIT = 3000
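This hunk removes image extensions from both upload allow-lists. The hunk does not show how a list is chosen per request; a hedged sketch, assuming the choice follows the ETL_TYPE setting seen in the config hunk (the 'Unstructured' literal is an assumption):

def allowed_upload_extensions(etl_type: str) -> list:
    # Assumption: an Unstructured-backed pipeline accepts the wider list,
    # the default 'dify' pipeline the basic one. The identifier spelling
    # (UNSTRUSTURED_...) follows the source.
    if etl_type == 'Unstructured':
        return UNSTRUSTURED_ALLOWED_EXTENSIONS
    return ALLOWED_EXTENSIONS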
@@ -4,10 +4,12 @@ import time
import click
from celery import shared_task
from flask import current_app
from core.indexing_runner import DocumentIsPausedException, IndexingRunner
from extensions.ext_database import db
from models.dataset import Document
from models.dataset import Dataset, Document
from services.feature_service import FeatureService
@shared_task(queue='dataset')
@@ -21,6 +23,35 @@ def document_indexing_task(dataset_id: str, document_ids: list):
    """
    documents = []
    start_at = time.perf_counter()
    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
    # check document limit
    features = FeatureService.get_features(dataset.tenant_id)
    try:
        if features.billing.enabled:
            vector_space = features.vector_space
            count = len(document_ids)
            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
            if count > batch_upload_limit:
                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
            if 0 < vector_space.limit <= vector_space.size:
| raise ValueError("Your total number of documents plus the number of uploads have over the limit of " | |||
| "your subscription.") | |||
    except Exception as e:
        for document_id in document_ids:
            document = db.session.query(Document).filter(
                Document.id == document_id,
                Document.dataset_id == dataset_id
            ).first()
            if document:
                document.indexing_status = 'error'
                document.error = str(e)
                document.stopped_at = datetime.datetime.utcnow()
                db.session.add(document)
        db.session.commit()
        return
    for document_id in document_ids:
        logging.info(click.style('Start process document: {}'.format(document_id), fg='green'))
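One subtlety in the guard above: the chained comparison 0 < vector_space.limit <= vector_space.size treats a limit of 0 as unlimited and only rejects once a finite limit is already reached. Illustrative values:

assert not (0 < 0 <= 150)   # limit 0 means an unlimited plan, never rejects
assert 0 < 200 <= 200       # finite plan at capacity, task aborts

Because the whole check runs inside try/except, any ValueError raised here is recorded as a per-document 'error' status instead of crashing the Celery task.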
@@ -14,6 +14,8 @@ import { fetchSupportFileTypes } from '@/service/datasets'
import I18n from '@/context/i18n'
import { LanguagesSupportedUnderscore, getModelRuntimeSupported } from '@/utils/language'
const FILES_NUMBER_LIMIT = 20
type IFileUploaderProps = {
  fileList: FileItem[]
  titleClassName?: string
@@ -176,6 +178,11 @@ const FileUploader = ({
    if (!files.length)
      return false
    if (files.length + fileList.length > FILES_NUMBER_LIMIT) {
      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.filesNumber', { filesNumber: FILES_NUMBER_LIMIT }) })
      return false
    }
    const preparedFiles = files.map((file, index) => ({
      fileID: `file${index}-${Date.now()}`,
      file,
@@ -185,7 +192,7 @@ const FileUploader = ({
    prepareFileList(newFiles)
    fileListRef.current = newFiles
    uploadMultipleFiles(preparedFiles)
  }, [prepareFileList, uploadMultipleFiles])
  }, [prepareFileList, uploadMultipleFiles, notify, t, fileList])
  const handleDragEnter = (e: DragEvent) => {
    e.preventDefault()
@@ -28,6 +28,7 @@ const translation = {
      typeError: 'File type not supported',
      size: 'File too large. Maximum is {{size}}MB',
      count: 'Multiple files not supported',
      filesNumber: 'You have reached the batch upload limit of {{filesNumber}}.',
    },
    cancel: 'Cancel',
    change: 'Change',
@@ -28,6 +28,7 @@ const translation = {
      typeError: 'Tipo de arquivo não suportado',
      size: 'Arquivo muito grande. Máximo é {{size}}MB',
      count: 'Vários arquivos não suportados',
      filesNumber: 'Limite de upload em massa {{filesNumber}}.',
    },
    cancel: 'Cancelar',
    change: 'Alterar',
@@ -28,6 +28,7 @@ const translation = {
      typeError: 'Тип файлу не підтримується',
      size: 'Файл занадто великий. Максимум – {{size}} МБ',
      count: 'Не підтримується завантаження кількох файлів',
      filesNumber: 'Ліміт масового завантаження {{filesNumber}}.',
    },
    cancel: 'Скасувати',
    change: 'Змінити',
@@ -28,6 +28,7 @@ const translation = {
      typeError: '文件类型不支持',
      size: '文件太大了,不能超过 {{size}}MB',
      count: '暂不支持多个文件',
      filesNumber: '批量上传限制 {{filesNumber}}。',
    },
    cancel: '取消',
    change: '更改文件',