Co-authored-by: jyong <jyong@dify.ai>
Co-authored-by: StyleZhang <jasonapring2015@outlook.com>
Tag: 0.5.7
 SSRF_PROXY_HTTP_URL=
 SSRF_PROXY_HTTPS_URL=
+BATCH_UPLOAD_LIMIT=10
     'BILLING_ENABLED': 'False',
     'CAN_REPLACE_LOGO': 'False',
     'ETL_TYPE': 'dify',
+    'BATCH_UPLOAD_LIMIT': 20
 }

         self.BILLING_ENABLED = get_bool_env('BILLING_ENABLED')
         self.CAN_REPLACE_LOGO = get_bool_env('CAN_REPLACE_LOGO')
+        self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT')

 class CloudEditionConfig(Config):
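
The default of 20 lives in the config defaults table, while the docker environment overrides it to 10. A minimal sketch of this env-with-defaults pattern, assuming get_env falls back to a module-level DEFAULTS dict (names here are illustrative, not Dify's exact implementation):

import os

DEFAULTS = {
    'BATCH_UPLOAD_LIMIT': 20,
}

def get_env(key):
    # Prefer the process environment; fall back to the defaults table.
    return os.environ.get(key, DEFAULTS.get(key))

# The value arrives as a string when set via the environment (BATCH_UPLOAD_LIMIT=10)
# and as an int when the default applies, which is why every consumer below
# casts it with int() before comparing.
limit = int(get_env('BATCH_UPLOAD_LIMIT'))
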
 from models.dataset import Document as DatasetDocument
 from models.model import UploadFile
 from models.source import DataSourceBinding
+from services.feature_service import FeatureService

 class IndexingRunner:

         """
         Estimate the indexing for the document.
         """
+        # check document limit
+        features = FeatureService.get_features(tenant_id)
+        if features.billing.enabled:
+            count = len(file_details)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+
         embedding_model_instance = None
         if dataset_id:
             dataset = Dataset.query.filter_by(
| """ | """ | ||||
| Estimate the indexing for the document. | Estimate the indexing for the document. | ||||
| """ | """ | ||||
| # check document limit | |||||
| features = FeatureService.get_features(tenant_id) | |||||
| if features.billing.enabled: | |||||
| count = len(notion_info_list) | |||||
| batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT']) | |||||
| if count > batch_upload_limit: | |||||
| raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.") | |||||
| embedding_model_instance = None | embedding_model_instance = None | ||||
| if dataset_id: | if dataset_id: | ||||
| dataset = Dataset.query.filter_by( | dataset = Dataset.query.filter_by( |
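
The same guard appears in both estimate paths, differing only in what it counts (uploaded file details vs. Notion page groups). A hypothetical helper showing the shape of the check; check_batch_upload_limit is not a function in the codebase, just a way to read the repeated block:

def check_batch_upload_limit(count: int, batch_upload_limit: int, billing_enabled: bool) -> None:
    # Only billing-enabled (cloud) tenants are subject to the batch cap.
    if billing_enabled and count > batch_upload_limit:
        raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")

# File path:   check_batch_upload_limit(len(file_details), limit, features.billing.enabled)
# Notion path: check_batch_upload_limit(len(notion_info_list), limit, features.billing.enabled)
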
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from models.model import App, AppAnnotationHitHistory, AppAnnotationSetting, Message, MessageAnnotation
+from services.feature_service import FeatureService
 from tasks.annotation.add_annotation_to_index_task import add_annotation_to_index_task
 from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task
 from tasks.annotation.delete_annotation_index_task import delete_annotation_index_task

             result.append(content)
         if len(result) == 0:
             raise ValueError("The CSV file is empty.")
+        # check annotation limit
+        features = FeatureService.get_features(current_user.current_tenant_id)
+        if features.billing.enabled:
+            annotation_quota_limit = features.annotation_quota_limit
+            if annotation_quota_limit.limit < len(result) + annotation_quota_limit.size:
+                raise ValueError("The number of annotations exceeds the limit of your subscription.")
         # async job
         job_id = str(uuid.uuid4())
         indexing_cache_key = 'app_annotation_batch_import_{}'.format(str(job_id))
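
The quota check compares the plan's total allowance (limit) against annotations that already exist (size) plus the rows about to be imported. A worked example with illustrative numbers:

# Plan allows 100 annotations in total; 95 already exist; the CSV adds 10.
limit, size, incoming = 100, 95, 10
if limit < incoming + size:  # 100 < 105 -> the import is rejected up front
    raise ValueError("The number of annotations exceeds the limit of your subscription.")

Rejecting here, before the job id is created, means batch_import_annotations_task is never enqueued for an over-quota import.
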
 from services.errors.dataset import DatasetNameDuplicateError
 from services.errors.document import DocumentIndexingError
 from services.errors.file import FileNotExistsError
+from services.feature_service import FeatureService
 from services.vector_service import VectorService
 from tasks.clean_notion_document_task import clean_notion_document_task
 from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task

                       created_from: str = 'web'):
         # check document limit
-        if current_app.config['EDITION'] == 'CLOUD':
+        features = FeatureService.get_features(current_user.current_tenant_id)
+        if features.billing.enabled:
             if 'original_document_id' not in document_data or not document_data['original_document_id']:
                 count = 0
                 if document_data["data_source"]["type"] == "upload_file":

                     notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
                     for notion_info in notion_info_list:
                         count = count + len(notion_info['pages'])
+                batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+                if count > batch_upload_limit:
+                    raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")

         # if dataset is empty, update dataset data_source_type
         if not dataset.data_source_type:
             dataset.data_source_type = document_data["data_source"]["type"]

     @staticmethod
     def save_document_without_dataset_id(tenant_id: str, document_data: dict, account: Account):
-        count = 0
-        if document_data["data_source"]["type"] == "upload_file":
-            upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
-            count = len(upload_file_list)
-        elif document_data["data_source"]["type"] == "notion_import":
-            notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
-            for notion_info in notion_info_list:
-                count = count + len(notion_info['pages'])
+        features = FeatureService.get_features(current_user.current_tenant_id)
+        if features.billing.enabled:
+            count = 0
+            if document_data["data_source"]["type"] == "upload_file":
+                upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
+                count = len(upload_file_list)
+            elif document_data["data_source"]["type"] == "notion_import":
+                notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
+                for notion_info in notion_info_list:
+                    count = count + len(notion_info['pages'])
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")

         embedding_model = None
         dataset_collection_binding_id = None

         segment.answer = args['answer']
         if 'keywords' in args and args['keywords']:
             segment.keywords = args['keywords']
-        if'enabled' in args and args['enabled'] is not None:
+        if 'enabled' in args and args['enabled'] is not None:
             segment.enabled = args['enabled']
         db.session.add(segment)
         db.session.commit()
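
Both save paths derive the upload count the same way from document_data. A minimal sketch of that logic as a standalone function, using the same payload shape the service receives (the sample payload is illustrative):

def count_incoming_documents(document_data: dict) -> int:
    source = document_data["data_source"]
    if source["type"] == "upload_file":
        return len(source["info_list"]['file_info_list']['file_ids'])
    if source["type"] == "notion_import":
        # One Notion binding may contribute several pages; each page becomes a document.
        return sum(len(info['pages']) for info in source['info_list']['notion_info_list'])
    return 0

# e.g. a Notion import of two workspaces with 3 and 4 selected pages counts as 7
payload = {"data_source": {"type": "notion_import", "info_list": {
    "notion_info_list": [{"pages": [1, 2, 3]}, {"pages": [4, 5, 6, 7]}]}}}
assert count_incoming_documents(payload) == 7
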
 IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
-ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS
+ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']
 UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
-                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
+                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml']
 PREVIEW_WORDS_LIMIT = 3000
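
A sketch of how an upload filename might be checked against these lists; is_allowed is illustrative rather than the service's actual validator, and using 'Unstructured' as the ETL selector is an assumption based on the ETL_TYPE default seen in the config hunk above:

def is_allowed(filename: str, etl_type: str = 'dify') -> bool:
    # Take everything after the last dot; files without an extension fail closed.
    extension = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
    allowed = UNSTRUSTURED_ALLOWED_EXTENSIONS if etl_type == 'Unstructured' else ALLOWED_EXTENSIONS
    return extension in allowed
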
 import click
 from celery import shared_task
+from flask import current_app
 from core.indexing_runner import DocumentIsPausedException, IndexingRunner
 from extensions.ext_database import db
-from models.dataset import Document
+from models.dataset import Dataset, Document
+from services.feature_service import FeatureService

 @shared_task(queue='dataset')

     """
     documents = []
     start_at = time.perf_counter()

+    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
+
+    # check document limit
+    features = FeatureService.get_features(dataset.tenant_id)
+    try:
+        if features.billing.enabled:
+            vector_space = features.vector_space
+            count = len(document_ids)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+            if 0 < vector_space.limit <= vector_space.size:
+                raise ValueError("Your total number of documents plus the number of uploads "
+                                 "exceeds the limit of your subscription.")
+    except Exception as e:
+        for document_id in document_ids:
+            document = db.session.query(Document).filter(
+                Document.id == document_id,
+                Document.dataset_id == dataset_id
+            ).first()
+            if document:
+                document.indexing_status = 'error'
+                document.error = str(e)
+                document.stopped_at = datetime.datetime.utcnow()
+                db.session.add(document)
+        db.session.commit()
+        return
+
     for document_id in document_ids:
         logging.info(click.style('Start process document: {}'.format(document_id), fg='green'))
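
Because the limit check runs inside the Celery task, an over-limit batch fails atomically: every document in the batch is flagged indexing_status = 'error' with the reason, and the task returns before any indexing work begins. Callers enqueue the task rather than invoking it inline; a typical call, with identifiers assumed from context rather than shown in this hunk:

# Enqueue on the 'dataset' queue; a worker picks it up asynchronously.
document_indexing_task.delay(dataset_id, document_ids)
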
 import I18n from '@/context/i18n'
 import { LanguagesSupportedUnderscore, getModelRuntimeSupported } from '@/utils/language'

+const FILES_NUMBER_LIMIT = 20

 type IFileUploaderProps = {
   fileList: FileItem[]
   titleClassName?: string

     if (!files.length)
       return false
+    if (files.length + fileList.length > FILES_NUMBER_LIMIT) {
+      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.filesNumber', { filesNumber: FILES_NUMBER_LIMIT }) })
+      return false
+    }
     const preparedFiles = files.map((file, index) => ({
       fileID: `file${index}-${Date.now()}`,
       file,

     prepareFileList(newFiles)
     fileListRef.current = newFiles
     uploadMultipleFiles(preparedFiles)
-  }, [prepareFileList, uploadMultipleFiles])
+  }, [prepareFileList, uploadMultipleFiles, notify, t, fileList])

   const handleDragEnter = (e: DragEvent) => {
     e.preventDefault()
       typeError: 'File type not supported',
       size: 'File too large. Maximum is {{size}}MB',
       count: 'Multiple files not supported',
+      filesNumber: 'You have reached the batch upload limit of {{filesNumber}}.',
     },
     cancel: 'Cancel',
     change: 'Change',
       typeError: 'Tipo de arquivo não suportado',
       size: 'Arquivo muito grande. Máximo é {{size}}MB',
       count: 'Vários arquivos não suportados',
+      filesNumber: 'Limite de upload em massa {{filesNumber}}.',
     },
     cancel: 'Cancelar',
     change: 'Alterar',
       typeError: 'Тип файлу не підтримується',
       size: 'Файл занадто великий. Максимум – {{size}} МБ',
       count: 'Не підтримується завантаження кількох файлів',
+      filesNumber: 'Ліміт масового завантаження {{filesNumber}}.',
     },
     cancel: 'Скасувати',
     change: 'Змінити',
       typeError: '文件类型不支持',
       size: '文件太大了,不能超过 {{size}}MB',
       count: '暂不支持多个文件',
+      filesNumber: '批量上传限制 {{filesNumber}}。',
     },
     cancel: '取消',
     change: '更改文件',