Ver código fonte

document limit (#999)

Co-authored-by: jyong <jyong@dify.ai>
tags/0.3.19
Jyong 2 anos atrás
pai
commit
5397799aac
Nenhuma conta vinculada ao e-mail do autor do commit

+ 37
- 4
api/controllers/console/datasets/datasets_document.py Ver arquivo

from datetime import datetime from datetime import datetime
from typing import List from typing import List


from flask import request
from flask import request, current_app
from flask_login import current_user from flask_login import current_user
from core.login.login import login_required from core.login.login import login_required
from flask_restful import Resource, fields, marshal, marshal_with, reqparse from flask_restful import Resource, fields, marshal, marshal_with, reqparse
parser.add_argument('duplicate', type=bool, nullable=False, location='json') parser.add_argument('duplicate', type=bool, nullable=False, location='json')
parser.add_argument('original_document_id', type=str, required=False, location='json') parser.add_argument('original_document_id', type=str, required=False, location='json')
parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json') parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
parser.add_argument('doc_language', type=str, default='English', required=False, nullable=False, location='json')
parser.add_argument('doc_language', type=str, default='English', required=False, nullable=False,
location='json')
args = parser.parse_args() args = parser.parse_args()


if not dataset.indexing_technique and not args['indexing_technique']: if not dataset.indexing_technique and not args['indexing_technique']:
parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json') parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json') parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json') parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
parser.add_argument('doc_language', type=str, default='English', required=False, nullable=False, location='json')
parser.add_argument('doc_language', type=str, default='English', required=False, nullable=False,
location='json')
args = parser.parse_args() args = parser.parse_args()


try: try:
indexing_runner = IndexingRunner() indexing_runner = IndexingRunner()
try: try:
response = indexing_runner.file_indexing_estimate(current_user.current_tenant_id, file_details, response = indexing_runner.file_indexing_estimate(current_user.current_tenant_id, file_details,
data_process_rule_dict, None, dataset_id)
data_process_rule_dict, None, dataset_id)
except LLMBadRequestError: except LLMBadRequestError:
raise ProviderNotInitializeError( raise ProviderNotInitializeError(
f"No Embedding Model available. Please configure a valid provider " f"No Embedding Model available. Please configure a valid provider "
if not document.archived: if not document.archived:
raise InvalidActionError('Document is not archived.') raise InvalidActionError('Document is not archived.')


# check document limit
if current_app.config['EDITION'] == 'CLOUD':
documents_count = DocumentService.get_tenant_documents_count()
total_count = documents_count + 1
tenant_document_count = int(current_app.config['TENANT_DOCUMENT_COUNT'])
if total_count > tenant_document_count:
raise ValueError(f"All your documents have overed limit {tenant_document_count}.")

document.archived = False document.archived = False
document.archived_at = None document.archived_at = None
document.archived_by = None document.archived_by = None




class DocumentPauseApi(DocumentResource): class DocumentPauseApi(DocumentResource):

@setup_required
@login_required
@account_initialization_required
def patch(self, dataset_id, document_id): def patch(self, dataset_id, document_id):
"""pause document.""" """pause document."""
dataset_id = str(dataset_id) dataset_id = str(dataset_id)




class DocumentRecoverApi(DocumentResource): class DocumentRecoverApi(DocumentResource):
@setup_required
@login_required
@account_initialization_required
def patch(self, dataset_id, document_id): def patch(self, dataset_id, document_id):
"""recover document.""" """recover document."""
dataset_id = str(dataset_id) dataset_id = str(dataset_id)
return {'result': 'success'}, 204 return {'result': 'success'}, 204




class DocumentLimitApi(DocumentResource):
    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """Report the tenant's current document count alongside its configured limit."""
        # Number of documents the current tenant already has.
        used = DocumentService.get_tenant_documents_count()
        # Per-tenant cap; config value may be a string, so coerce to int.
        limit = int(current_app.config['TENANT_DOCUMENT_COUNT'])

        payload = {
            'documents_count': used,
            'documents_limit': limit,
        }
        return payload, 200


api.add_resource(GetProcessRuleApi, '/datasets/process-rule') api.add_resource(GetProcessRuleApi, '/datasets/process-rule')
api.add_resource(DatasetDocumentListApi, api.add_resource(DatasetDocumentListApi,
'/datasets/<uuid:dataset_id>/documents') '/datasets/<uuid:dataset_id>/documents')
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/status/<string:action>') '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/status/<string:action>')
api.add_resource(DocumentPauseApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/pause') api.add_resource(DocumentPauseApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/pause')
api.add_resource(DocumentRecoverApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/resume') api.add_resource(DocumentRecoverApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/resume')
api.add_resource(DocumentLimitApi, '/datasets/limit')

+ 20
- 3
api/services/dataset_service.py Ver arquivo

def save_document_with_dataset_id(dataset: Dataset, document_data: dict, def save_document_with_dataset_id(dataset: Dataset, document_data: dict,
account: Account, dataset_process_rule: Optional[DatasetProcessRule] = None, account: Account, dataset_process_rule: Optional[DatasetProcessRule] = None,
created_from: str = 'web'): created_from: str = 'web'):

# check document limit # check document limit
if current_app.config['EDITION'] == 'CLOUD': if current_app.config['EDITION'] == 'CLOUD':
count = 0
if document_data["data_source"]["type"] == "upload_file":
upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
count = len(upload_file_list)
elif document_data["data_source"]["type"] == "notion_import":
notion_page_list = document_data["data_source"]['info_list']['notion_info_list']['pages']
count = len(notion_page_list)
documents_count = DocumentService.get_tenant_documents_count() documents_count = DocumentService.get_tenant_documents_count()
total_count = documents_count + count
tenant_document_count = int(current_app.config['TENANT_DOCUMENT_COUNT']) tenant_document_count = int(current_app.config['TENANT_DOCUMENT_COUNT'])
if documents_count > tenant_document_count:
if total_count > tenant_document_count:
raise ValueError(f"over document limit {tenant_document_count}.") raise ValueError(f"over document limit {tenant_document_count}.")
# if dataset is empty, update dataset data_source_type # if dataset is empty, update dataset data_source_type
if not dataset.data_source_type: if not dataset.data_source_type:


@staticmethod @staticmethod
def save_document_without_dataset_id(tenant_id: str, document_data: dict, account: Account): def save_document_without_dataset_id(tenant_id: str, document_data: dict, account: Account):
count = 0
if document_data["data_source"]["type"] == "upload_file":
upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
count = len(upload_file_list)
elif document_data["data_source"]["type"] == "notion_import":
notion_page_list = document_data["data_source"]['info_list']['notion_info_list']['pages']
count = len(notion_page_list)
# check document limit # check document limit
if current_app.config['EDITION'] == 'CLOUD': if current_app.config['EDITION'] == 'CLOUD':
documents_count = DocumentService.get_tenant_documents_count() documents_count = DocumentService.get_tenant_documents_count()
total_count = documents_count + count
tenant_document_count = int(current_app.config['TENANT_DOCUMENT_COUNT']) tenant_document_count = int(current_app.config['TENANT_DOCUMENT_COUNT'])
if documents_count > tenant_document_count:
raise ValueError(f"over document limit {tenant_document_count}.")
if total_count > tenant_document_count:
raise ValueError(f"All your documents have overed limit {tenant_document_count}.")
embedding_model = ModelFactory.get_embedding_model( embedding_model = ModelFactory.get_embedding_model(
tenant_id=tenant_id tenant_id=tenant_id
) )

Carregando…
Cancelar
Salvar