| @@ -43,7 +43,6 @@ from core.model_runtime.errors.invoke import InvokeAuthorizationError | |||
| from core.plugin.impl.exc import PluginDaemonClientSideError | |||
| from core.rag.extractor.entity.extract_setting import ExtractSetting | |||
| from extensions.ext_database import db | |||
| -from extensions.ext_redis import redis_client | |||
| from fields.document_fields import ( | |||
| dataset_and_document_fields, | |||
| document_fields, | |||
| @@ -54,8 +53,6 @@ from libs.login import login_required | |||
| from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile | |||
| from services.dataset_service import DatasetService, DocumentService | |||
| from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig | |||
| -from tasks.add_document_to_index_task import add_document_to_index_task | |||
| -from tasks.remove_document_from_index_task import remove_document_from_index_task | |||
| class DocumentResource(Resource): | |||
| @@ -862,77 +859,16 @@ class DocumentStatusApi(DocumentResource): | |||
| DatasetService.check_dataset_permission(dataset, current_user) | |||
| document_ids = request.args.getlist("document_id") | |||
| for document_id in document_ids: | |||
| document = self.get_document(dataset_id, document_id) | |||
| indexing_cache_key = "document_{}_indexing".format(document.id) | |||
| cache_result = redis_client.get(indexing_cache_key) | |||
| if cache_result is not None: | |||
| raise InvalidActionError(f"Document:{document.name} is being indexed, please try again later") | |||
| if action == "enable": | |||
| if document.enabled: | |||
| continue | |||
| document.enabled = True | |||
| document.disabled_at = None | |||
| document.disabled_by = None | |||
| document.updated_at = datetime.now(UTC).replace(tzinfo=None) | |||
| db.session.commit() | |||
| # Set cache to prevent indexing the same document multiple times | |||
| redis_client.setex(indexing_cache_key, 600, 1) | |||
| add_document_to_index_task.delay(document_id) | |||
| elif action == "disable": | |||
| if not document.completed_at or document.indexing_status != "completed": | |||
| raise InvalidActionError(f"Document: {document.name} is not completed.") | |||
| if not document.enabled: | |||
| continue | |||
| document.enabled = False | |||
| document.disabled_at = datetime.now(UTC).replace(tzinfo=None) | |||
| document.disabled_by = current_user.id | |||
| document.updated_at = datetime.now(UTC).replace(tzinfo=None) | |||
| db.session.commit() | |||
| # Set cache to prevent indexing the same document multiple times | |||
| redis_client.setex(indexing_cache_key, 600, 1) | |||
| remove_document_from_index_task.delay(document_id) | |||
| elif action == "archive": | |||
| if document.archived: | |||
| continue | |||
| document.archived = True | |||
| document.archived_at = datetime.now(UTC).replace(tzinfo=None) | |||
| document.archived_by = current_user.id | |||
| document.updated_at = datetime.now(UTC).replace(tzinfo=None) | |||
| db.session.commit() | |||
| if document.enabled: | |||
| # Set cache to prevent indexing the same document multiple times | |||
| redis_client.setex(indexing_cache_key, 600, 1) | |||
| remove_document_from_index_task.delay(document_id) | |||
| elif action == "un_archive": | |||
| if not document.archived: | |||
| continue | |||
| document.archived = False | |||
| document.archived_at = None | |||
| document.archived_by = None | |||
| document.updated_at = datetime.now(UTC).replace(tzinfo=None) | |||
| db.session.commit() | |||
| # Set cache to prevent indexing the same document multiple times | |||
| redis_client.setex(indexing_cache_key, 600, 1) | |||
| add_document_to_index_task.delay(document_id) | |||
| else: | |||
| raise InvalidActionError() | |||
| try: | |||
| DocumentService.batch_update_document_status(dataset, document_ids, action, current_user) | |||
| except services.errors.document.DocumentIndexingError as e: | |||
| raise InvalidActionError(str(e)) | |||
| except ValueError as e: | |||
| raise InvalidActionError(str(e)) | |||
| except NotFound as e: | |||
| raise NotFound(str(e)) | |||
| return {"result": "success"}, 200 | |||
| @@ -4,7 +4,7 @@ from werkzeug.exceptions import Forbidden, NotFound | |||
| import services.dataset_service | |||
| from controllers.service_api import api | |||
| -from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError | |||
| +from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError, InvalidActionError | |||
| from controllers.service_api.wraps import ( | |||
| DatasetApiResource, | |||
| cloud_edition_billing_rate_limit_check, | |||
| @@ -17,7 +17,7 @@ from fields.dataset_fields import dataset_detail_fields | |||
| from fields.tag_fields import tag_fields | |||
| from libs.login import current_user | |||
| from models.dataset import Dataset, DatasetPermissionEnum | |||
| -from services.dataset_service import DatasetPermissionService, DatasetService | |||
| +from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService | |||
| from services.entities.knowledge_entities.knowledge_entities import RetrievalModel | |||
| from services.tag_service import TagService | |||
| @@ -329,6 +329,56 @@ class DatasetApi(DatasetApiResource): | |||
| raise DatasetInUseError() | |||
| class DocumentStatusApi(DatasetApiResource): | |||
| """Resource for batch document status operations.""" | |||
| def patch(self, tenant_id, dataset_id, action): | |||
| """ | |||
| Batch update document status. | |||
| Args: | |||
| tenant_id: tenant id | |||
| dataset_id: dataset id | |||
| action: action to perform (enable, disable, archive, un_archive) | |||
| Returns: | |||
| dict: A dictionary with a key 'result' and a value 'success' | |||
| int: HTTP status code 200 indicating that the operation was successful. | |||
| Raises: | |||
| NotFound: If the dataset with the given ID does not exist. | |||
| Forbidden: If the user does not have permission. | |||
| InvalidActionError: If the action is invalid or cannot be performed. | |||
| """ | |||
| dataset_id_str = str(dataset_id) | |||
| dataset = DatasetService.get_dataset(dataset_id_str) | |||
| if dataset is None: | |||
| raise NotFound("Dataset not found.") | |||
| # Check user's permission | |||
| try: | |||
| DatasetService.check_dataset_permission(dataset, current_user) | |||
| except services.errors.account.NoPermissionError as e: | |||
| raise Forbidden(str(e)) | |||
| # Check dataset model setting | |||
| DatasetService.check_dataset_model_setting(dataset) | |||
| # Get document IDs from request body | |||
| data = request.get_json() | |||
| document_ids = data.get("document_ids", []) | |||
| try: | |||
| DocumentService.batch_update_document_status(dataset, document_ids, action, current_user) | |||
| except services.errors.document.DocumentIndexingError as e: | |||
| raise InvalidActionError(str(e)) | |||
| except ValueError as e: | |||
| raise InvalidActionError(str(e)) | |||
| return {"result": "success"}, 200 | |||
| class DatasetTagsApi(DatasetApiResource): | |||
| @validate_dataset_token | |||
| @marshal_with(tag_fields) | |||
| @@ -457,6 +507,7 @@ class DatasetTagsBindingStatusApi(DatasetApiResource): | |||
| api.add_resource(DatasetListApi, "/datasets") | |||
| api.add_resource(DatasetApi, "/datasets/<uuid:dataset_id>") | |||
| api.add_resource(DocumentStatusApi, "/datasets/<uuid:dataset_id>/documents/status/<string:action>") | |||
| api.add_resource(DatasetTagsApi, "/datasets/tags") | |||
| api.add_resource(DatasetTagBindingApi, "/datasets/tags/binding") | |||
| api.add_resource(DatasetTagUnbindingApi, "/datasets/tags/unbinding") | |||
| @@ -59,6 +59,7 @@ from services.external_knowledge_service import ExternalDatasetService | |||
| from services.feature_service import FeatureModel, FeatureService | |||
| from services.tag_service import TagService | |||
| from services.vector_service import VectorService | |||
| from tasks.add_document_to_index_task import add_document_to_index_task | |||
| from tasks.batch_clean_document_task import batch_clean_document_task | |||
| from tasks.clean_notion_document_task import clean_notion_document_task | |||
| from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task | |||
| @@ -70,6 +71,7 @@ from tasks.document_indexing_update_task import document_indexing_update_task | |||
| from tasks.duplicate_document_indexing_task import duplicate_document_indexing_task | |||
| from tasks.enable_segments_to_index_task import enable_segments_to_index_task | |||
| from tasks.recover_document_indexing_task import recover_document_indexing_task | |||
| from tasks.remove_document_from_index_task import remove_document_from_index_task | |||
| from tasks.retry_document_indexing_task import retry_document_indexing_task | |||
| from tasks.sync_website_document_indexing_task import sync_website_document_indexing_task | |||
| @@ -434,7 +436,7 @@ class DatasetService: | |||
| raise ValueError(ex.description) | |||
| filtered_data["updated_by"] = user.id | |||
| -            filtered_data["updated_at"] = datetime.datetime.now() | |||
| +            filtered_data["updated_at"] = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) | |||
| # update Retrieval model | |||
| filtered_data["retrieval_model"] = data["retrieval_model"] | |||
| @@ -1608,6 +1610,99 @@ class DocumentService: | |||
| if not isinstance(args["process_rule"]["rules"]["segmentation"]["max_tokens"], int): | |||
| raise ValueError("Process rule segmentation max_tokens is invalid") | |||
| @staticmethod | |||
| def batch_update_document_status(dataset: Dataset, document_ids: list[str], action: str, user): | |||
| """ | |||
| Batch update document status. | |||
| Args: | |||
| dataset (Dataset): The dataset object | |||
| document_ids (list[str]): List of document IDs to update | |||
| action (str): Action to perform (enable, disable, archive, un_archive) | |||
| user: Current user performing the action | |||
| Raises: | |||
| DocumentIndexingError: If document is being indexed or not in correct state | |||
| """ | |||
| if not document_ids: | |||
| return | |||
| for document_id in document_ids: | |||
| document = DocumentService.get_document(dataset.id, document_id) | |||
| if not document: | |||
| continue | |||
| indexing_cache_key = f"document_{document.id}_indexing" | |||
| cache_result = redis_client.get(indexing_cache_key) | |||
| if cache_result is not None: | |||
| raise DocumentIndexingError(f"Document:{document.name} is being indexed, please try again later") | |||
| if action == "enable": | |||
| if document.enabled: | |||
| continue | |||
| document.enabled = True | |||
| document.disabled_at = None | |||
| document.disabled_by = None | |||
| document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) | |||
| db.session.commit() | |||
| # Set cache to prevent indexing the same document multiple times | |||
| redis_client.setex(indexing_cache_key, 600, 1) | |||
| add_document_to_index_task.delay(document_id) | |||
| elif action == "disable": | |||
| if not document.completed_at or document.indexing_status != "completed": | |||
| raise DocumentIndexingError(f"Document: {document.name} is not completed.") | |||
| if not document.enabled: | |||
| continue | |||
| document.enabled = False | |||
| document.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) | |||
| document.disabled_by = user.id | |||
| document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) | |||
| db.session.commit() | |||
| # Set cache to prevent indexing the same document multiple times | |||
| redis_client.setex(indexing_cache_key, 600, 1) | |||
| remove_document_from_index_task.delay(document_id) | |||
| elif action == "archive": | |||
| if document.archived: | |||
| continue | |||
| document.archived = True | |||
| document.archived_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) | |||
| document.archived_by = user.id | |||
| document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) | |||
| db.session.commit() | |||
| if document.enabled: | |||
| # Set cache to prevent indexing the same document multiple times | |||
| redis_client.setex(indexing_cache_key, 600, 1) | |||
| remove_document_from_index_task.delay(document_id) | |||
| elif action == "un_archive": | |||
| if not document.archived: | |||
| continue | |||
| document.archived = False | |||
| document.archived_at = None | |||
| document.archived_by = None | |||
| document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) | |||
| db.session.commit() | |||
| # Only re-index if the document is currently enabled | |||
| if document.enabled: | |||
| # Set cache to prevent indexing the same document multiple times | |||
| redis_client.setex(indexing_cache_key, 600, 1) | |||
| add_document_to_index_task.delay(document_id) | |||
| else: | |||
| raise ValueError(f"Invalid action: {action}") | |||
| class SegmentService: | |||
| @classmethod | |||
| @@ -1124,6 +1124,63 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/status/{action}' | |||
| method='PATCH' | |||
| title='Update Document Status' | |||
| name='#batch_document_status' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### Path | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| Knowledge ID | |||
| </Property> | |||
| <Property name='action' type='string' key='action'> | |||
| - `enable` - Enable document | |||
| - `disable` - Disable document | |||
| - `archive` - Archive document | |||
| - `un_archive` - Unarchive document | |||
| </Property> | |||
| </Properties> | |||
| ### Request Body | |||
| <Properties> | |||
| <Property name='document_ids' type='array[string]' key='document_ids'> | |||
| List of document IDs | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="Request" | |||
| tag="PATCH" | |||
| label="/datasets/{dataset_id}/documents/status/{action}" | |||
| targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\n "document_ids": ["doc-id-1", "doc-id-2"]\n}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \ | |||
| --header 'Authorization: Bearer {api_key}' \ | |||
| --header 'Content-Type: application/json' \ | |||
| --data-raw '{ | |||
| "document_ids": ["doc-id-1", "doc-id-2"] | |||
| }' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="Response"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "result": "success" | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments' | |||
| method='POST' | |||
| @@ -881,6 +881,63 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/status/{action}' | |||
| method='PATCH' | |||
| title='ドキュメントステータスの更新' | |||
| name='#batch_document_status' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### パス | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| ナレッジ ID | |||
| </Property> | |||
| <Property name='action' type='string' key='action'> | |||
| - `enable` - ドキュメントを有効化 | |||
| - `disable` - ドキュメントを無効化 | |||
| - `archive` - ドキュメントをアーカイブ | |||
| - `un_archive` - ドキュメントのアーカイブを解除 | |||
| </Property> | |||
| </Properties> | |||
| ### リクエストボディ | |||
| <Properties> | |||
| <Property name='document_ids' type='array[string]' key='document_ids'> | |||
| ドキュメントIDのリスト | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="リクエスト" | |||
| tag="PATCH" | |||
| label="/datasets/{dataset_id}/documents/status/{action}" | |||
| targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\n "document_ids": ["doc-id-1", "doc-id-2"]\n}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \ | |||
| --header 'Authorization: Bearer {api_key}' \ | |||
| --header 'Content-Type: application/json' \ | |||
| --data-raw '{ | |||
| "document_ids": ["doc-id-1", "doc-id-2"] | |||
| }' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="レスポンス"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "result": "success" | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments' | |||
| method='POST' | |||
| @@ -2413,3 +2470,4 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi | |||
| </tbody> | |||
| </table> | |||
| <div className="pb-4" /> | |||
| @@ -1131,6 +1131,63 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/status/{action}' | |||
| method='PATCH' | |||
| title='更新文档状态' | |||
| name='#batch_document_status' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### Path | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| 知识库 ID | |||
| </Property> | |||
| <Property name='action' type='string' key='action'> | |||
| - `enable` - 启用文档 | |||
| - `disable` - 禁用文档 | |||
| - `archive` - 归档文档 | |||
| - `un_archive` - 取消归档文档 | |||
| </Property> | |||
| </Properties> | |||
| ### Request Body | |||
| <Properties> | |||
| <Property name='document_ids' type='array[string]' key='document_ids'> | |||
| 文档ID列表 | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="Request" | |||
| tag="PATCH" | |||
| label="/datasets/{dataset_id}/documents/status/{action}" | |||
| targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\n "document_ids": ["doc-id-1", "doc-id-2"]\n}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \ | |||
| --header 'Authorization: Bearer {api_key}' \ | |||
| --header 'Content-Type: application/json' \ | |||
| --data-raw '{ | |||
| "document_ids": ["doc-id-1", "doc-id-2"] | |||
| }' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="Response"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "result": "success" | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments' | |||
| method='POST' | |||