| from core.model_manager import ModelManager | from core.model_manager import ModelManager | ||||
| from core.model_runtime.entities.model_entities import ModelType | from core.model_runtime.entities.model_entities import ModelType | ||||
| from extensions.ext_database import db | from extensions.ext_database import db | ||||
| from fields.segment_fields import segment_fields | |||||
| from models.dataset import Dataset, DocumentSegment | |||||
| from fields.segment_fields import child_chunk_fields, segment_fields | |||||
| from models.dataset import Dataset | |||||
| from services.dataset_service import DatasetService, DocumentService, SegmentService | from services.dataset_service import DatasetService, DocumentService, SegmentService | ||||
| from services.entities.knowledge_entities.knowledge_entities import SegmentUpdateArgs | from services.entities.knowledge_entities.knowledge_entities import SegmentUpdateArgs | ||||
| from services.errors.chunk import ( | |||||
| ChildChunkDeleteIndexError, | |||||
| ChildChunkIndexingError, | |||||
| ) | |||||
| from services.errors.chunk import ( | |||||
| ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError, | |||||
| ) | |||||
| from services.errors.chunk import ( | |||||
| ChildChunkIndexingError as ChildChunkIndexingServiceError, | |||||
| ) | |||||
| class SegmentApi(DatasetApiResource): | class SegmentApi(DatasetApiResource): | ||||
| return {"error": "Segments is required"}, 400 | return {"error": "Segments is required"}, 400 | ||||
| def get(self, tenant_id, dataset_id, document_id): | def get(self, tenant_id, dataset_id, document_id): | ||||
| """Create single segment.""" | |||||
| """Get segments.""" | |||||
| # check dataset | # check dataset | ||||
| dataset_id = str(dataset_id) | dataset_id = str(dataset_id) | ||||
| tenant_id = str(tenant_id) | tenant_id = str(tenant_id) | ||||
| status_list = args["status"] | status_list = args["status"] | ||||
| keyword = args["keyword"] | keyword = args["keyword"] | ||||
| query = DocumentSegment.query.filter( | |||||
| DocumentSegment.document_id == str(document_id), DocumentSegment.tenant_id == current_user.current_tenant_id | |||||
| segments, total = SegmentService.get_segments( | |||||
| document_id=document_id, | |||||
| tenant_id=current_user.current_tenant_id, | |||||
| status_list=args["status"], | |||||
| keyword=args["keyword"], | |||||
| ) | ) | ||||
| if status_list: | |||||
| query = query.filter(DocumentSegment.status.in_(status_list)) | |||||
| if keyword: | |||||
| query = query.where(DocumentSegment.content.ilike(f"%{keyword}%")) | |||||
| total = query.count() | |||||
| query = query.order_by(DocumentSegment.position) | |||||
| paginated_segments = query.paginate( | |||||
| page=page, | |||||
| per_page=limit, | |||||
| max_per_page=100, | |||||
| error_out=False, | |||||
| ) | |||||
| segments = paginated_segments.items | |||||
| response = { | response = { | ||||
| "data": marshal(segments, segment_fields), | "data": marshal(segments, segment_fields), | ||||
| "doc_form": document.doc_form, | "doc_form": document.doc_form, | ||||
| if not document: | if not document: | ||||
| raise NotFound("Document not found.") | raise NotFound("Document not found.") | ||||
| # check segment | # check segment | ||||
| segment = DocumentSegment.query.filter( | |||||
| DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id | |||||
| ).first() | |||||
| segment_id = str(segment_id) | |||||
| segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) | |||||
| if not segment: | if not segment: | ||||
| raise NotFound("Segment not found.") | raise NotFound("Segment not found.") | ||||
| SegmentService.delete_segment(segment, document, dataset) | SegmentService.delete_segment(segment, document, dataset) | ||||
| raise ProviderNotInitializeError(ex.description) | raise ProviderNotInitializeError(ex.description) | ||||
| # check segment | # check segment | ||||
| segment_id = str(segment_id) | segment_id = str(segment_id) | ||||
| segment = DocumentSegment.query.filter( | |||||
| DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id | |||||
| ).first() | |||||
| segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) | |||||
| if not segment: | if not segment: | ||||
| raise NotFound("Segment not found.") | raise NotFound("Segment not found.") | ||||
| parser.add_argument("segment", type=dict, required=False, nullable=True, location="json") | parser.add_argument("segment", type=dict, required=False, nullable=True, location="json") | ||||
| args = parser.parse_args() | args = parser.parse_args() | ||||
| SegmentService.segment_create_args_validate(args["segment"], document) | |||||
| segment = SegmentService.update_segment(SegmentUpdateArgs(**args["segment"]), segment, document, dataset) | |||||
| return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200 | |||||
| updated_segment = SegmentService.update_segment( | |||||
| SegmentUpdateArgs(**args["segment"]), segment, document, dataset | |||||
| ) | |||||
| return {"data": marshal(updated_segment, segment_fields), "doc_form": document.doc_form}, 200 | |||||
class ChildChunkApi(DatasetApiResource):
    """Resource for creating and listing the child chunks of one segment."""

    @staticmethod
    def _load_segment_context(tenant_id, dataset_id, document_id, segment_id):
        """Resolve the dataset, document and segment addressed by the URL.

        Args:
            tenant_id: Tenant that must own the dataset (already a ``str``).
            dataset_id: Dataset ID from the path (already a ``str``).
            document_id: Document ID from the path (already a ``str``).
            segment_id: Segment ID from the path (already a ``str``).

        Returns:
            A ``(dataset, document, segment)`` tuple.

        Raises:
            NotFound: If the dataset, document or segment does not exist, or
                if the segment does not belong to the addressed document.
        """
        dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
        if not dataset:
            raise NotFound("Dataset not found.")

        document = DocumentService.get_document(dataset.id, document_id)
        if not document:
            raise NotFound("Document not found.")

        segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id)
        if not segment:
            raise NotFound("Segment not found.")
        # The segment lookup is scoped by tenant only, so make sure the segment
        # really lives under the document named in the URL.
        if str(segment.document_id) != document_id:
            raise NotFound("Segment not found.")

        return dataset, document, segment

    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
    def post(self, tenant_id, dataset_id, document_id, segment_id):
        """Create a child chunk under the given segment.

        Expects a JSON body with a required, non-null ``content`` string.

        Returns:
            ``({"data": <marshalled child chunk>}, 200)``.

        Raises:
            NotFound: If the dataset, document or segment cannot be resolved.
            ProviderNotInitializeError: If the dataset uses ``high_quality``
                indexing and its embedding model cannot be obtained.
            ChildChunkIndexingError: If the service fails to index the chunk.
        """
        dataset, document, segment = self._load_segment_context(
            str(tenant_id), str(dataset_id), str(document_id), str(segment_id)
        )

        # check embedding model setting
        if dataset.indexing_technique == "high_quality":
            try:
                ModelManager().get_model_instance(
                    tenant_id=current_user.current_tenant_id,
                    provider=dataset.embedding_model_provider,
                    model_type=ModelType.TEXT_EMBEDDING,
                    model=dataset.embedding_model,
                )
            except LLMBadRequestError as ex:
                raise ProviderNotInitializeError(
                    "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
                ) from ex
            except ProviderTokenNotInitError as ex:
                raise ProviderNotInitializeError(ex.description) from ex

        # validate args
        parser = reqparse.RequestParser()
        parser.add_argument("content", type=str, required=True, nullable=False, location="json")
        args = parser.parse_args()

        try:
            child_chunk = SegmentService.create_child_chunk(args.get("content"), segment, document, dataset)
        except ChildChunkIndexingServiceError as e:
            raise ChildChunkIndexingError(str(e)) from e

        return {"data": marshal(child_chunk, child_chunk_fields)}, 200

    def get(self, tenant_id, dataset_id, document_id, segment_id):
        """List the child chunks of a segment, one page at a time.

        Query args: ``page`` (default 1), ``limit`` (default 20, capped at
        100) and an optional ``keyword`` forwarded to the service.

        Returns:
            ``(payload, 200)`` where ``payload`` carries ``data``, ``total``,
            ``total_pages``, ``page`` and ``limit``.

        Raises:
            NotFound: If the dataset, document or segment cannot be resolved.
        """
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        segment_id = str(segment_id)
        self._load_segment_context(str(tenant_id), dataset_id, document_id, segment_id)

        parser = reqparse.RequestParser()
        parser.add_argument("limit", type=int, default=20, location="args")
        parser.add_argument("keyword", type=str, default=None, location="args")
        parser.add_argument("page", type=int, default=1, location="args")
        args = parser.parse_args()

        page = args["page"]
        limit = min(args["limit"], 100)
        keyword = args["keyword"]

        child_chunks = SegmentService.get_child_chunks(segment_id, document_id, dataset_id, page, limit, keyword)

        return {
            "data": marshal(child_chunks.items, child_chunk_fields),
            "total": child_chunks.total,
            "total_pages": child_chunks.pages,
            "page": page,
            "limit": limit,
        }, 200
class DatasetChildChunkApi(DatasetApiResource):
    """Resource for deleting and updating a single child chunk."""

    @staticmethod
    def _load_child_chunk_context(tenant_id, dataset_id, document_id, segment_id, child_chunk_id):
        """Resolve every entity addressed by the URL and check the hierarchy.

        All IDs are normalised to ``str`` first so that both handlers treat
        the path parameters the same way.

        Returns:
            A ``(dataset, document, segment, child_chunk)`` tuple.

        Raises:
            NotFound: If any entity is missing, if the segment does not belong
                to the addressed document, or if the child chunk does not
                belong to the addressed segment.
        """
        tenant_id = str(tenant_id)
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        segment_id = str(segment_id)
        child_chunk_id = str(child_chunk_id)

        dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
        if not dataset:
            raise NotFound("Dataset not found.")

        document = DocumentService.get_document(dataset.id, document_id)
        if not document:
            raise NotFound("Document not found.")

        segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id)
        if not segment:
            raise NotFound("Segment not found.")
        # Lookups below the dataset are scoped by tenant only; reject IDs that
        # exist but hang under a different parent than the URL claims.
        if str(segment.document_id) != document_id:
            raise NotFound("Segment not found.")

        child_chunk = SegmentService.get_child_chunk_by_id(
            child_chunk_id=child_chunk_id, tenant_id=current_user.current_tenant_id
        )
        if not child_chunk:
            raise NotFound("Child chunk not found.")
        if str(child_chunk.segment_id) != segment_id:
            raise NotFound("Child chunk not found.")

        return dataset, document, segment, child_chunk

    # NOTE(review): a delete is gated by the "add_segment" knowledge limit
    # check; kept as-is, but confirm this is intended.
    @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
    def delete(self, tenant_id, dataset_id, document_id, segment_id, child_chunk_id):
        """Delete a child chunk.

        Returns:
            ``({"result": "success"}, 200)``.

        Raises:
            NotFound: If any entity in the URL cannot be resolved.
            ChildChunkDeleteIndexError: If the service fails to remove the
                chunk from the index.
        """
        dataset, _document, _segment, child_chunk = self._load_child_chunk_context(
            tenant_id, dataset_id, document_id, segment_id, child_chunk_id
        )

        try:
            SegmentService.delete_child_chunk(child_chunk, dataset)
        except ChildChunkDeleteIndexServiceError as e:
            raise ChildChunkDeleteIndexError(str(e)) from e

        return {"result": "success"}, 200

    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
    def patch(self, tenant_id, dataset_id, document_id, segment_id, child_chunk_id):
        """Update the content of a child chunk.

        Expects a JSON body with a required, non-null ``content`` string.

        Returns:
            ``({"data": <marshalled child chunk>}, 200)``.

        Raises:
            NotFound: If any entity in the URL cannot be resolved.
            ChildChunkIndexingError: If the service fails to re-index the
                chunk.
        """
        dataset, document, segment, child_chunk = self._load_child_chunk_context(
            tenant_id, dataset_id, document_id, segment_id, child_chunk_id
        )

        # validate args
        parser = reqparse.RequestParser()
        parser.add_argument("content", type=str, required=True, nullable=False, location="json")
        args = parser.parse_args()

        try:
            child_chunk = SegmentService.update_child_chunk(
                args.get("content"), child_chunk, segment, document, dataset
            )
        except ChildChunkIndexingServiceError as e:
            raise ChildChunkIndexingError(str(e)) from e

        return {"data": marshal(child_chunk, child_chunk_fields)}, 200
# Route table for segment and child-chunk resources; registered in order.
for _resource, _route in (
    (SegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments"),
    (DatasetSegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>"),
    (
        ChildChunkApi,
        "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>/child_chunks",
    ),
    (
        DatasetChildChunkApi,
        "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>/child_chunks/<uuid:child_chunk_id>",
    ),
):
    api.add_resource(_resource, _route)
| query = query.where(ChildChunk.content.ilike(f"%{keyword}%")) | query = query.where(ChildChunk.content.ilike(f"%{keyword}%")) | ||||
| return query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) | return query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) | ||||
@classmethod
def get_child_chunk_by_id(cls, child_chunk_id: str, tenant_id: str) -> Optional[ChildChunk]:
    """Look up one child chunk by ID within a tenant.

    Returns:
        The first row matching both the ID and the tenant, or ``None`` when
        the query yields nothing that is a ``ChildChunk``.
    """
    match_conditions = (ChildChunk.id == child_chunk_id, ChildChunk.tenant_id == tenant_id)
    found = ChildChunk.query.filter(*match_conditions).first()
    if isinstance(found, ChildChunk):
        return found
    return None
@classmethod
def get_segments(
    cls,
    document_id: str,
    tenant_id: str,
    status_list: list[str] | None = None,
    keyword: str | None = None,
    page: int | None = None,
    limit: int | None = None,
):
    """Get segments for a document with optional filtering and pagination.

    Args:
        document_id: Document whose segments are listed.
        tenant_id: Tenant the segments must belong to.
        status_list: When non-empty, keep only segments whose status is in
            this list.
        keyword: When non-empty, keep only segments whose content contains
            this text (case-insensitive ``ILIKE`` match).
        page: 1-based page number. Optional.
        limit: Page size, capped at 100 by the paginator. Optional.

    Returns:
        ``(segments, total)``. When neither ``page`` nor ``limit`` is given,
        every matching segment is returned and ``total`` is their count (the
        previous behaviour). Otherwise ``segments`` is the requested page and
        ``total`` is the number of matching rows across all pages.
    """
    query = DocumentSegment.query.filter(
        DocumentSegment.document_id == document_id, DocumentSegment.tenant_id == tenant_id
    )

    if status_list:
        query = query.filter(DocumentSegment.status.in_(status_list))

    if keyword:
        query = query.filter(DocumentSegment.content.ilike(f"%{keyword}%"))

    ordered = query.order_by(DocumentSegment.position.asc())

    if page is None and limit is None:
        # Unpaginated callers keep receiving the full result set.
        segments = ordered.all()
        return segments, len(segments)

    # Explicit defaults avoid the paginator falling back to request args.
    pagination = ordered.paginate(
        page=page if page is not None else 1,
        per_page=limit if limit is not None else 20,
        max_per_page=100,
        error_out=False,
    )
    return pagination.items, pagination.total
@classmethod
def update_segment_by_id(
    cls, tenant_id: str, dataset_id: str, document_id: str, segment_id: str, segment_data: dict, user_id: str
) -> tuple[DocumentSegment, Document]:
    """Update a segment by its ID with validation and checks.

    Args:
        tenant_id: Tenant that must own the dataset and the segment; also
            used to resolve the embedding model.
        dataset_id: Dataset containing the document.
        document_id: Document containing the segment.
        segment_id: Segment to update.
        segment_data: Raw update payload; validated and then converted to
            ``SegmentUpdateArgs``.
        user_id: Kept for interface compatibility. It is no longer used for
            tenant scoping (a user ID is not a tenant ID).

    Returns:
        ``(updated_segment, document)``.

    Raises:
        NotFound: If the dataset, document or segment does not exist.
        ValueError: If the dataset uses ``high_quality`` indexing and its
            embedding model cannot be obtained.
    """
    # check dataset
    dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
    if not dataset:
        raise NotFound("Dataset not found.")

    # check user's model setting
    DatasetService.check_dataset_model_setting(dataset)

    # check document
    document = DocumentService.get_document(dataset_id, document_id)
    if not document:
        raise NotFound("Document not found.")

    # check embedding model setting if high quality
    if dataset.indexing_technique == "high_quality":
        try:
            ModelManager().get_model_instance(
                tenant_id=tenant_id,
                provider=dataset.embedding_model_provider,
                model_type=ModelType.TEXT_EMBEDDING,
                model=dataset.embedding_model,
            )
        except LLMBadRequestError as ex:
            raise ValueError(
                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
            ) from ex
        except ProviderTokenNotInitError as ex:
            raise ValueError(ex.description) from ex

    # check segment (scoped by tenant, consistent with the dataset lookup)
    segment = DocumentSegment.query.filter(
        DocumentSegment.id == segment_id, DocumentSegment.tenant_id == tenant_id
    ).first()
    if not segment:
        raise NotFound("Segment not found.")

    # validate and update segment
    cls.segment_create_args_validate(segment_data, document)
    updated_segment = cls.update_segment(SegmentUpdateArgs(**segment_data), segment, document, dataset)

    return updated_segment, document
@classmethod
def get_segment_by_id(cls, segment_id: str, tenant_id: str) -> Optional[DocumentSegment]:
    """Look up one segment by ID within a tenant.

    Returns:
        The first row matching both the ID and the tenant, or ``None`` when
        the query yields nothing that is a ``DocumentSegment``.
    """
    match_conditions = (DocumentSegment.id == segment_id, DocumentSegment.tenant_id == tenant_id)
    found = DocumentSegment.query.filter(*match_conditions).first()
    if isinstance(found, DocumentSegment):
        return found
    return None
| class DatasetCollectionBindingService: | class DatasetCollectionBindingService: | ||||
| @classmethod | @classmethod |
| <hr className='ml-0 mr-0' /> | <hr className='ml-0 mr-0' /> | ||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' | |||||
| method='POST' | |||||
| title='Create Child Chunk' | |||||
| name='#create_child_chunk' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Knowledge ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| Document ID | |||||
| </Property> | |||||
| <Property name='segment_id' type='string' key='segment_id'> | |||||
| Segment ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='content' type='string' key='content'> | |||||
| Child chunk content | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "Child chunk content"}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "content": "Child chunk content" | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": { | |||||
| "id": "", | |||||
| "segment_id": "", | |||||
| "content": "Child chunk content", | |||||
| "word_count": 25, | |||||
| "tokens": 0, | |||||
| "index_node_id": "", | |||||
| "index_node_hash": "", | |||||
| "status": "completed", | |||||
| "created_by": "", | |||||
| "created_at": 1695312007, | |||||
| "indexing_at": 1695312007, | |||||
| "completed_at": 1695312007, | |||||
| "error": null, | |||||
| "stopped_at": null | |||||
| } | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| <hr className='ml-0 mr-0' /> | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' | |||||
| method='GET' | |||||
| title='Get Child Chunks' | |||||
| name='#get_child_chunks' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Knowledge ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| Document ID | |||||
| </Property> | |||||
| <Property name='segment_id' type='string' key='segment_id'> | |||||
| Segment ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Query | |||||
| <Properties> | |||||
| <Property name='keyword' type='string' key='keyword'> | |||||
| Search keyword (optional) | |||||
| </Property> | |||||
| <Property name='page' type='integer' key='page'> | |||||
| Page number (optional, default: 1) | |||||
| </Property> | |||||
| <Property name='limit' type='integer' key='limit'> | |||||
| Items per page (optional, default: 20, max: 100) | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="GET" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks" | |||||
| targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": [{ | |||||
| "id": "", | |||||
| "segment_id": "", | |||||
| "content": "Child chunk content", | |||||
| "word_count": 25, | |||||
| "tokens": 0, | |||||
| "index_node_id": "", | |||||
| "index_node_hash": "", | |||||
| "status": "completed", | |||||
| "created_by": "", | |||||
| "created_at": 1695312007, | |||||
| "indexing_at": 1695312007, | |||||
| "completed_at": 1695312007, | |||||
| "error": null, | |||||
| "stopped_at": null | |||||
| }], | |||||
| "total": 1, | |||||
| "total_pages": 1, | |||||
| "page": 1, | |||||
| "limit": 20 | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| <hr className='ml-0 mr-0' /> | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' | |||||
| method='DELETE' | |||||
| title='Delete Child Chunk' | |||||
| name='#delete_child_chunk' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Knowledge ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| Document ID | |||||
| </Property> | |||||
| <Property name='segment_id' type='string' key='segment_id'> | |||||
| Segment ID | |||||
| </Property> | |||||
| <Property name='child_chunk_id' type='string' key='child_chunk_id'> | |||||
| Child Chunk ID | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="DELETE" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" | |||||
| targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "result": "success" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| <hr className='ml-0 mr-0' /> | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' | |||||
| method='PATCH' | |||||
| title='Update Child Chunk' | |||||
| name='#update_child_chunk' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Params | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| Knowledge ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| Document ID | |||||
| </Property> | |||||
| <Property name='segment_id' type='string' key='segment_id'> | |||||
| Segment ID | |||||
| </Property> | |||||
| <Property name='child_chunk_id' type='string' key='child_chunk_id'> | |||||
| Child Chunk ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='content' type='string' key='content'> | |||||
| Child chunk content | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="PATCH" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" | |||||
| targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "Updated child chunk content"}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "content": "Updated child chunk content" | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": { | |||||
| "id": "", | |||||
| "segment_id": "", | |||||
| "content": "Updated child chunk content", | |||||
| "word_count": 25, | |||||
| "tokens": 0, | |||||
| "index_node_id": "", | |||||
| "index_node_hash": "", | |||||
| "status": "completed", | |||||
| "created_by": "", | |||||
| "created_at": 1695312007, | |||||
| "indexing_at": 1695312007, | |||||
| "completed_at": 1695312007, | |||||
| "error": null, | |||||
| "stopped_at": null | |||||
| } | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| <hr className='ml-0 mr-0' /> | |||||
| <Heading | <Heading | ||||
| url='/datasets/{dataset_id}/documents/{document_id}/upload-file' | url='/datasets/{dataset_id}/documents/{document_id}/upload-file' | ||||
| method='GET' | method='GET' | ||||
| </tr> | </tr> | ||||
| </tbody> | </tbody> | ||||
| </table> | </table> | ||||
| <div className="pb-4" /> | |||||
| <div className="pb-4" /> |
| <hr className='ml-0 mr-0' /> | <hr className='ml-0 mr-0' /> | ||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' | |||||
| method='POST' | |||||
| title='新增文档子分段' | |||||
| name='#create_child_chunk' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 知识库 ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| 文档 ID | |||||
| </Property> | |||||
| <Property name='segment_id' type='string' key='segment_id'> | |||||
| 分段 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='content' type='string' key='content'> | |||||
| 子分段内容 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="POST" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks" | |||||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "子分段内容"}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "content": "子分段内容" | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": { | |||||
| "id": "", | |||||
| "segment_id": "", | |||||
| "content": "子分段内容", | |||||
| "word_count": 25, | |||||
| "tokens": 0, | |||||
| "index_node_id": "", | |||||
| "index_node_hash": "", | |||||
| "status": "completed", | |||||
| "created_by": "", | |||||
| "created_at": 1695312007, | |||||
| "indexing_at": 1695312007, | |||||
| "completed_at": 1695312007, | |||||
| "error": null, | |||||
| "stopped_at": null | |||||
| } | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| <hr className='ml-0 mr-0' /> | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' | |||||
| method='GET' | |||||
| title='查询文档子分段' | |||||
| name='#get_child_chunks' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 知识库 ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| 文档 ID | |||||
| </Property> | |||||
| <Property name='segment_id' type='string' key='segment_id'> | |||||
| 分段 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Query | |||||
| <Properties> | |||||
| <Property name='keyword' type='string' key='keyword'> | |||||
| 搜索关键词(选填) | |||||
| </Property> | |||||
| <Property name='page' type='integer' key='page'> | |||||
| 页码(选填,默认1) | |||||
| </Property> | |||||
| <Property name='limit' type='integer' key='limit'> | |||||
| 每页数量(选填,默认20,最大100) | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="GET" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks" | |||||
| targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": [{ | |||||
| "id": "", | |||||
| "segment_id": "", | |||||
| "content": "子分段内容", | |||||
| "word_count": 25, | |||||
| "tokens": 0, | |||||
| "index_node_id": "", | |||||
| "index_node_hash": "", | |||||
| "status": "completed", | |||||
| "created_by": "", | |||||
| "created_at": 1695312007, | |||||
| "indexing_at": 1695312007, | |||||
| "completed_at": 1695312007, | |||||
| "error": null, | |||||
| "stopped_at": null | |||||
| }], | |||||
| "total": 1, | |||||
| "total_pages": 1, | |||||
| "page": 1, | |||||
| "limit": 20 | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| <hr className='ml-0 mr-0' /> | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' | |||||
| method='DELETE' | |||||
| title='删除文档子分段' | |||||
| name='#delete_child_chunk' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 知识库 ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| 文档 ID | |||||
| </Property> | |||||
| <Property name='segment_id' type='string' key='segment_id'> | |||||
| 分段 ID | |||||
| </Property> | |||||
| <Property name='child_chunk_id' type='string' key='child_chunk_id'> | |||||
| 子分段 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="DELETE" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" | |||||
| targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ | |||||
| --header 'Authorization: Bearer {api_key}' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "result": "success" | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| <hr className='ml-0 mr-0' /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### 错误信息 | |||||
| <Properties> | |||||
| <Property name='code' type='string' key='code'> | |||||
| 返回的错误代码 | |||||
| </Property> | |||||
| </Properties> | |||||
| <Properties> | |||||
| <Property name='status' type='number' key='status'> | |||||
| 返回的错误状态码 | |||||
| </Property> | |||||
| </Properties> | |||||
| <Properties> | |||||
| <Property name='message' type='string' key='message'> | |||||
| 返回的错误信息 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col> | |||||
| <CodeGroup title="Example"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "code": "no_file_uploaded", | |||||
| "message": "Please upload your file.", | |||||
| "status": 400 | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| <hr className='ml-0 mr-0' /> | |||||
| <Heading | |||||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' | |||||
| method='PATCH' | |||||
| title='更新文档子分段' | |||||
| name='#update_child_chunk' | |||||
| /> | |||||
| <Row> | |||||
| <Col> | |||||
| ### Path | |||||
| <Properties> | |||||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||||
| 知识库 ID | |||||
| </Property> | |||||
| <Property name='document_id' type='string' key='document_id'> | |||||
| 文档 ID | |||||
| </Property> | |||||
| <Property name='segment_id' type='string' key='segment_id'> | |||||
| 分段 ID | |||||
| </Property> | |||||
| <Property name='child_chunk_id' type='string' key='child_chunk_id'> | |||||
| 子分段 ID | |||||
| </Property> | |||||
| </Properties> | |||||
| ### Request Body | |||||
| <Properties> | |||||
| <Property name='content' type='string' key='content'> | |||||
| 子分段内容 | |||||
| </Property> | |||||
| </Properties> | |||||
| </Col> | |||||
| <Col sticky> | |||||
| <CodeGroup | |||||
| title="Request" | |||||
| tag="PATCH" | |||||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" | |||||
| targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "更新的子分段内容"}'`} | |||||
| > | |||||
| ```bash {{ title: 'cURL' }} | |||||
| curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ | |||||
| --header 'Authorization: Bearer {api_key}' \ | |||||
| --header 'Content-Type: application/json' \ | |||||
| --data-raw '{ | |||||
| "content": "更新的子分段内容" | |||||
| }' | |||||
| ``` | |||||
| </CodeGroup> | |||||
| <CodeGroup title="Response"> | |||||
| ```json {{ title: 'Response' }} | |||||
| { | |||||
| "data": { | |||||
| "id": "", | |||||
| "segment_id": "", | |||||
| "content": "更新的子分段内容", | |||||
| "word_count": 25, | |||||
| "tokens": 0, | |||||
| "index_node_id": "", | |||||
| "index_node_hash": "", | |||||
| "status": "completed", | |||||
| "created_by": "", | |||||
| "created_at": 1695312007, | |||||
| "indexing_at": 1695312007, | |||||
| "completed_at": 1695312007, | |||||
| "error": null, | |||||
| "stopped_at": null | |||||
| } | |||||
| } | |||||
| ``` | |||||
| </CodeGroup> | |||||
| </Col> | |||||
| </Row> | |||||
| <hr className='ml-0 mr-0' /> | |||||
| <Heading | <Heading | ||||
| url='/datasets/{dataset_id}/documents/{document_id}/upload-file' | url='/datasets/{dataset_id}/documents/{document_id}/upload-file' | ||||
| method='GET' | method='GET' |