| @@ -14,10 +14,20 @@ from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError | |||
| from core.model_manager import ModelManager | |||
| from core.model_runtime.entities.model_entities import ModelType | |||
| from extensions.ext_database import db | |||
| from fields.segment_fields import segment_fields | |||
| from models.dataset import Dataset, DocumentSegment | |||
| from fields.segment_fields import child_chunk_fields, segment_fields | |||
| from models.dataset import Dataset | |||
| from services.dataset_service import DatasetService, DocumentService, SegmentService | |||
| from services.entities.knowledge_entities.knowledge_entities import SegmentUpdateArgs | |||
| from services.errors.chunk import ( | |||
| ChildChunkDeleteIndexError, | |||
| ChildChunkIndexingError, | |||
| ) | |||
| from services.errors.chunk import ( | |||
| ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError, | |||
| ) | |||
| from services.errors.chunk import ( | |||
| ChildChunkIndexingError as ChildChunkIndexingServiceError, | |||
| ) | |||
| class SegmentApi(DatasetApiResource): | |||
| @@ -71,7 +81,7 @@ class SegmentApi(DatasetApiResource): | |||
| return {"error": "Segments is required"}, 400 | |||
| def get(self, tenant_id, dataset_id, document_id): | |||
| """Create single segment.""" | |||
| """Get segments.""" | |||
| # check dataset | |||
| dataset_id = str(dataset_id) | |||
| tenant_id = str(tenant_id) | |||
| @@ -110,26 +120,13 @@ class SegmentApi(DatasetApiResource): | |||
| status_list = args["status"] | |||
| keyword = args["keyword"] | |||
| query = DocumentSegment.query.filter( | |||
| DocumentSegment.document_id == str(document_id), DocumentSegment.tenant_id == current_user.current_tenant_id | |||
| segments, total = SegmentService.get_segments( | |||
| document_id=document_id, | |||
| tenant_id=current_user.current_tenant_id, | |||
| status_list=args["status"], | |||
| keyword=args["keyword"], | |||
| ) | |||
| if status_list: | |||
| query = query.filter(DocumentSegment.status.in_(status_list)) | |||
| if keyword: | |||
| query = query.where(DocumentSegment.content.ilike(f"%{keyword}%")) | |||
| total = query.count() | |||
| query = query.order_by(DocumentSegment.position) | |||
| paginated_segments = query.paginate( | |||
| page=page, | |||
| per_page=limit, | |||
| max_per_page=100, | |||
| error_out=False, | |||
| ) | |||
| segments = paginated_segments.items | |||
| response = { | |||
| "data": marshal(segments, segment_fields), | |||
| "doc_form": document.doc_form, | |||
| @@ -158,9 +155,8 @@ class DatasetSegmentApi(DatasetApiResource): | |||
| if not document: | |||
| raise NotFound("Document not found.") | |||
| # check segment | |||
| segment = DocumentSegment.query.filter( | |||
| DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id | |||
| ).first() | |||
| segment_id = str(segment_id) | |||
| segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) | |||
| if not segment: | |||
| raise NotFound("Segment not found.") | |||
| SegmentService.delete_segment(segment, document, dataset) | |||
| @@ -199,9 +195,7 @@ class DatasetSegmentApi(DatasetApiResource): | |||
| raise ProviderNotInitializeError(ex.description) | |||
| # check segment | |||
| segment_id = str(segment_id) | |||
| segment = DocumentSegment.query.filter( | |||
| DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id | |||
| ).first() | |||
| segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id) | |||
| if not segment: | |||
| raise NotFound("Segment not found.") | |||
| @@ -210,12 +204,200 @@ class DatasetSegmentApi(DatasetApiResource): | |||
| parser.add_argument("segment", type=dict, required=False, nullable=True, location="json") | |||
| args = parser.parse_args() | |||
| SegmentService.segment_create_args_validate(args["segment"], document) | |||
| segment = SegmentService.update_segment(SegmentUpdateArgs(**args["segment"]), segment, document, dataset) | |||
| return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200 | |||
| updated_segment = SegmentService.update_segment( | |||
| SegmentUpdateArgs(**args["segment"]), segment, document, dataset | |||
| ) | |||
| return {"data": marshal(updated_segment, segment_fields), "doc_form": document.doc_form}, 200 | |||
class ChildChunkApi(DatasetApiResource):
    """Resource for creating and listing the child chunks of a segment."""

    @staticmethod
    def _resolve_dataset_document_segment(tenant_id, dataset_id, document_id, segment_id):
        """Load the dataset, document and segment addressed by the request path.

        Args:
            tenant_id: Tenant used to scope the dataset lookup.
            dataset_id: Dataset ID from the URL (converted with ``str``).
            document_id: Document ID from the URL (converted with ``str``).
            segment_id: Segment ID from the URL (converted with ``str``).

        Returns:
            A ``(dataset, document, segment)`` tuple.

        Raises:
            NotFound: If the dataset, document or segment cannot be found, or the
                segment does not belong to the addressed document.
        """
        # check dataset
        dataset_id = str(dataset_id)
        tenant_id = str(tenant_id)
        dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
        if not dataset:
            raise NotFound("Dataset not found.")

        # check document
        document_id = str(document_id)
        document = DocumentService.get_document(dataset.id, document_id)
        if not document:
            raise NotFound("Document not found.")

        # check segment
        segment_id = str(segment_id)
        segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id)
        if not segment:
            raise NotFound("Segment not found.")
        # The segment lookup is scoped by tenant only, so make sure the segment
        # really lives under the document named in the URL.
        if str(segment.document_id) != document_id:
            raise NotFound("Segment not found.")

        return dataset, document, segment

    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
    def post(self, tenant_id, dataset_id, document_id, segment_id):
        """Create child chunk.

        Expects a JSON body with a required ``content`` string and returns the
        marshalled child chunk under ``data`` with status 200.
        """
        dataset, document, segment = self._resolve_dataset_document_segment(
            tenant_id, dataset_id, document_id, segment_id
        )

        # check embedding model setting
        if dataset.indexing_technique == "high_quality":
            try:
                model_manager = ModelManager()
                model_manager.get_model_instance(
                    tenant_id=current_user.current_tenant_id,
                    provider=dataset.embedding_model_provider,
                    model_type=ModelType.TEXT_EMBEDDING,
                    model=dataset.embedding_model,
                )
            except LLMBadRequestError as ex:
                raise ProviderNotInitializeError(
                    "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
                ) from ex
            except ProviderTokenNotInitError as ex:
                raise ProviderNotInitializeError(ex.description) from ex

        # validate args
        parser = reqparse.RequestParser()
        parser.add_argument("content", type=str, required=True, nullable=False, location="json")
        args = parser.parse_args()

        try:
            child_chunk = SegmentService.create_child_chunk(args.get("content"), segment, document, dataset)
        except ChildChunkIndexingServiceError as e:
            # NOTE(review): confirm ChildChunkIndexingError is the API-layer error class
            # and not the same class as the service error caught here.
            raise ChildChunkIndexingError(str(e)) from e

        return {"data": marshal(child_chunk, child_chunk_fields)}, 200

    def get(self, tenant_id, dataset_id, document_id, segment_id):
        """Get child chunks.

        Query arguments: ``page`` (default 1), ``limit`` (default 20, capped at
        100) and an optional ``keyword``. Returns the marshalled page of child
        chunks together with ``total``, ``total_pages``, ``page`` and ``limit``.
        """
        # Validates that the dataset, document and segment exist; only the IDs are
        # needed for the listing query below.
        self._resolve_dataset_document_segment(tenant_id, dataset_id, document_id, segment_id)

        parser = reqparse.RequestParser()
        parser.add_argument("limit", type=int, default=20, location="args")
        parser.add_argument("keyword", type=str, default=None, location="args")
        parser.add_argument("page", type=int, default=1, location="args")
        args = parser.parse_args()

        page = args["page"]
        limit = min(args["limit"], 100)
        keyword = args["keyword"]

        child_chunks = SegmentService.get_child_chunks(
            str(segment_id), str(document_id), str(dataset_id), page, limit, keyword
        )

        return {
            "data": marshal(child_chunks.items, child_chunk_fields),
            "total": child_chunks.total,
            "total_pages": child_chunks.pages,
            "page": page,
            "limit": limit,
        }, 200
class DatasetChildChunkApi(DatasetApiResource):
    """Resource for deleting and updating a single child chunk."""

    @staticmethod
    def _resolve_child_chunk_context(tenant_id, dataset_id, document_id, segment_id, child_chunk_id):
        """Load every entity addressed by the request path and check they are linked.

        All path values are converted with ``str`` before use so that both
        handlers query with the same ID representation.

        Returns:
            A ``(dataset, document, segment, child_chunk)`` tuple.

        Raises:
            NotFound: If any entity is missing, the segment does not belong to
                the document, or the child chunk does not belong to the segment.
        """
        # check dataset
        dataset_id = str(dataset_id)
        tenant_id = str(tenant_id)
        dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
        if not dataset:
            raise NotFound("Dataset not found.")

        # check document
        document_id = str(document_id)
        document = DocumentService.get_document(dataset.id, document_id)
        if not document:
            raise NotFound("Document not found.")

        # check segment
        segment_id = str(segment_id)
        segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id)
        if not segment:
            raise NotFound("Segment not found.")
        # Segment and child chunk lookups are scoped by tenant only; reject IDs that
        # exist in the tenant but do not sit under the parent named in the URL.
        if str(segment.document_id) != document_id:
            raise NotFound("Segment not found.")

        # check child chunk
        child_chunk_id = str(child_chunk_id)
        child_chunk = SegmentService.get_child_chunk_by_id(
            child_chunk_id=child_chunk_id, tenant_id=current_user.current_tenant_id
        )
        if not child_chunk:
            raise NotFound("Child chunk not found.")
        if str(child_chunk.segment_id) != str(segment.id):
            raise NotFound("Child chunk not found.")

        return dataset, document, segment, child_chunk

    @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
    def delete(self, tenant_id, dataset_id, document_id, segment_id, child_chunk_id):
        """Delete child chunk.

        Returns ``{"result": "success"}`` with status 200 once the service has
        removed the child chunk.
        """
        dataset, _document, _segment, child_chunk = self._resolve_child_chunk_context(
            tenant_id, dataset_id, document_id, segment_id, child_chunk_id
        )

        try:
            SegmentService.delete_child_chunk(child_chunk, dataset)
        except ChildChunkDeleteIndexServiceError as e:
            # NOTE(review): confirm ChildChunkDeleteIndexError is the API-layer error class
            # and not the same class as the service error caught here.
            raise ChildChunkDeleteIndexError(str(e)) from e

        return {"result": "success"}, 200

    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
    def patch(self, tenant_id, dataset_id, document_id, segment_id, child_chunk_id):
        """Update child chunk.

        Expects a JSON body with a required ``content`` string and returns the
        marshalled, updated child chunk under ``data`` with status 200.
        """
        dataset, document, segment, child_chunk = self._resolve_child_chunk_context(
            tenant_id, dataset_id, document_id, segment_id, child_chunk_id
        )

        # validate args
        parser = reqparse.RequestParser()
        parser.add_argument("content", type=str, required=True, nullable=False, location="json")
        args = parser.parse_args()

        try:
            child_chunk = SegmentService.update_child_chunk(
                args.get("content"), child_chunk, segment, document, dataset
            )
        except ChildChunkIndexingServiceError as e:
            # NOTE(review): confirm ChildChunkIndexingError is the API-layer error class
            # and not the same class as the service error caught here.
            raise ChildChunkIndexingError(str(e)) from e

        return {"data": marshal(child_chunk, child_chunk_fields)}, 200
# Route table for the segment and child-chunk resources; entries are registered
# in the order listed.
for _resource_cls, _route in (
    (SegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments"),
    (DatasetSegmentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>"),
    (
        ChildChunkApi,
        "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>/child_chunks",
    ),
    (
        DatasetChildChunkApi,
        "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>/child_chunks/<uuid:child_chunk_id>",
    ),
):
    api.add_resource(_resource_cls, _route)
# Do not leave the loop variables behind as module attributes.
del _resource_cls, _route
| @@ -2140,6 +2140,88 @@ class SegmentService: | |||
| query = query.where(ChildChunk.content.ilike(f"%{keyword}%")) | |||
| return query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) | |||
@classmethod
def get_child_chunk_by_id(cls, child_chunk_id: str, tenant_id: str) -> Optional[ChildChunk]:
    """Look up one child chunk by ID within a tenant.

    Returns the first ``ChildChunk`` whose ``id`` and ``tenant_id`` both match,
    or ``None`` when the query yields nothing that is a ``ChildChunk``.
    """
    matching = ChildChunk.query.filter(
        ChildChunk.id == child_chunk_id,
        ChildChunk.tenant_id == tenant_id,
    )
    candidate = matching.first()
    if not isinstance(candidate, ChildChunk):
        return None
    return candidate
@classmethod
def get_segments(
    cls,
    document_id: str,
    tenant_id: str,
    status_list: list[str] | None = None,
    keyword: str | None = None,
    page: int | None = None,
    limit: int | None = None,
):
    """Get segments for a document with optional filtering and pagination.

    Args:
        document_id: Document whose segments are listed.
        tenant_id: Tenant the segments must belong to.
        status_list: When non-empty, keep only segments whose status is in it.
        keyword: When non-empty, keep only segments whose content contains it
            (case-insensitive ``ILIKE`` match).
        page: 1-based page number. Optional; see ``limit``.
        limit: Page size, capped at 100. When both ``page`` and ``limit`` are
            ``None`` (the default) every matching segment is returned, which is
            the behaviour existing callers rely on.

    Returns:
        A ``(segments, total)`` tuple. ``segments`` is ordered by position;
        ``total`` is the number of matching segments across all pages.
    """
    query = DocumentSegment.query.filter(
        DocumentSegment.document_id == document_id, DocumentSegment.tenant_id == tenant_id
    )

    if status_list:
        query = query.filter(DocumentSegment.status.in_(status_list))

    if keyword:
        query = query.filter(DocumentSegment.content.ilike(f"%{keyword}%"))

    query = query.order_by(DocumentSegment.position.asc())

    if page is None and limit is None:
        # Backward-compatible path: no pagination requested.
        segments = query.all()
        return segments, len(segments)

    # Pass explicit values so the paginator never falls back to its own defaults.
    paginated = query.paginate(
        page=1 if page is None else page,
        per_page=20 if limit is None else limit,
        max_per_page=100,
        error_out=False,
    )
    return paginated.items, paginated.total
@classmethod
def update_segment_by_id(
    cls, tenant_id: str, dataset_id: str, document_id: str, segment_id: str, segment_data: dict, user_id: str
) -> tuple[DocumentSegment, Document]:
    """Update a segment by its ID with validation and checks.

    Args:
        tenant_id: Tenant that must own the dataset, the embedding model
            configuration and the segment.
        dataset_id: Dataset containing the document.
        document_id: Document containing the segment.
        segment_id: Segment to update.
        segment_data: Raw update payload; validated with
            ``segment_create_args_validate`` and converted to ``SegmentUpdateArgs``.
        user_id: Kept for interface compatibility; it is not used for any
            tenant-scoped lookup.

    Returns:
        A ``(updated_segment, document)`` tuple.

    Raises:
        NotFound: If the dataset, document or segment cannot be found.
        ValueError: If the dataset uses ``high_quality`` indexing and no usable
            embedding model is configured for the tenant.
    """
    # check dataset
    dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
    if not dataset:
        raise NotFound("Dataset not found.")

    # check user's model setting
    DatasetService.check_dataset_model_setting(dataset)

    # check document
    document = DocumentService.get_document(dataset_id, document_id)
    if not document:
        raise NotFound("Document not found.")

    # check embedding model setting if high quality
    if dataset.indexing_technique == "high_quality":
        try:
            model_manager = ModelManager()
            model_manager.get_model_instance(
                # Model configuration is resolved per tenant, not per user.
                tenant_id=tenant_id,
                provider=dataset.embedding_model_provider,
                model_type=ModelType.TEXT_EMBEDDING,
                model=dataset.embedding_model,
            )
        except LLMBadRequestError as ex:
            raise ValueError(
                "No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
            ) from ex
        except ProviderTokenNotInitError as ex:
            raise ValueError(ex.description) from ex

    # check segment (scoped to the tenant, matching get_segment_by_id)
    segment = DocumentSegment.query.filter(
        DocumentSegment.id == segment_id, DocumentSegment.tenant_id == tenant_id
    ).first()
    if not segment:
        raise NotFound("Segment not found.")

    # validate and update segment
    cls.segment_create_args_validate(segment_data, document)
    updated_segment = cls.update_segment(SegmentUpdateArgs(**segment_data), segment, document, dataset)

    return updated_segment, document
@classmethod
def get_segment_by_id(cls, segment_id: str, tenant_id: str) -> Optional[DocumentSegment]:
    """Look up one segment by ID within a tenant.

    Returns the first ``DocumentSegment`` whose ``id`` and ``tenant_id`` both
    match, or ``None`` when the query yields nothing that is a ``DocumentSegment``.
    """
    matching = DocumentSegment.query.filter(
        DocumentSegment.id == segment_id,
        DocumentSegment.tenant_id == tenant_id,
    )
    candidate = matching.first()
    if not isinstance(candidate, DocumentSegment):
        return None
    return candidate
| class DatasetCollectionBindingService: | |||
| @classmethod | |||
| @@ -1158,6 +1158,276 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' | |||
| method='POST' | |||
| title='Create Child Chunk' | |||
| name='#create_child_chunk' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### Params | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| Knowledge ID | |||
| </Property> | |||
| <Property name='document_id' type='string' key='document_id'> | |||
| Document ID | |||
| </Property> | |||
| <Property name='segment_id' type='string' key='segment_id'> | |||
| Segment ID | |||
| </Property> | |||
| </Properties> | |||
| ### Request Body | |||
| <Properties> | |||
| <Property name='content' type='string' key='content'> | |||
| Child chunk content | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="Request" | |||
| tag="POST" | |||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks" | |||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "Child chunk content"}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \ | |||
| --header 'Authorization: Bearer {api_key}' \ | |||
| --header 'Content-Type: application/json' \ | |||
| --data-raw '{ | |||
| "content": "Child chunk content" | |||
| }' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="Response"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "data": { | |||
| "id": "", | |||
| "segment_id": "", | |||
| "content": "Child chunk content", | |||
| "word_count": 25, | |||
| "tokens": 0, | |||
| "index_node_id": "", | |||
| "index_node_hash": "", | |||
| "status": "completed", | |||
| "created_by": "", | |||
| "created_at": 1695312007, | |||
| "indexing_at": 1695312007, | |||
| "completed_at": 1695312007, | |||
| "error": null, | |||
| "stopped_at": null | |||
| } | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' | |||
| method='GET' | |||
| title='Get Child Chunks' | |||
| name='#get_child_chunks' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### Params | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| Knowledge ID | |||
| </Property> | |||
| <Property name='document_id' type='string' key='document_id'> | |||
| Document ID | |||
| </Property> | |||
| <Property name='segment_id' type='string' key='segment_id'> | |||
| Segment ID | |||
| </Property> | |||
| </Properties> | |||
| ### Query | |||
| <Properties> | |||
| <Property name='keyword' type='string' key='keyword'> | |||
| Search keyword (optional) | |||
| </Property> | |||
| <Property name='page' type='integer' key='page'> | |||
| Page number (optional, default: 1) | |||
| </Property> | |||
| <Property name='limit' type='integer' key='limit'> | |||
| Items per page (optional, default: 20, max: 100) | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="Request" | |||
| tag="GET" | |||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks" | |||
| targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \ | |||
| --header 'Authorization: Bearer {api_key}' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="Response"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "data": [{ | |||
| "id": "", | |||
| "segment_id": "", | |||
| "content": "Child chunk content", | |||
| "word_count": 25, | |||
| "tokens": 0, | |||
| "index_node_id": "", | |||
| "index_node_hash": "", | |||
| "status": "completed", | |||
| "created_by": "", | |||
| "created_at": 1695312007, | |||
| "indexing_at": 1695312007, | |||
| "completed_at": 1695312007, | |||
| "error": null, | |||
| "stopped_at": null | |||
| }], | |||
| "total": 1, | |||
| "total_pages": 1, | |||
| "page": 1, | |||
| "limit": 20 | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' | |||
| method='DELETE' | |||
| title='Delete Child Chunk' | |||
| name='#delete_child_chunk' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### Params | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| Knowledge ID | |||
| </Property> | |||
| <Property name='document_id' type='string' key='document_id'> | |||
| Document ID | |||
| </Property> | |||
| <Property name='segment_id' type='string' key='segment_id'> | |||
| Segment ID | |||
| </Property> | |||
| <Property name='child_chunk_id' type='string' key='child_chunk_id'> | |||
| Child Chunk ID | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="Request" | |||
| tag="DELETE" | |||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" | |||
| targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ | |||
| --header 'Authorization: Bearer {api_key}' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="Response"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "result": "success" | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' | |||
| method='PATCH' | |||
| title='Update Child Chunk' | |||
| name='#update_child_chunk' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### Params | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| Knowledge ID | |||
| </Property> | |||
| <Property name='document_id' type='string' key='document_id'> | |||
| Document ID | |||
| </Property> | |||
| <Property name='segment_id' type='string' key='segment_id'> | |||
| Segment ID | |||
| </Property> | |||
| <Property name='child_chunk_id' type='string' key='child_chunk_id'> | |||
| Child Chunk ID | |||
| </Property> | |||
| </Properties> | |||
| ### Request Body | |||
| <Properties> | |||
| <Property name='content' type='string' key='content'> | |||
| Child chunk content | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="Request" | |||
| tag="PATCH" | |||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" | |||
| targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "Updated child chunk content"}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ | |||
| --header 'Authorization: Bearer {api_key}' \ | |||
| --header 'Content-Type: application/json' \ | |||
| --data-raw '{ | |||
| "content": "Updated child chunk content" | |||
| }' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="Response"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "data": { | |||
| "id": "", | |||
| "segment_id": "", | |||
| "content": "Updated child chunk content", | |||
| "word_count": 25, | |||
| "tokens": 0, | |||
| "index_node_id": "", | |||
| "index_node_hash": "", | |||
| "status": "completed", | |||
| "created_by": "", | |||
| "created_at": 1695312007, | |||
| "indexing_at": 1695312007, | |||
| "completed_at": 1695312007, | |||
| "error": null, | |||
| "stopped_at": null | |||
| } | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/upload-file' | |||
| method='GET' | |||
| @@ -1704,4 +1974,4 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi | |||
| </tr> | |||
| </tbody> | |||
| </table> | |||
| <div className="pb-4" /> | |||
| <div className="pb-4" /> | |||
| @@ -1159,6 +1159,310 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' | |||
| method='POST' | |||
| title='新增文档子分段' | |||
| name='#create_child_chunk' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### Path | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| 知识库 ID | |||
| </Property> | |||
| <Property name='document_id' type='string' key='document_id'> | |||
| 文档 ID | |||
| </Property> | |||
| <Property name='segment_id' type='string' key='segment_id'> | |||
| 分段 ID | |||
| </Property> | |||
| </Properties> | |||
| ### Request Body | |||
| <Properties> | |||
| <Property name='content' type='string' key='content'> | |||
| 子分段内容 | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="Request" | |||
| tag="POST" | |||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks" | |||
| targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "子分段内容"}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \ | |||
| --header 'Authorization: Bearer {api_key}' \ | |||
| --header 'Content-Type: application/json' \ | |||
| --data-raw '{ | |||
| "content": "子分段内容" | |||
| }' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="Response"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "data": { | |||
| "id": "", | |||
| "segment_id": "", | |||
| "content": "子分段内容", | |||
| "word_count": 25, | |||
| "tokens": 0, | |||
| "index_node_id": "", | |||
| "index_node_hash": "", | |||
| "status": "completed", | |||
| "created_by": "", | |||
| "created_at": 1695312007, | |||
| "indexing_at": 1695312007, | |||
| "completed_at": 1695312007, | |||
| "error": null, | |||
| "stopped_at": null | |||
| } | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' | |||
| method='GET' | |||
| title='查询文档子分段' | |||
| name='#get_child_chunks' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### Path | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| 知识库 ID | |||
| </Property> | |||
| <Property name='document_id' type='string' key='document_id'> | |||
| 文档 ID | |||
| </Property> | |||
| <Property name='segment_id' type='string' key='segment_id'> | |||
| 分段 ID | |||
| </Property> | |||
| </Properties> | |||
| ### Query | |||
| <Properties> | |||
| <Property name='keyword' type='string' key='keyword'> | |||
| 搜索关键词(选填) | |||
| </Property> | |||
| <Property name='page' type='integer' key='page'> | |||
| 页码(选填,默认1) | |||
| </Property> | |||
| <Property name='limit' type='integer' key='limit'> | |||
| 每页数量(选填,默认20,最大100) | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="Request" | |||
| tag="GET" | |||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks" | |||
| targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \ | |||
| --header 'Authorization: Bearer {api_key}' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="Response"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "data": [{ | |||
| "id": "", | |||
| "segment_id": "", | |||
| "content": "子分段内容", | |||
| "word_count": 25, | |||
| "tokens": 0, | |||
| "index_node_id": "", | |||
| "index_node_hash": "", | |||
| "status": "completed", | |||
| "created_by": "", | |||
| "created_at": 1695312007, | |||
| "indexing_at": 1695312007, | |||
| "completed_at": 1695312007, | |||
| "error": null, | |||
| "stopped_at": null | |||
| }], | |||
| "total": 1, | |||
| "total_pages": 1, | |||
| "page": 1, | |||
| "limit": 20 | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' | |||
| method='DELETE' | |||
| title='删除文档子分段' | |||
| name='#delete_child_chunk' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### Path | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| 知识库 ID | |||
| </Property> | |||
| <Property name='document_id' type='string' key='document_id'> | |||
| 文档 ID | |||
| </Property> | |||
| <Property name='segment_id' type='string' key='segment_id'> | |||
| 分段 ID | |||
| </Property> | |||
| <Property name='child_chunk_id' type='string' key='child_chunk_id'> | |||
| 子分段 ID | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="Request" | |||
| tag="DELETE" | |||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" | |||
| targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ | |||
| --header 'Authorization: Bearer {api_key}' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="Response"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "result": "success" | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Row> | |||
| <Col> | |||
| ### 错误信息 | |||
| <Properties> | |||
| <Property name='code' type='string' key='code'> | |||
| 返回的错误代码 | |||
| </Property> | |||
| </Properties> | |||
| <Properties> | |||
| <Property name='status' type='number' key='status'> | |||
| 返回的错误状态 | |||
| </Property> | |||
| </Properties> | |||
| <Properties> | |||
| <Property name='message' type='string' key='message'> | |||
| 返回的错误信息 | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col> | |||
| <CodeGroup title="Example"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "code": "no_file_uploaded", | |||
| "message": "Please upload your file.", | |||
| "status": 400 | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' | |||
| method='PATCH' | |||
| title='更新文档子分段' | |||
| name='#update_child_chunk' | |||
| /> | |||
| <Row> | |||
| <Col> | |||
| ### Path | |||
| <Properties> | |||
| <Property name='dataset_id' type='string' key='dataset_id'> | |||
| 知识库 ID | |||
| </Property> | |||
| <Property name='document_id' type='string' key='document_id'> | |||
| 文档 ID | |||
| </Property> | |||
| <Property name='segment_id' type='string' key='segment_id'> | |||
| 分段 ID | |||
| </Property> | |||
| <Property name='child_chunk_id' type='string' key='child_chunk_id'> | |||
| 子分段 ID | |||
| </Property> | |||
| </Properties> | |||
| ### Request Body | |||
| <Properties> | |||
| <Property name='content' type='string' key='content'> | |||
| 子分段内容 | |||
| </Property> | |||
| </Properties> | |||
| </Col> | |||
| <Col sticky> | |||
| <CodeGroup | |||
| title="Request" | |||
| tag="PATCH" | |||
| label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" | |||
| targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "更新的子分段内容"}'`} | |||
| > | |||
| ```bash {{ title: 'cURL' }} | |||
| curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ | |||
| --header 'Authorization: Bearer {api_key}' \ | |||
| --header 'Content-Type: application/json' \ | |||
| --data-raw '{ | |||
| "content": "更新的子分段内容" | |||
| }' | |||
| ``` | |||
| </CodeGroup> | |||
| <CodeGroup title="Response"> | |||
| ```json {{ title: 'Response' }} | |||
| { | |||
| "data": { | |||
| "id": "", | |||
| "segment_id": "", | |||
| "content": "更新的子分段内容", | |||
| "word_count": 25, | |||
| "tokens": 0, | |||
| "index_node_id": "", | |||
| "index_node_hash": "", | |||
| "status": "completed", | |||
| "created_by": "", | |||
| "created_at": 1695312007, | |||
| "indexing_at": 1695312007, | |||
| "completed_at": 1695312007, | |||
| "error": null, | |||
| "stopped_at": null | |||
| } | |||
| } | |||
| ``` | |||
| </CodeGroup> | |||
| </Col> | |||
| </Row> | |||
| <hr className='ml-0 mr-0' /> | |||
| <Heading | |||
| url='/datasets/{dataset_id}/documents/{document_id}/upload-file' | |||
| method='GET' | |||