| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 | 
							- import datetime
 - import uuid
 - 
 - from flask import current_app
 - from flask_restful import reqparse
 - from werkzeug.exceptions import NotFound
 - 
 - import services.dataset_service
 - from controllers.service_api import api
 - from controllers.service_api.app.error import ProviderNotInitializeError
 - from controllers.service_api.dataset.error import ArchivedDocumentImmutableError, DocumentIndexingError, \
 -     DatasetNotInitedError
 - from controllers.service_api.wraps import DatasetApiResource
 - from core.llm.error import ProviderTokenNotInitError
 - from extensions.ext_database import db
 - from extensions.ext_storage import storage
 - from models.model import UploadFile
 - from services.dataset_service import DocumentService
 - 
 - 
 - class DocumentListApi(DatasetApiResource):
 -     """Resource for documents."""
 - 
 -     def post(self, dataset):
 -         """Create document."""
 -         parser = reqparse.RequestParser()
 -         parser.add_argument('name', type=str, required=True, nullable=False, location='json')
 -         parser.add_argument('text', type=str, required=True, nullable=False, location='json')
 -         parser.add_argument('doc_type', type=str, location='json')
 -         parser.add_argument('doc_metadata', type=dict, location='json')
 -         args = parser.parse_args()
 - 
 -         if not dataset.indexing_technique:
 -             raise DatasetNotInitedError("Dataset indexing technique must be set.")
 - 
 -         doc_type = args.get('doc_type')
 -         doc_metadata = args.get('doc_metadata')
 - 
 -         if doc_type and doc_type not in DocumentService.DOCUMENT_METADATA_SCHEMA:
 -             raise ValueError('Invalid doc_type.')
 - 
 -         # user uuid as file name
 -         file_uuid = str(uuid.uuid4())
 -         file_key = 'upload_files/' + dataset.tenant_id + '/' + file_uuid + '.txt'
 - 
 -         # save file to storage
 -         storage.save(file_key, args.get('text'))
 - 
 -         # save file to db
 -         config = current_app.config
 -         upload_file = UploadFile(
 -             tenant_id=dataset.tenant_id,
 -             storage_type=config['STORAGE_TYPE'],
 -             key=file_key,
 -             name=args.get('name') + '.txt',
 -             size=len(args.get('text')),
 -             extension='txt',
 -             mime_type='text/plain',
 -             created_by=dataset.created_by,
 -             created_at=datetime.datetime.utcnow(),
 -             used=True,
 -             used_by=dataset.created_by,
 -             used_at=datetime.datetime.utcnow()
 -         )
 - 
 -         db.session.add(upload_file)
 -         db.session.commit()
 - 
 -         document_data = {
 -             'data_source': {
 -                 'type': 'upload_file',
 -                 'info': upload_file.id
 -             }
 -         }
 - 
 -         try:
 -             document = DocumentService.save_document_with_dataset_id(
 -                 dataset=dataset,
 -                 document_data=document_data,
 -                 account=dataset.created_by_account,
 -                 dataset_process_rule=dataset.latest_process_rule,
 -                 created_from='api'
 -             )
 -         except ProviderTokenNotInitError:
 -             raise ProviderNotInitializeError()
 - 
 -         if doc_type and doc_metadata:
 -             metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[doc_type]
 - 
 -             document.doc_metadata = {}
 - 
 -             for key, value_type in metadata_schema.items():
 -                 value = doc_metadata.get(key)
 -                 if value is not None and isinstance(value, value_type):
 -                     document.doc_metadata[key] = value
 - 
 -             document.doc_type = doc_type
 -             document.updated_at = datetime.datetime.utcnow()
 -             db.session.commit()
 - 
 -         return {'id': document.id}
 - 
 - 
 - class DocumentApi(DatasetApiResource):
 -     def delete(self, dataset, document_id):
 -         """Delete document."""
 -         document_id = str(document_id)
 - 
 -         document = DocumentService.get_document(dataset.id, document_id)
 - 
 -         # 404 if document not found
 -         if document is None:
 -             raise NotFound("Document Not Exists.")
 - 
 -         # 403 if document is archived
 -         if DocumentService.check_archived(document):
 -             raise ArchivedDocumentImmutableError()
 - 
 -         try:
 -             # delete document
 -             DocumentService.delete_document(document)
 -         except services.errors.document.DocumentIndexingError:
 -             raise DocumentIndexingError('Cannot delete document during indexing.')
 - 
 -         return {'result': 'success'}, 204
 - 
 - 
 - api.add_resource(DocumentListApi, '/documents')
 - api.add_resource(DocumentApi, '/documents/<uuid:document_id>')
 
 
  |