浏览代码

fix image files not deleted on indexing_estimate #9541 (#10798)

Co-authored-by: root <root@localhost.localdomain>
tags/0.12.0
wy96f 11 个月前
父节点
当前提交
94c9cadbd8
没有帐户链接到提交者的电子邮件
共有 3 个文件被更改,包括 16 次插入 和 0 次删除
  1. 14
    0
      api/core/indexing_runner.py
  2. 1
    0
      api/tasks/clean_dataset_task.py
  3. 1
    0
      api/tasks/clean_document_task.py

+ 14
- 0
api/core/indexing_runner.py 查看文件

@@ -30,6 +30,7 @@ from core.rag.splitter.fixed_text_splitter import (
)
from core.rag.splitter.text_splitter import TextSplitter
from core.tools.utils.text_processing_utils import remove_leading_symbols
from core.tools.utils.web_reader_tool import get_image_upload_file_ids
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from extensions.ext_storage import storage
@@ -279,6 +280,19 @@ class IndexingRunner:
if len(preview_texts) < 5:
preview_texts.append(document.page_content)

# delete image files and related db records
image_upload_file_ids = get_image_upload_file_ids(document.page_content)
for upload_file_id in image_upload_file_ids:
image_file = db.session.query(UploadFile).filter(UploadFile.id == upload_file_id).first()
try:
storage.delete(image_file.key)
except Exception:
logging.exception(
"Delete image_files failed while indexing_estimate, \
image_upload_file_is: {}".format(upload_file_id)
)
db.session.delete(image_file)

if doc_form and doc_form == "qa_model":
if len(preview_texts) > 0:
# qa model document

+ 1
- 0
api/tasks/clean_dataset_task.py 查看文件

@@ -78,6 +78,7 @@ def clean_dataset_task(
"Delete image_files failed when storage deleted, \
image_upload_file_is: {}".format(upload_file_id)
)
db.session.delete(image_file)
db.session.delete(segment)

db.session.query(DatasetProcessRule).filter(DatasetProcessRule.dataset_id == dataset_id).delete()

+ 1
- 0
api/tasks/clean_document_task.py 查看文件

@@ -51,6 +51,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i
"Delete image_files failed when storage deleted, \
image_upload_file_is: {}".format(upload_file_id)
)
db.session.delete(image_file)
db.session.delete(segment)

db.session.commit()

正在加载...
取消
保存