浏览代码

fix uploading docx for mind map (#2064)

### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.10.0
Kevin Hu 1年前
父节点
当前提交
89b05ad79f
没有帐户链接到提交者的电子邮件
共有 1 个文件被更改,包括 9 次插入和 1 次删除
  1. 9
    1
      api/db/services/document_service.py

+ 9
- 1
api/db/services/document_service.py 查看文件

@@ -17,6 +17,8 @@ import hashlib
import json
import os
import random
import re
import traceback
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from datetime import datetime
@@ -33,7 +35,7 @@ from graphrag.mind_map_extractor import MindMapExtractor
from rag.settings import SVR_QUEUE_NAME
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils.minio_conn import MINIO
-from rag.nlp import search
+from rag.nlp import search, rag_tokenizer

from api.db import FileType, TaskStatus, ParserType, LLMType
from api.db.db_models import DB, Knowledgebase, Tenant, Task
@@ -432,6 +434,9 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False}
exe = ThreadPoolExecutor(max_workers=12)
threads = []
doc_nm = {}
for d, blob in files:
doc_nm[d["id"]] = d["name"]
for d, blob in files:
kwargs = {
"callback": dummy,
@@ -504,6 +509,9 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
"id": get_uuid(),
"doc_id": doc_id,
"kb_id": [kb.id],
"docnm_kwd": doc_nm[doc_id],
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", doc_nm[doc_id])),
"content_ltks": "",
"content_with_weight": mind_map,
"knowledge_graph_kwd": "mind_map"
})

正在加载...
取消
保存