瀏覽代碼

fix uploading docx for mind map (#2064)

### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.10.0
Kevin Hu 1 年之前
父節點
當前提交
89b05ad79f
沒有連結到貢獻者的電子郵件帳戶。
共有 1 個檔案被更改,包括 9 行新增、1 行刪除
  1. 9
    1
      api/db/services/document_service.py

+ 9
- 1
api/db/services/document_service.py 查看文件

import json import json
import os import os
import random import random
import re
import traceback
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy from copy import deepcopy
from datetime import datetime from datetime import datetime
from rag.settings import SVR_QUEUE_NAME from rag.settings import SVR_QUEUE_NAME
from rag.utils.es_conn import ELASTICSEARCH from rag.utils.es_conn import ELASTICSEARCH
from rag.utils.minio_conn import MINIO from rag.utils.minio_conn import MINIO
from rag.nlp import search
from rag.nlp import search, rag_tokenizer


from api.db import FileType, TaskStatus, ParserType, LLMType from api.db import FileType, TaskStatus, ParserType, LLMType
from api.db.db_models import DB, Knowledgebase, Tenant, Task from api.db.db_models import DB, Knowledgebase, Tenant, Task
parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False} parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False}
exe = ThreadPoolExecutor(max_workers=12) exe = ThreadPoolExecutor(max_workers=12)
threads = [] threads = []
doc_nm = {}
for d, blob in files:
doc_nm[d["id"]] = d["name"]
for d, blob in files: for d, blob in files:
kwargs = { kwargs = {
"callback": dummy, "callback": dummy,
"id": get_uuid(), "id": get_uuid(),
"doc_id": doc_id, "doc_id": doc_id,
"kb_id": [kb.id], "kb_id": [kb.id],
"docnm_kwd": doc_nm[doc_id],
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", doc_nm[doc_id])),
"content_ltks": "",
"content_with_weight": mind_map, "content_with_weight": mind_map,
"knowledge_graph_kwd": "mind_map" "knowledge_graph_kwd": "mind_map"
}) })

Loading…
取消
儲存