### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

1 年之前 · a5c03ccd4c
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
    @classmethod
    @DB.connection_context()
    def get_unfinished_docs(cls):
        fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg]
        fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg, cls.model.run]
        docs = cls.model.select(*fields) \
            .where(
                cls.model.status == StatusEnum.VALID.value,
                prg = 0
                finished = True
                bad = 0
                status = TaskStatus.RUNNING.value
                status = d["run"]#TaskStatus.RUNNING.value
                for t in tsks:
                    if 0 <= t.progress < 1:
                        finished = False
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
                    "max_tokens": 32768,
                    "model_type": "chat"
                },
                {
                    "llm_name": "qwen-max-1201",
                    "tags": "LLM,CHAT,6K",
                    "max_tokens": 5899,
                    "model_type": "chat"
                },
                {
                    "llm_name": "text-embedding-v2",
                    "tags": "TEXT EMBEDDING,2K",
--- a/graphrag/mind_map_prompt.py
+++ b/graphrag/mind_map_prompt.py
   3. If the subject matter is really complex, split them into sub-sections. 
 - Output requirement:
   - Always try to maximize the number of sub-sections. 
   - In language of 
   - MUST IN FORMAT OF MARKDOWN
--- a/rag/app/knowledge_graph.py
+++ b/rag/app/knowledge_graph.py
    eng = lang.lower() == "english"
    parser_config["layout_recognize"] = False
    sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, callback=callback ,parser_config=parser_config)
    sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
                           parser_config=parser_config, callback=callback)
    chunks = build_knowlege_graph_chunks(tenant_id, sections, callback,
                                         parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
                                         )
    doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
    chunks.extend(tokenize_chunks(sections, doc, eng))
    return chunks
    return chunks