### What problem does this PR solve? Close #8943 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)

3 个月前 · 0b487dee43
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@@ -38,7 +38,7 @@ from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_
 from rag.app.qa import beAdoc, rmPrefix
 from rag.app.tag import label_question
 from rag.nlp import rag_tokenizer, search
 from rag.prompts import keyword_extraction
 from rag.prompts import keyword_extraction, cross_languages
 from rag.utils import rmSpace
 from rag.utils.storage_factory import STORAGE_IMPL

@@ -1382,6 +1382,7 @@ def retrieval_test(tenant_id):
    question = req["question"]
    doc_ids = req.get("document_ids", [])
    use_kg = req.get("use_kg", False)
    langs = req.get("cross_languages", [])
    if not isinstance(doc_ids, list):
        return get_error_data_result("`documents` should be a list")
    doc_ids_list = KnowledgebaseService.list_documents_by_ids(kb_ids)
@@ -1406,6 +1407,9 @@ def retrieval_test(tenant_id):
        if req.get("rerank_id"):
            rerank_mdl = LLMBundle(kb.tenant_id, LLMType.RERANK, llm_name=req["rerank_id"])

        if langs:
            question = cross_languages(kb.tenant_id, None, question, langs)

        if req.get("keyword", False):
            chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT)
            question += keyword_extraction(chat_mdl, question)
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
@@ -1725,6 +1725,7 @@ Retrieves chunks from specified datasets.
  - `"rerank_id"`: `string`  
  - `"keyword"`: `boolean`  
  - `"highlight"`: `boolean`
  - `"cross_languages"`: `list[string]`  

 ##### Request example

@@ -1769,6 +1770,8 @@ curl --request POST \
  Specifies whether to enable highlighting of matched terms in the results:  
  - `true`: Enable highlighting of matched terms.
  - `false`: Disable highlighting of matched terms (default).
 - `"cross_languages"`: (*Body parameter*) `list[string]`  
  The languages that should be translated into, in order to achieve keywords retrievals in different languages.

 #### Response

--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
@@ -953,6 +953,10 @@ Specifies whether to enable highlighting of matched terms in the results:
 - `True`: Enable highlighting of matched terms.
 - `False`: Disable highlighting of matched terms (default).

 ##### cross_languages:  `list[string]`  

 The languages that should be translated into, in order to achieve keywords retrievals in different languages.

 #### Returns

 - Success: A list of `Chunk` objects representing the document chunks.
--- a/graphrag/general/extractor.py
+++ b/graphrag/general/extractor.py
@@ -250,5 +250,5 @@ class Extractor:
        use_prompt = prompt_template.format(**context_base)
        logging.info(f"Trigger summary: {entity_or_relation_name}")
        async with chat_limiter:
            summary = await trio.to_thread.run_sync(lambda: self._chat(use_prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.8}))
            summary = await trio.to_thread.run_sync(lambda: self._chat(use_prompt, [{"role": "user", "content": "Output: "}]))
        return summary
--- a/graphrag/general/graph_extractor.py
+++ b/graphrag/general/graph_extractor.py
@@ -128,7 +128,7 @@ class GraphExtractor(Extractor):
            history.append({"role": "assistant", "content": response})
            history.append({"role": "user", "content": LOOP_PROMPT})
            async with chat_limiter:
                continuation = await trio.to_thread.run_sync(lambda: self._chat("", history, {"temperature": 0.8}))
                continuation = await trio.to_thread.run_sync(lambda: self._chat("", history))
            token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response)
            if continuation != "Y":
                break
--- a/graphrag/light/graph_extractor.py
+++ b/graphrag/light/graph_extractor.py
@@ -86,7 +86,7 @@ class GraphExtractor(Extractor):
            **self._context_base, input_text="{input_text}"
        ).format(**self._context_base, input_text=content)

        gen_conf = {"temperature": 0.8}
        gen_conf = {}
        async with chat_limiter:
            final_result = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf))
        token_count += num_tokens_from_string(hint_prompt + final_result)
--- a/sdk/python/ragflow_sdk/ragflow.py
+++ b/sdk/python/ragflow_sdk/ragflow.py
@@ -197,6 +197,7 @@ class RAGFlow:
        top_k=1024,
        rerank_id: str | None = None,
        keyword: bool = False,
        cross_languages: list[str]|None = None
    ):
        if document_ids is None:
            document_ids = []
@@ -211,6 +212,7 @@ class RAGFlow:
            "question": question,
            "dataset_ids": dataset_ids,
            "document_ids": document_ids,
            "cross_languages": cross_languages
        }
        # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
        res = self.post("/retrieval", json=data_json)