### What problem does this PR solve?

Close #8943

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
```diff
@@ -38,7 +38,7 @@ from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_
 from rag.app.qa import beAdoc, rmPrefix
 from rag.app.tag import label_question
 from rag.nlp import rag_tokenizer, search
-from rag.prompts import keyword_extraction
+from rag.prompts import keyword_extraction, cross_languages
 from rag.utils import rmSpace
 from rag.utils.storage_factory import STORAGE_IMPL
```
```diff
@@ -1382,6 +1382,7 @@ def retrieval_test(tenant_id):
     question = req["question"]
     doc_ids = req.get("document_ids", [])
     use_kg = req.get("use_kg", False)
+    langs = req.get("cross_languages", [])
     if not isinstance(doc_ids, list):
         return get_error_data_result("`documents` should be a list")
     doc_ids_list = KnowledgebaseService.list_documents_by_ids(kb_ids)
```
```diff
@@ -1406,6 +1407,9 @@ def retrieval_test(tenant_id):
     if req.get("rerank_id"):
         rerank_mdl = LLMBundle(kb.tenant_id, LLMType.RERANK, llm_name=req["rerank_id"])
+    if langs:
+        question = cross_languages(kb.tenant_id, None, question, langs)
     if req.get("keyword", False):
         chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT)
        question += keyword_extraction(chat_mdl, question)
```
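For orientation: the endpoint now rewrites the question before optional keyword extraction. The sketch below is a hedged illustration of what a helper of `cross_languages`'s shape could do; the actual implementation in `rag.prompts` may differ, and `translate` here is a hypothetical callable backed by a chat model.

```python
# Hedged sketch, not RAGFlow's implementation. Per the diff above the real
# helper is called as cross_languages(tenant_id, model_id, question, langs);
# a plausible behavior is to translate the query into each target language
# and concatenate the variants so keyword retrieval can match any of them.
def cross_languages_sketch(question: str, languages: list[str], translate) -> str:
    # `translate(text, lang)` is an assumed callable backed by a chat model.
    variants = [question]
    for lang in languages:
        variants.append(translate(question, lang))
    return "\n".join(variants)
```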
```diff
@@ -1725,6 +1725,7 @@ Retrieves chunks from specified datasets.
 - `"rerank_id"`: `string`
 - `"keyword"`: `boolean`
 - `"highlight"`: `boolean`
+- `"cross_languages"`: `list[string]`

 ##### Request example
```
```diff
@@ -1769,6 +1770,8 @@ curl --request POST \
 Specifies whether to enable highlighting of matched terms in the results:
 - `true`: Enable highlighting of matched terms.
 - `false`: Disable highlighting of matched terms (default).
+- `"cross_languages"`: (*Body parameter*) `list[string]`
+  The languages that the question should be translated into, to achieve keyword retrieval across different languages.

 #### Response
```
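A hedged request example for the new body parameter (host, key, and dataset ID are placeholders; the route follows the retrieval endpoint documented in this reference):

```python
import requests

# Placeholders: <RAGFLOW_HOST>, <YOUR_API_KEY>, <DATASET_ID>.
resp = requests.post(
    "http://<RAGFLOW_HOST>/api/v1/retrieval",
    headers={"Authorization": "Bearer <YOUR_API_KEY>"},
    json={
        "question": "What is carbon neutrality?",
        "dataset_ids": ["<DATASET_ID>"],
        # New in this PR: translate the question into these languages
        # so keyword matching also hits documents written in them.
        "cross_languages": ["Spanish", "French"],
    },
    timeout=60,
)
print(resp.json())
```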
```diff
@@ -953,6 +953,10 @@ Specifies whether to enable highlighting of matched terms in the results:
 - `True`: Enable highlighting of matched terms.
 - `False`: Disable highlighting of matched terms (default).
+##### cross_languages: `list[string]`
+
+The languages that the question should be translated into, to achieve keyword retrieval across different languages.
+
 #### Returns

 - Success: A list of `Chunk` objects representing the document chunks.
```
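And a hedged usage sketch for the Python SDK parameter documented above (address, key, and dataset ID are placeholders):

```python
from ragflow_sdk import RAGFlow

# Placeholders: <YOUR_API_KEY>, <RAGFLOW_HOST>, <DATASET_ID>.
rag_object = RAGFlow(api_key="<YOUR_API_KEY>", base_url="http://<RAGFLOW_HOST>:9380")
chunks = rag_object.retrieve(
    question="How does photosynthesis work?",
    dataset_ids=["<DATASET_ID>"],
    cross_languages=["German", "Japanese"],  # the new parameter
)
for chunk in chunks:
    print(chunk.content)
```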
```diff
@@ -250,5 +250,5 @@ class Extractor:
         use_prompt = prompt_template.format(**context_base)
         logging.info(f"Trigger summary: {entity_or_relation_name}")
         async with chat_limiter:
-            summary = await trio.to_thread.run_sync(lambda: self._chat(use_prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.8}))
+            summary = await trio.to_thread.run_sync(lambda: self._chat(use_prompt, [{"role": "user", "content": "Output: "}]))
         return summary
```
| @@ -128,7 +128,7 @@ class GraphExtractor(Extractor): | |||
| history.append({"role": "assistant", "content": response}) | |||
| history.append({"role": "user", "content": LOOP_PROMPT}) | |||
| async with chat_limiter: | |||
| continuation = await trio.to_thread.run_sync(lambda: self._chat("", history, {"temperature": 0.8})) | |||
| continuation = await trio.to_thread.run_sync(lambda: self._chat("", history)) | |||
| token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response) | |||
| if continuation != "Y": | |||
| break | |||
```diff
@@ -86,7 +86,7 @@ class GraphExtractor(Extractor):
             **self._context_base, input_text="{input_text}"
         ).format(**self._context_base, input_text=content)

-        gen_conf = {"temperature": 0.8}
+        gen_conf = {}
         async with chat_limiter:
             final_result = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf))
         token_count += num_tokens_from_string(hint_prompt + final_result)
```
```diff
@@ -197,6 +197,7 @@ class RAGFlow:
         top_k=1024,
         rerank_id: str | None = None,
         keyword: bool = False,
+        cross_languages: list[str] | None = None,
     ):
         if document_ids is None:
             document_ids = []
@@ -211,6 +212,7 @@ class RAGFlow:
             "question": question,
             "dataset_ids": dataset_ids,
             "document_ids": document_ids,
+            "cross_languages": cross_languages,
         }
         # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
         res = self.post("/retrieval", json=data_json)
```