### What problem does this PR solve?

#5173

### Type of change

- [x] Refactoring
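At a glance, this refactor moves the shared prompt helpers out of `api.db.services.dialog_service` into `rag.prompts` / `rag.app.tag`, and extracts the old `reasoning()` generator into a new `agentic_reasoning.DeepResearcher` class. A rough before/after of the import paths, taken from the hunks below:

```python
# Before (imports removed in this PR)
from api.db.services.dialog_service import message_fit_in, keyword_extraction, label_question

# After (imports added in this PR)
from rag.prompts import message_fit_in, kb_prompt, llm_id2llm_type, keyword_extraction, full_question
from rag.app.tag import label_question
from agentic_reasoning import DeepResearcher
```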
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY agentic_reasoning agentic_reasoning
COPY pyproject.toml uv.lock ./
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
import pandas as pd
from api.db import LLMType
from api.db.services.conversation_service import structure_answer
from api.db.services.dialog_service import message_fit_in
from api.db.services.llm_service import LLMBundle
from api import settings
from agent.component.base import ComponentBase, ComponentParamBase
from rag.prompts import message_fit_in

class GenerateParam(ComponentParamBase):
import pandas as pd
from api.db import LLMType
from api.db.services.dialog_service import label_question
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api import settings
from agent.component.base import ComponentBase, ComponentParamBase
from rag.app.tag import label_question

class RetrievalParam(ComponentParamBase):
from .deep_research import DeepResearcher as DeepResearcher
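The package `__init__.py` above is a one-line re-export; the explicit `DeepResearcher as DeepResearcher` form keeps the name on the package's public surface, which is how the rest of this diff imports it:

```python
from agentic_reasoning import DeepResearcher  # resolved via agentic_reasoning/__init__.py
```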
| # | |||||
| # Copyright 2024 The InfiniFlow Authors. All Rights Reserved. | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # | |||||
| import logging | |||||
| import re | |||||
| from functools import partial | |||||
| from agentic_reasoning.prompts import BEGIN_SEARCH_QUERY, BEGIN_SEARCH_RESULT, END_SEARCH_RESULT, MAX_SEARCH_LIMIT, \ | |||||
| END_SEARCH_QUERY, REASON_PROMPT, RELEVANT_EXTRACTION_PROMPT | |||||
| from api.db.services.llm_service import LLMBundle | |||||
| from rag.nlp import extract_between | |||||
| from rag.prompts import kb_prompt | |||||
| from rag.utils.tavily_conn import Tavily | |||||
| class DeepResearcher: | |||||
| def __init__(self, | |||||
| chat_mdl: LLMBundle, | |||||
| prompt_config: dict, | |||||
| kb_retrieve: partial = None, | |||||
| kg_retrieve: partial = None | |||||
| ): | |||||
| self.chat_mdl = chat_mdl | |||||
| self.prompt_config = prompt_config | |||||
| self._kb_retrieve = kb_retrieve | |||||
| self._kg_retrieve = kg_retrieve | |||||
| def thinking(self, chunk_info: dict, question: str): | |||||
| def rm_query_tags(line): | |||||
| pattern = re.escape(BEGIN_SEARCH_QUERY) + r"(.*?)" + re.escape(END_SEARCH_QUERY) | |||||
| return re.sub(pattern, "", line) | |||||
| def rm_result_tags(line): | |||||
| pattern = re.escape(BEGIN_SEARCH_RESULT) + r"(.*?)" + re.escape(END_SEARCH_RESULT) | |||||
| return re.sub(pattern, "", line) | |||||
| executed_search_queries = [] | |||||
| msg_hisotry = [{"role": "user", "content": f'Question:\"{question}\"\n'}] | |||||
| all_reasoning_steps = [] | |||||
| think = "<think>" | |||||
| for ii in range(MAX_SEARCH_LIMIT + 1): | |||||
| if ii == MAX_SEARCH_LIMIT - 1: | |||||
| summary_think = f"\n{BEGIN_SEARCH_RESULT}\nThe maximum search limit is exceeded. You are not allowed to search.\n{END_SEARCH_RESULT}\n" | |||||
| yield {"answer": think + summary_think + "</think>", "reference": {}, "audio_binary": None} | |||||
| all_reasoning_steps.append(summary_think) | |||||
| msg_hisotry.append({"role": "assistant", "content": summary_think}) | |||||
| break | |||||
| query_think = "" | |||||
| if msg_hisotry[-1]["role"] != "user": | |||||
| msg_hisotry.append({"role": "user", "content": "Continues reasoning with the new information.\n"}) | |||||
| else: | |||||
| msg_hisotry[-1]["content"] += "\n\nContinues reasoning with the new information.\n" | |||||
| for ans in self.chat_mdl.chat_streamly(REASON_PROMPT, msg_hisotry, {"temperature": 0.7}): | |||||
| ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) | |||||
| if not ans: | |||||
| continue | |||||
| query_think = ans | |||||
| yield {"answer": think + rm_query_tags(query_think) + "</think>", "reference": {}, "audio_binary": None} | |||||
| think += rm_query_tags(query_think) | |||||
| all_reasoning_steps.append(query_think) | |||||
| queries = extract_between(query_think, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY) | |||||
| if not queries: | |||||
| if ii > 0: | |||||
| break | |||||
| queries = [question] | |||||
| for search_query in queries: | |||||
| logging.info(f"[THINK]Query: {ii}. {search_query}") | |||||
| msg_hisotry.append({"role": "assistant", "content": search_query}) | |||||
| think += f"\n\n> {ii +1}. {search_query}\n\n" | |||||
| yield {"answer": think + "</think>", "reference": {}, "audio_binary": None} | |||||
| summary_think = "" | |||||
| # The search query has been searched in previous steps. | |||||
| if search_query in executed_search_queries: | |||||
| summary_think = f"\n{BEGIN_SEARCH_RESULT}\nYou have searched this query. Please refer to previous results.\n{END_SEARCH_RESULT}\n" | |||||
| yield {"answer": think + summary_think + "</think>", "reference": {}, "audio_binary": None} | |||||
| all_reasoning_steps.append(summary_think) | |||||
| msg_hisotry.append({"role": "user", "content": summary_think}) | |||||
| think += summary_think | |||||
| continue | |||||
| truncated_prev_reasoning = "" | |||||
| for i, step in enumerate(all_reasoning_steps): | |||||
| truncated_prev_reasoning += f"Step {i + 1}: {step}\n\n" | |||||
| prev_steps = truncated_prev_reasoning.split('\n\n') | |||||
| if len(prev_steps) <= 5: | |||||
| truncated_prev_reasoning = '\n\n'.join(prev_steps) | |||||
| else: | |||||
| truncated_prev_reasoning = '' | |||||
| for i, step in enumerate(prev_steps): | |||||
| if i == 0 or i >= len(prev_steps) - 4 or BEGIN_SEARCH_QUERY in step or BEGIN_SEARCH_RESULT in step: | |||||
| truncated_prev_reasoning += step + '\n\n' | |||||
| else: | |||||
| if truncated_prev_reasoning[-len('\n\n...\n\n'):] != '\n\n...\n\n': | |||||
| truncated_prev_reasoning += '...\n\n' | |||||
| truncated_prev_reasoning = truncated_prev_reasoning.strip('\n') | |||||
| # Retrieval procedure: | |||||
| # 1. KB search | |||||
| # 2. Web search (optional) | |||||
| # 3. KG search (optional) | |||||
| kbinfos = self._kb_retrieve(question=search_query) if self._kb_retrieve else {"chunks": [], "doc_aggs": []} | |||||
| if self.prompt_config.get("tavily_api_key"): | |||||
| tav = Tavily(self.prompt_config["tavily_api_key"]) | |||||
| tav_res = tav.retrieve_chunks(" ".join(search_query)) | |||||
| kbinfos["chunks"].extend(tav_res["chunks"]) | |||||
| kbinfos["doc_aggs"].extend(tav_res["doc_aggs"]) | |||||
| if self.prompt_config.get("use_kg") and self._kg_retrieve: | |||||
| ck = self._kg_retrieve(question=search_query) | |||||
| if ck["content_with_weight"]: | |||||
| kbinfos["chunks"].insert(0, ck) | |||||
| # Merge chunk info for citations | |||||
| if not chunk_info["chunks"]: | |||||
| for k in chunk_info.keys(): | |||||
| chunk_info[k] = kbinfos[k] | |||||
| else: | |||||
| cids = [c["chunk_id"] for c in chunk_info["chunks"]] | |||||
| for c in kbinfos["chunks"]: | |||||
| if c["chunk_id"] in cids: | |||||
| continue | |||||
| chunk_info["chunks"].append(c) | |||||
| dids = [d["doc_id"] for d in chunk_info["doc_aggs"]] | |||||
| for d in kbinfos["doc_aggs"]: | |||||
| if d["doc_id"] in dids: | |||||
| continue | |||||
| chunk_info["doc_aggs"].append(d) | |||||
| think += "\n\n" | |||||
| for ans in self.chat_mdl.chat_streamly( | |||||
| RELEVANT_EXTRACTION_PROMPT.format( | |||||
| prev_reasoning=truncated_prev_reasoning, | |||||
| search_query=search_query, | |||||
| document="\n".join(kb_prompt(kbinfos, 4096)) | |||||
| ), | |||||
| [{"role": "user", | |||||
| "content": f'Now you should analyze each web page and find helpful information based on the current search query "{search_query}" and previous reasoning steps.'}], | |||||
| {"temperature": 0.7}): | |||||
| ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) | |||||
| if not ans: | |||||
| continue | |||||
| summary_think = ans | |||||
| yield {"answer": think + rm_result_tags(summary_think) + "</think>", "reference": {}, "audio_binary": None} | |||||
| all_reasoning_steps.append(summary_think) | |||||
| msg_hisotry.append( | |||||
| {"role": "user", "content": f"\n\n{BEGIN_SEARCH_RESULT}{summary_think}{END_SEARCH_RESULT}\n\n"}) | |||||
| think += rm_result_tags(summary_think) | |||||
| logging.info(f"[THINK]Summary: {ii}. {summary_think}") | |||||
| yield think + "</think>" |
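For orientation, here is a minimal sketch of how the class is driven, mirroring the wiring that appears in the `dialog_service.py` hunk later in this diff. The names `retriever`, `chat_mdl`, `embd_mdl`, `tenant_ids`, `kb_ids` and `prompt_config` are assumed to exist in the caller's scope, and the page size is a placeholder.

```python
from functools import partial

# Assumed caller-side objects: chat_mdl (LLMBundle), retriever (settings.retrievaler),
# embd_mdl, tenant_ids, kb_ids, prompt_config.
reasoner = DeepResearcher(
    chat_mdl,
    prompt_config,
    kb_retrieve=partial(retriever.retrieval, embd_mdl=embd_mdl, tenant_ids=tenant_ids,
                        kb_ids=kb_ids, page=1, page_size=12,  # page_size is a placeholder
                        similarity_threshold=0.2, vector_similarity_weight=0.3),
)

chunk_info = {"chunks": [], "doc_aggs": []}  # filled in-place for citations
for step in reasoner.thinking(chunk_info, "Are both directors of Jaws and Casino Royale from the same country?"):
    if isinstance(step, dict):   # streamed partial answers wrapped in <think>...</think>
        print(step["answer"])
    else:                        # the final yield is the full reasoning string
        thought = step
```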
| # | |||||
| # Copyright 2024 The InfiniFlow Authors. All Rights Reserved. | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # | |||||
| BEGIN_SEARCH_QUERY = "<|begin_search_query|>" | |||||
| END_SEARCH_QUERY = "<|end_search_query|>" | |||||
| BEGIN_SEARCH_RESULT = "<|begin_search_result|>" | |||||
| END_SEARCH_RESULT = "<|end_search_result|>" | |||||
| MAX_SEARCH_LIMIT = 6 | |||||
| REASON_PROMPT = ( | |||||
| "You are a reasoning assistant with the ability to perform dataset searches to help " | |||||
| "you answer the user's question accurately. You have special tools:\n\n" | |||||
| f"- To perform a search: write {BEGIN_SEARCH_QUERY} your query here {END_SEARCH_QUERY}.\n" | |||||
| f"Then, the system will search and analyze relevant content, then provide you with helpful information in the format {BEGIN_SEARCH_RESULT} ...search results... {END_SEARCH_RESULT}.\n\n" | |||||
| f"You can repeat the search process multiple times if necessary. The maximum number of search attempts is limited to {MAX_SEARCH_LIMIT}.\n\n" | |||||
| "Once you have all the information you need, continue your reasoning.\n\n" | |||||
| "-- Example 1 --\n" ######################################## | |||||
| "Question: \"Are both the directors of Jaws and Casino Royale from the same country?\"\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY}Who is the director of Jaws?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nThe director of Jaws is Steven Spielberg...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information.\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY}Where is Steven Spielberg from?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nSteven Allan Spielberg is an American filmmaker...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information...\n\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY}Who is the director of Casino Royale?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nCasino Royale is a 2006 spy film directed by Martin Campbell...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information...\n\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY}Where is Martin Campbell from?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nMartin Campbell (born 24 October 1943) is a New Zealand film and television director...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information...\n\n" | |||||
| "Assistant:\nIt's enough to answer the question\n" | |||||
| "-- Example 2 --\n" ######################################### | |||||
| "Question: \"When was the founder of craigslist born?\"\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY}Who was the founder of craigslist?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nCraigslist was founded by Craig Newmark...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information.\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY} When was Craig Newmark born?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nCraig Newmark was born on December 6, 1952...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information...\n\n" | |||||
| "Assistant:\nIt's enough to answer the question\n" | |||||
| "**Remember**:\n" | |||||
| f"- You have a dataset to search, so you just provide a proper search query.\n" | |||||
| f"- Use {BEGIN_SEARCH_QUERY} to request a dataset search and end with {END_SEARCH_QUERY}.\n" | |||||
| "- The language of query MUST be as the same as 'Question' or 'search result'.\n" | |||||
| "- When done searching, continue your reasoning.\n\n" | |||||
| 'Please answer the following question. You should think step by step to solve it.\n\n' | |||||
| ) | |||||
| RELEVANT_EXTRACTION_PROMPT = """**Task Instruction:** | |||||
| You are tasked with reading and analyzing web pages based on the following inputs: **Previous Reasoning Steps**, **Current Search Query**, and **Searched Web Pages**. Your objective is to extract relevant and helpful information for **Current Search Query** from the **Searched Web Pages** and seamlessly integrate this information into the **Previous Reasoning Steps** to continue reasoning for the original question. | |||||
| **Guidelines:** | |||||
| 1. **Analyze the Searched Web Pages:** | |||||
| - Carefully review the content of each searched web page. | |||||
| - Identify factual information that is relevant to the **Current Search Query** and can aid in the reasoning process for the original question. | |||||
| 2. **Extract Relevant Information:** | |||||
| - Select the information from the Searched Web Pages that directly contributes to advancing the **Previous Reasoning Steps**. | |||||
| - Ensure that the extracted information is accurate and relevant. | |||||
| 3. **Output Format:** | |||||
| - **If the web pages provide helpful information for current search query:** Present the information beginning with `**Final Information**` as shown below. | |||||
| - The language of query **MUST BE** as the same as 'Search Query' or 'Web Pages'.\n" | |||||
| **Final Information** | |||||
| [Helpful information] | |||||
| - **If the web pages do not provide any helpful information for current search query:** Output the following text. | |||||
| **Final Information** | |||||
| No helpful information found. | |||||
| **Inputs:** | |||||
| - **Previous Reasoning Steps:** | |||||
| {prev_reasoning} | |||||
| - **Current Search Query:** | |||||
| {search_query} | |||||
| - **Searched Web Pages:** | |||||
| {document} | |||||
| """ |
from api.db.db_models import APIToken, Task, File
from api.db.services import duplicate_name
from api.db.services.api_service import APITokenService, API4ConversationService
from api.db.services.dialog_service import DialogService, chat, keyword_extraction, label_question
from api.db.services.dialog_service import DialogService, chat
from api.db.services.document_service import DocumentService, doc_upload_and_parse
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
generate_confirmation_token
from api.utils.file_utils import filename_type, thumbnail
from rag.app.tag import label_question
from rag.prompts import keyword_extraction
from rag.utils.storage_factory import STORAGE_IMPL
from api.db.services.canvas_service import UserCanvasService
from flask import request
from flask_login import login_required, current_user
from api.db.services.dialog_service import keyword_extraction, label_question
from rag.app.qa import rmPrefix, beAdoc
from rag.app.tag import label_question
from rag.nlp import search, rag_tokenizer
from rag.prompts import keyword_extraction
from rag.settings import PAGERANK_FLD
from rag.utils import rmSpace
from api.db import LLMType, ParserType
from flask_login import login_required, current_user
from api.db import LLMType
from api.db.services.dialog_service import DialogService, chat, ask, label_question
from api.db.services.dialog_service import DialogService, chat, ask
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle, TenantService
from api import settings
from api.utils.api_utils import get_json_result
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from graphrag.general.mind_map_extractor import MindMapExtractor
from rag.app.tag import label_question

@manager.route('/set', methods=['POST']) # noqa: F821
elif factory == "Tencent Cloud":
req["api_key"] = apikey_json(["tencent_cloud_sid", "tencent_cloud_sk"])
return set_api_key()
elif factory == "Bedrock":
# For Bedrock, due to its special authentication method
from flask import request, jsonify
from api.db import LLMType
from api.db.services.dialog_service import label_question
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api import settings
from api.utils.api_utils import validate_request, build_error_result, apikey_required
from rag.app.tag import label_question

@manager.route('/dify/retrieval', methods=['POST']) # noqa: F821
import pathlib
import datetime
from api.db.services.dialog_service import keyword_extraction, label_question
from rag.app.qa import rmPrefix, beAdoc
from rag.nlp import rag_tokenizer
from api.db import LLMType, ParserType
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils.api_utils import construct_json_result, get_parser_config
from rag.nlp import search
from rag.prompts import keyword_extraction
from rag.app.tag import label_question
from rag.utils import rmSpace
from rag.utils.storage_factory import STORAGE_IMPL
#
import logging
import binascii
import os
import json
import time
import json_repair
from functools import partial
import re
from collections import defaultdict
from copy import deepcopy
from timeit import default_timer as timer
import datetime
from datetime import timedelta
from agentic_reasoning import DeepResearcher
from api.db import LLMType, ParserType, StatusEnum
from api.db.db_models import Dialog, DB
from api.db.services.common_service import CommonService
from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import TenantLLMService, LLMBundle
from api import settings
from graphrag.utils import get_tags_from_cache, set_tags_to_cache
from rag.app.resume import forbidden_select_fields4resume
from rag.nlp import extract_between
from rag.app.tag import label_question
from rag.nlp.search import index_name
from rag.settings import TAG_FLD
from rag.utils import rmSpace, num_tokens_from_string, encoder
from api.utils.file_utils import get_project_base_directory
from rag.prompts import kb_prompt, message_fit_in, llm_id2llm_type, keyword_extraction, full_question
from rag.utils import rmSpace, num_tokens_from_string
from rag.utils.tavily_conn import Tavily
return list(chats.dicts())
| def message_fit_in(msg, max_length=4000): | |||||
| def count(): | |||||
| nonlocal msg | |||||
| tks_cnts = [] | |||||
| for m in msg: | |||||
| tks_cnts.append( | |||||
| {"role": m["role"], "count": num_tokens_from_string(m["content"])}) | |||||
| total = 0 | |||||
| for m in tks_cnts: | |||||
| total += m["count"] | |||||
| return total | |||||
| c = count() | |||||
| if c < max_length: | |||||
| return c, msg | |||||
| msg_ = [m for m in msg[:-1] if m["role"] == "system"] | |||||
| if len(msg) > 1: | |||||
| msg_.append(msg[-1]) | |||||
| msg = msg_ | |||||
| c = count() | |||||
| if c < max_length: | |||||
| return c, msg | |||||
| ll = num_tokens_from_string(msg_[0]["content"]) | |||||
| ll2 = num_tokens_from_string(msg_[-1]["content"]) | |||||
| if ll / (ll + ll2) > 0.8: | |||||
| m = msg_[0]["content"] | |||||
| m = encoder.decode(encoder.encode(m)[:max_length - ll2]) | |||||
| msg[0]["content"] = m | |||||
| return max_length, msg | |||||
| m = msg_[1]["content"] | |||||
| m = encoder.decode(encoder.encode(m)[:max_length - ll2]) | |||||
| msg[1]["content"] = m | |||||
| return max_length, msg | |||||
| def llm_id2llm_type(llm_id): | |||||
| llm_id, _ = TenantLLMService.split_model_name_and_factory(llm_id) | |||||
| fnm = os.path.join(get_project_base_directory(), "conf") | |||||
| llm_factories = json.load(open(os.path.join(fnm, "llm_factories.json"), "r")) | |||||
| for llm_factory in llm_factories["factory_llm_infos"]: | |||||
| for llm in llm_factory["llm"]: | |||||
| if llm_id == llm["llm_name"]: | |||||
| return llm["model_type"].strip(",")[-1] | |||||
| def kb_prompt(kbinfos, max_tokens): | |||||
| knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]] | |||||
| used_token_count = 0 | |||||
| chunks_num = 0 | |||||
| for i, c in enumerate(knowledges): | |||||
| used_token_count += num_tokens_from_string(c) | |||||
| chunks_num += 1 | |||||
| if max_tokens * 0.97 < used_token_count: | |||||
| knowledges = knowledges[:i] | |||||
| logging.warning(f"Not all the retrieval into prompt: {i+1}/{len(knowledges)}") | |||||
| break | |||||
| docs = DocumentService.get_by_ids([ck["doc_id"] for ck in kbinfos["chunks"][:chunks_num]]) | |||||
| docs = {d.id: d.meta_fields for d in docs} | |||||
| doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []}) | |||||
| for ck in kbinfos["chunks"][:chunks_num]: | |||||
| doc2chunks[ck["docnm_kwd"]]["chunks"].append((f"URL: {ck['url']}\n" if "url" in ck else "") + ck["content_with_weight"]) | |||||
| doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {}) | |||||
| knowledges = [] | |||||
| for nm, cks_meta in doc2chunks.items(): | |||||
| txt = f"Document: {nm} \n" | |||||
| for k, v in cks_meta["meta"].items(): | |||||
| txt += f"{k}: {v}\n" | |||||
| txt += "Relevant fragments as following:\n" | |||||
| for i, chunk in enumerate(cks_meta["chunks"], 1): | |||||
| txt += f"{i}. {chunk}\n" | |||||
| knowledges.append(txt) | |||||
| return knowledges | |||||
| def label_question(question, kbs): | |||||
| tags = None | |||||
| tag_kb_ids = [] | |||||
| for kb in kbs: | |||||
| if kb.parser_config.get("tag_kb_ids"): | |||||
| tag_kb_ids.extend(kb.parser_config["tag_kb_ids"]) | |||||
| if tag_kb_ids: | |||||
| all_tags = get_tags_from_cache(tag_kb_ids) | |||||
| if not all_tags: | |||||
| all_tags = settings.retrievaler.all_tags_in_portion(kb.tenant_id, tag_kb_ids) | |||||
| set_tags_to_cache(all_tags, tag_kb_ids) | |||||
| else: | |||||
| all_tags = json.loads(all_tags) | |||||
| tag_kbs = KnowledgebaseService.get_by_ids(tag_kb_ids) | |||||
| tags = settings.retrievaler.tag_query(question, | |||||
| list(set([kb.tenant_id for kb in tag_kbs])), | |||||
| tag_kb_ids, | |||||
| all_tags, | |||||
| kb.parser_config.get("topn_tags", 3) | |||||
| ) | |||||
| return tags | |||||
def chat_solo(dialog, messages, stream=True):
if llm_id2llm_type(dialog.llm_id) == "image2text":
chat_mdl = LLMBundle(dialog.tenant_id, LLMType.IMAGE2TEXT, dialog.llm_id)
knowledges = []
if prompt_config.get("reasoning", False):
for think in reasoning(kbinfos, " ".join(questions), chat_mdl, embd_mdl, tenant_ids, dialog.kb_ids, prompt_config, MAX_SEARCH_LIMIT=3):
reasoner = DeepResearcher(chat_mdl,
prompt_config,
partial(retriever.retrieval, embd_mdl=embd_mdl, tenant_ids=tenant_ids, kb_ids=dialog.kb_ids, page=1, page_size=dialog.top_n, similarity_threshold=0.2, vector_similarity_weight=0.3))
for think in reasoner.thinking(kbinfos, " ".join(questions)):
if isinstance(think, str):
thought = think
knowledges = [t for t in think.split("\n") if t]
}
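The `partial(retriever.retrieval, ...)` call above pre-binds every retrieval argument except the query, so `DeepResearcher` stays agnostic of tenants, knowledge-base IDs and thresholds and simply calls `self._kb_retrieve(question=...)`. A tiny standalone illustration of the pattern (the `retrieval` function here is a stand-in, not the real retriever):

```python
from functools import partial

def retrieval(question, kb_ids, similarity_threshold):
    # Stand-in for settings.retrievaler.retrieval(...)
    return {"chunks": [], "doc_aggs": [], "asked": question, "kb_ids": kb_ids}

kb_retrieve = partial(retrieval, kb_ids=["kb-1"], similarity_threshold=0.2)

# DeepResearcher only ever has to supply the question:
print(kb_retrieve(question="Who directed Jaws?"))
```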
| def relevant(tenant_id, llm_id, question, contents: list): | |||||
| if llm_id2llm_type(llm_id) == "image2text": | |||||
| chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id) | |||||
| else: | |||||
| chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id) | |||||
| prompt = """ | |||||
| You are a grader assessing relevance of a retrieved document to a user question. | |||||
| It does not need to be a stringent test. The goal is to filter out erroneous retrievals. | |||||
| If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. | |||||
| Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. | |||||
| No other words needed except 'yes' or 'no'. | |||||
| """ | |||||
| if not contents: | |||||
| return False | |||||
| contents = "Documents: \n" + " - ".join(contents) | |||||
| contents = f"Question: {question}\n" + contents | |||||
| if num_tokens_from_string(contents) >= chat_mdl.max_length - 4: | |||||
| contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4]) | |||||
| ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01}) | |||||
| if ans.lower().find("yes") >= 0: | |||||
| return True | |||||
| return False | |||||
| def rewrite(tenant_id, llm_id, question): | |||||
| if llm_id2llm_type(llm_id) == "image2text": | |||||
| chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id) | |||||
| else: | |||||
| chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id) | |||||
| prompt = """ | |||||
| You are an expert at query expansion to generate a paraphrasing of a question. | |||||
| I can't retrieval relevant information from the knowledge base by using user's question directly. | |||||
| You need to expand or paraphrase user's question by multiple ways such as using synonyms words/phrase, | |||||
| writing the abbreviation in its entirety, adding some extra descriptions or explanations, | |||||
| changing the way of expression, translating the original question into another language (English/Chinese), etc. | |||||
| And return 5 versions of question and one is from translation. | |||||
| Just list the question. No other words are needed. | |||||
| """ | |||||
| ans = chat_mdl.chat(prompt, [{"role": "user", "content": question}], {"temperature": 0.8}) | |||||
| return ans | |||||
| def keyword_extraction(chat_mdl, content, topn=3): | |||||
| prompt = f""" | |||||
| Role: You're a text analyzer. | |||||
| Task: extract the most important keywords/phrases of a given piece of text content. | |||||
| Requirements: | |||||
| - Summarize the text content, and give top {topn} important keywords/phrases. | |||||
| - The keywords MUST be in language of the given piece of text content. | |||||
| - The keywords are delimited by ENGLISH COMMA. | |||||
| - Keywords ONLY in output. | |||||
| ### Text Content | |||||
| {content} | |||||
| """ | |||||
| msg = [ | |||||
| {"role": "system", "content": prompt}, | |||||
| {"role": "user", "content": "Output: "} | |||||
| ] | |||||
| _, msg = message_fit_in(msg, chat_mdl.max_length) | |||||
| kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2}) | |||||
| if isinstance(kwd, tuple): | |||||
| kwd = kwd[0] | |||||
| kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL) | |||||
| if kwd.find("**ERROR**") >= 0: | |||||
| return "" | |||||
| return kwd | |||||
| def question_proposal(chat_mdl, content, topn=3): | |||||
| prompt = f""" | |||||
| Role: You're a text analyzer. | |||||
| Task: propose {topn} questions about a given piece of text content. | |||||
| Requirements: | |||||
| - Understand and summarize the text content, and propose top {topn} important questions. | |||||
| - The questions SHOULD NOT have overlapping meanings. | |||||
| - The questions SHOULD cover the main content of the text as much as possible. | |||||
| - The questions MUST be in language of the given piece of text content. | |||||
| - One question per line. | |||||
| - Question ONLY in output. | |||||
| ### Text Content | |||||
| {content} | |||||
| """ | |||||
| msg = [ | |||||
| {"role": "system", "content": prompt}, | |||||
| {"role": "user", "content": "Output: "} | |||||
| ] | |||||
| _, msg = message_fit_in(msg, chat_mdl.max_length) | |||||
| kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2}) | |||||
| if isinstance(kwd, tuple): | |||||
| kwd = kwd[0] | |||||
| kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL) | |||||
| if kwd.find("**ERROR**") >= 0: | |||||
| return "" | |||||
| return kwd | |||||
| def full_question(tenant_id, llm_id, messages): | |||||
| if llm_id2llm_type(llm_id) == "image2text": | |||||
| chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id) | |||||
| else: | |||||
| chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id) | |||||
| conv = [] | |||||
| for m in messages: | |||||
| if m["role"] not in ["user", "assistant"]: | |||||
| continue | |||||
| conv.append("{}: {}".format(m["role"].upper(), m["content"])) | |||||
| conv = "\n".join(conv) | |||||
| today = datetime.date.today().isoformat() | |||||
| yesterday = (datetime.date.today() - timedelta(days=1)).isoformat() | |||||
| tomorrow = (datetime.date.today() + timedelta(days=1)).isoformat() | |||||
| prompt = f""" | |||||
| Role: A helpful assistant | |||||
| Task and steps: | |||||
| 1. Generate a full user question that would follow the conversation. | |||||
| 2. If the user's question involves relative date, you need to convert it into absolute date based on the current date, which is {today}. For example: 'yesterday' would be converted to {yesterday}. | |||||
| Requirements & Restrictions: | |||||
| - Text generated MUST be in the same language of the original user's question. | |||||
| - If the user's latest question is completely, don't do anything, just return the original question. | |||||
| - DON'T generate anything except a refined question. | |||||
| ###################### | |||||
| -Examples- | |||||
| ###################### | |||||
| # Example 1 | |||||
| ## Conversation | |||||
| USER: What is the name of Donald Trump's father? | |||||
| ASSISTANT: Fred Trump. | |||||
| USER: And his mother? | |||||
| ############### | |||||
| Output: What's the name of Donald Trump's mother? | |||||
| ------------ | |||||
| # Example 2 | |||||
| ## Conversation | |||||
| USER: What is the name of Donald Trump's father? | |||||
| ASSISTANT: Fred Trump. | |||||
| USER: And his mother? | |||||
| ASSISTANT: Mary Trump. | |||||
| User: What's her full name? | |||||
| ############### | |||||
| Output: What's the full name of Donald Trump's mother Mary Trump? | |||||
| ------------ | |||||
| # Example 3 | |||||
| ## Conversation | |||||
| USER: What's the weather today in London? | |||||
| ASSISTANT: Cloudy. | |||||
| USER: What's about tomorrow in Rochester? | |||||
| ############### | |||||
| Output: What's the weather in Rochester on {tomorrow}? | |||||
| ###################### | |||||
| # Real Data | |||||
| ## Conversation | |||||
| {conv} | |||||
| ############### | |||||
| """ | |||||
| ans = chat_mdl.chat(prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.2}) | |||||
| ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) | |||||
| return ans if ans.find("**ERROR**") < 0 else messages[-1]["content"] | |||||
def tts(tts_mdl, text):
if not tts_mdl or not text:
return
yield decorate_answer(answer)
| def content_tagging(chat_mdl, content, all_tags, examples, topn=3): | |||||
| prompt = f""" | |||||
| Role: You're a text analyzer. | |||||
| Task: Tag (put on some labels) to a given piece of text content based on the examples and the entire tag set. | |||||
| Steps:: | |||||
| - Comprehend the tag/label set. | |||||
| - Comprehend examples which all consist of both text content and assigned tags with relevance score in format of JSON. | |||||
| - Summarize the text content, and tag it with top {topn} most relevant tags from the set of tag/label and the corresponding relevance score. | |||||
| Requirements | |||||
| - The tags MUST be from the tag set. | |||||
| - The output MUST be in JSON format only, the key is tag and the value is its relevance score. | |||||
| - The relevance score must be range from 1 to 10. | |||||
| - Keywords ONLY in output. | |||||
| # TAG SET | |||||
| {", ".join(all_tags)} | |||||
| """ | |||||
| for i, ex in enumerate(examples): | |||||
| prompt += """ | |||||
| # Examples {} | |||||
| ### Text Content | |||||
| {} | |||||
| Output: | |||||
| {} | |||||
| """.format(i, ex["content"], json.dumps(ex[TAG_FLD], indent=2, ensure_ascii=False)) | |||||
| prompt += f""" | |||||
| # Real Data | |||||
| ### Text Content | |||||
| {content} | |||||
| """ | |||||
| msg = [ | |||||
| {"role": "system", "content": prompt}, | |||||
| {"role": "user", "content": "Output: "} | |||||
| ] | |||||
| _, msg = message_fit_in(msg, chat_mdl.max_length) | |||||
| kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.5}) | |||||
| if isinstance(kwd, tuple): | |||||
| kwd = kwd[0] | |||||
| kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL) | |||||
| if kwd.find("**ERROR**") >= 0: | |||||
| raise Exception(kwd) | |||||
| try: | |||||
| return json_repair.loads(kwd) | |||||
| except json_repair.JSONDecodeError: | |||||
| try: | |||||
| result = kwd.replace(prompt[:-1], '').replace('user', '').replace('model', '').strip() | |||||
| result = '{' + result.split('{')[1].split('}')[0] + '}' | |||||
| return json_repair.loads(result) | |||||
| except Exception as e: | |||||
| logging.exception(f"JSON parsing error: {result} -> {e}") | |||||
| raise e | |||||
| def reasoning(chunk_info: dict, question: str, chat_mdl: LLMBundle, embd_mdl: LLMBundle, | |||||
| tenant_ids: list[str], kb_ids: list[str], prompt_config, MAX_SEARCH_LIMIT: int = 6, | |||||
| top_n: int = 5, similarity_threshold: float = 0.4, vector_similarity_weight: float = 0.3): | |||||
| BEGIN_SEARCH_QUERY = "<|begin_search_query|>" | |||||
| END_SEARCH_QUERY = "<|end_search_query|>" | |||||
| BEGIN_SEARCH_RESULT = "<|begin_search_result|>" | |||||
| END_SEARCH_RESULT = "<|end_search_result|>" | |||||
| def rm_query_tags(line): | |||||
| pattern = re.escape(BEGIN_SEARCH_QUERY) + r"(.*?)" + re.escape(END_SEARCH_QUERY) | |||||
| return re.sub(pattern, "", line) | |||||
| def rm_result_tags(line): | |||||
| pattern = re.escape(BEGIN_SEARCH_RESULT) + r"(.*?)" + re.escape(END_SEARCH_RESULT) | |||||
| return re.sub(pattern, "", line) | |||||
| reason_prompt = ( | |||||
| "You are a reasoning assistant with the ability to perform dataset searches to help " | |||||
| "you answer the user's question accurately. You have special tools:\n\n" | |||||
| f"- To perform a search: write {BEGIN_SEARCH_QUERY} your query here {END_SEARCH_QUERY}.\n" | |||||
| f"Then, the system will search and analyze relevant content, then provide you with helpful information in the format {BEGIN_SEARCH_RESULT} ...search results... {END_SEARCH_RESULT}.\n\n" | |||||
| f"You can repeat the search process multiple times if necessary. The maximum number of search attempts is limited to {MAX_SEARCH_LIMIT}.\n\n" | |||||
| "Once you have all the information you need, continue your reasoning.\n\n" | |||||
| "-- Example 1 --\n" ######################################## | |||||
| "Question: \"Are both the directors of Jaws and Casino Royale from the same country?\"\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY}Who is the director of Jaws?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nThe director of Jaws is Steven Spielberg...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information.\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY}Where is Steven Spielberg from?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nSteven Allan Spielberg is an American filmmaker...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information...\n\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY}Who is the director of Casino Royale?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nCasino Royale is a 2006 spy film directed by Martin Campbell...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information...\n\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY}Where is Martin Campbell from?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nMartin Campbell (born 24 October 1943) is a New Zealand film and television director...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information...\n\n" | |||||
| "Assistant:\nIt's enough to answer the question\n" | |||||
| "-- Example 2 --\n" ######################################### | |||||
| "Question: \"When was the founder of craigslist born?\"\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY}Who was the founder of craigslist?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nCraigslist was founded by Craig Newmark...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information.\n" | |||||
| "Assistant:\n" | |||||
| f" {BEGIN_SEARCH_QUERY} When was Craig Newmark born?{END_SEARCH_QUERY}\n\n" | |||||
| "User:\n" | |||||
| f" {BEGIN_SEARCH_RESULT}\nCraig Newmark was born on December 6, 1952...\n{END_SEARCH_RESULT}\n\n" | |||||
| "Continues reasoning with the new information...\n\n" | |||||
| "Assistant:\nIt's enough to answer the question\n" | |||||
| "**Remember**:\n" | |||||
| f"- You have a dataset to search, so you just provide a proper search query.\n" | |||||
| f"- Use {BEGIN_SEARCH_QUERY} to request a dataset search and end with {END_SEARCH_QUERY}.\n" | |||||
| "- The language of query MUST be as the same as 'Question' or 'search result'.\n" | |||||
| "- When done searching, continue your reasoning.\n\n" | |||||
| 'Please answer the following question. You should think step by step to solve it.\n\n' | |||||
| ) | |||||
| relevant_extraction_prompt = """**Task Instruction:** | |||||
| You are tasked with reading and analyzing web pages based on the following inputs: **Previous Reasoning Steps**, **Current Search Query**, and **Searched Web Pages**. Your objective is to extract relevant and helpful information for **Current Search Query** from the **Searched Web Pages** and seamlessly integrate this information into the **Previous Reasoning Steps** to continue reasoning for the original question. | |||||
| **Guidelines:** | |||||
| 1. **Analyze the Searched Web Pages:** | |||||
| - Carefully review the content of each searched web page. | |||||
| - Identify factual information that is relevant to the **Current Search Query** and can aid in the reasoning process for the original question. | |||||
| 2. **Extract Relevant Information:** | |||||
| - Select the information from the Searched Web Pages that directly contributes to advancing the **Previous Reasoning Steps**. | |||||
| - Ensure that the extracted information is accurate and relevant. | |||||
| 3. **Output Format:** | |||||
| - **If the web pages provide helpful information for current search query:** Present the information beginning with `**Final Information**` as shown below. | |||||
| - The language of query **MUST BE** as the same as 'Search Query' or 'Web Pages'.\n" | |||||
| **Final Information** | |||||
| [Helpful information] | |||||
| - **If the web pages do not provide any helpful information for current search query:** Output the following text. | |||||
| **Final Information** | |||||
| No helpful information found. | |||||
| **Inputs:** | |||||
| - **Previous Reasoning Steps:** | |||||
| {prev_reasoning} | |||||
| - **Current Search Query:** | |||||
| {search_query} | |||||
| - **Searched Web Pages:** | |||||
| {document} | |||||
| """ | |||||
| executed_search_queries = [] | |||||
| msg_hisotry = [{"role": "user", "content": f'Question:\"{question}\"\n'}] | |||||
| all_reasoning_steps = [] | |||||
| think = "<think>" | |||||
| for ii in range(MAX_SEARCH_LIMIT + 1): | |||||
| if ii == MAX_SEARCH_LIMIT - 1: | |||||
| summary_think = f"\n{BEGIN_SEARCH_RESULT}\nThe maximum search limit is exceeded. You are not allowed to search.\n{END_SEARCH_RESULT}\n" | |||||
| yield {"answer": think + summary_think + "</think>", "reference": {}, "audio_binary": None} | |||||
| all_reasoning_steps.append(summary_think) | |||||
| msg_hisotry.append({"role": "assistant", "content": summary_think}) | |||||
| break | |||||
| query_think = "" | |||||
| if msg_hisotry[-1]["role"] != "user": | |||||
| msg_hisotry.append({"role": "user", "content": "Continues reasoning with the new information.\n"}) | |||||
| else: | |||||
| msg_hisotry[-1]["content"] += "\n\nContinues reasoning with the new information.\n" | |||||
| for ans in chat_mdl.chat_streamly(reason_prompt, msg_hisotry, {"temperature": 0.7}): | |||||
| ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) | |||||
| if not ans: | |||||
| continue | |||||
| query_think = ans | |||||
| yield {"answer": think + rm_query_tags(query_think) + "</think>", "reference": {}, "audio_binary": None} | |||||
| think += rm_query_tags(query_think) | |||||
| all_reasoning_steps.append(query_think) | |||||
| queries = extract_between(query_think, BEGIN_SEARCH_QUERY, END_SEARCH_QUERY) | |||||
| if not queries: | |||||
| if ii > 0: | |||||
| break | |||||
| queries = [question] | |||||
| for search_query in queries: | |||||
| logging.info(f"[THINK]Query: {ii}. {search_query}") | |||||
| msg_hisotry.append({"role": "assistant", "content": search_query}) | |||||
| think += f"\n\n> {ii+1}. {search_query}\n\n" | |||||
| yield {"answer": think + "</think>", "reference": {}, "audio_binary": None} | |||||
| summary_think = "" | |||||
| # The search query has been searched in previous steps. | |||||
| if search_query in executed_search_queries: | |||||
| summary_think = f"\n{BEGIN_SEARCH_RESULT}\nYou have searched this query. Please refer to previous results.\n{END_SEARCH_RESULT}\n" | |||||
| yield {"answer": think + summary_think + "</think>", "reference": {}, "audio_binary": None} | |||||
| all_reasoning_steps.append(summary_think) | |||||
| msg_hisotry.append({"role": "user", "content": summary_think}) | |||||
| think += summary_think | |||||
| continue | |||||
| truncated_prev_reasoning = "" | |||||
| for i, step in enumerate(all_reasoning_steps): | |||||
| truncated_prev_reasoning += f"Step {i + 1}: {step}\n\n" | |||||
| prev_steps = truncated_prev_reasoning.split('\n\n') | |||||
| if len(prev_steps) <= 5: | |||||
| truncated_prev_reasoning = '\n\n'.join(prev_steps) | |||||
| else: | |||||
| truncated_prev_reasoning = '' | |||||
| for i, step in enumerate(prev_steps): | |||||
| if i == 0 or i >= len(prev_steps) - 4 or BEGIN_SEARCH_QUERY in step or BEGIN_SEARCH_RESULT in step: | |||||
| truncated_prev_reasoning += step + '\n\n' | |||||
| else: | |||||
| if truncated_prev_reasoning[-len('\n\n...\n\n'):] != '\n\n...\n\n': | |||||
| truncated_prev_reasoning += '...\n\n' | |||||
| truncated_prev_reasoning = truncated_prev_reasoning.strip('\n') | |||||
| # Retrieval procedure: | |||||
| # 1. KB search | |||||
| # 2. Web search (optional) | |||||
| # 3. KG search (optional) | |||||
| kbinfos = settings.retrievaler.retrieval(search_query, embd_mdl, tenant_ids, kb_ids, 1, top_n, | |||||
| similarity_threshold, | |||||
| vector_similarity_weight | |||||
| ) | |||||
| if prompt_config.get("tavily_api_key", "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"): | |||||
| tav = Tavily(prompt_config["tavily_api_key"]) | |||||
| tav_res = tav.retrieve_chunks(" ".join(search_query)) | |||||
| kbinfos["chunks"].extend(tav_res["chunks"]) | |||||
| kbinfos["doc_aggs"].extend(tav_res["doc_aggs"]) | |||||
| if prompt_config.get("use_kg"): | |||||
| ck = settings.kg_retrievaler.retrieval(search_query, | |||||
| tenant_ids, | |||||
| kb_ids, | |||||
| embd_mdl, | |||||
| chat_mdl) | |||||
| if ck["content_with_weight"]: | |||||
| kbinfos["chunks"].insert(0, ck) | |||||
| # Merge chunk info for citations | |||||
| if not chunk_info["chunks"]: | |||||
| for k in chunk_info.keys(): | |||||
| chunk_info[k] = kbinfos[k] | |||||
| else: | |||||
| cids = [c["chunk_id"] for c in chunk_info["chunks"]] | |||||
| for c in kbinfos["chunks"]: | |||||
| if c["chunk_id"] in cids: | |||||
| continue | |||||
| chunk_info["chunks"].append(c) | |||||
| dids = [d["doc_id"] for d in chunk_info["doc_aggs"]] | |||||
| for d in kbinfos["doc_aggs"]: | |||||
| if d["doc_id"] in dids: | |||||
| continue | |||||
| chunk_info["doc_aggs"].append(d) | |||||
| think += "\n\n" | |||||
| for ans in chat_mdl.chat_streamly( | |||||
| relevant_extraction_prompt.format( | |||||
| prev_reasoning=truncated_prev_reasoning, | |||||
| search_query=search_query, | |||||
| document="\n".join(kb_prompt(kbinfos, 4096)) | |||||
| ), | |||||
| [{"role": "user", | |||||
| "content": f'Now you should analyze each web page and find helpful information based on the current search query "{search_query}" and previous reasoning steps.'}], | |||||
| {"temperature": 0.7}): | |||||
| ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) | |||||
| if not ans: | |||||
| continue | |||||
| summary_think = ans | |||||
| yield {"answer": think + rm_result_tags(summary_think) + "</think>", "reference": {}, "audio_binary": None} | |||||
| all_reasoning_steps.append(summary_think) | |||||
| msg_hisotry.append( | |||||
| {"role": "user", "content": f"\n\n{BEGIN_SEARCH_RESULT}{summary_think}{END_SEARCH_RESULT}\n\n"}) | |||||
| think += rm_result_tags(summary_think) | |||||
| logging.info(f"[THINK]Summary: {ii}. {summary_think}") | |||||
| yield think + "</think>" |
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import re
import csv
from copy import deepcopy
"Excel, csv(txt) format files are supported.")
| def label_question(question, kbs): | |||||
| from api.db.services.knowledgebase_service import KnowledgebaseService | |||||
| from graphrag.utils import get_tags_from_cache, set_tags_to_cache | |||||
| from api import settings | |||||
| tags = None | |||||
| tag_kb_ids = [] | |||||
| for kb in kbs: | |||||
| if kb.parser_config.get("tag_kb_ids"): | |||||
| tag_kb_ids.extend(kb.parser_config["tag_kb_ids"]) | |||||
| if tag_kb_ids: | |||||
| all_tags = get_tags_from_cache(tag_kb_ids) | |||||
| if not all_tags: | |||||
| all_tags = settings.retrievaler.all_tags_in_portion(kb.tenant_id, tag_kb_ids) | |||||
| set_tags_to_cache(all_tags, tag_kb_ids) | |||||
| else: | |||||
| all_tags = json.loads(all_tags) | |||||
| tag_kbs = KnowledgebaseService.get_by_ids(tag_kb_ids) | |||||
| tags = settings.retrievaler.tag_query(question, | |||||
| list(set([kb.tenant_id for kb in tag_kbs])), | |||||
| tag_kb_ids, | |||||
| all_tags, | |||||
| kb.parser_config.get("topn_tags", 3) | |||||
| ) | |||||
| return tags | |||||
if __name__ == "__main__":
import sys
tks_w = [(re.sub(r"^[\+-]", "", tk), w) for tk, w in tks_w if tk]
tks_w = [(tk.strip(), w) for tk, w in tks_w if tk.strip()]
syns = []
for tk, w in tks_w:
for tk, w in tks_w[:256]:
syn = self.syn.lookup(tk)
syn = rag_tokenizer.tokenize(" ".join(syn)).split()
keywords.extend(syn)
| # | |||||
| # Copyright 2024 The InfiniFlow Authors. All Rights Reserved. | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # | |||||
| import datetime | |||||
| import json | |||||
| import logging | |||||
| import os | |||||
| import re | |||||
| from collections import defaultdict | |||||
| import json_repair | |||||
| from api.db import LLMType | |||||
| from api.db.services.document_service import DocumentService | |||||
| from api.db.services.llm_service import TenantLLMService, LLMBundle | |||||
| from api.utils.file_utils import get_project_base_directory | |||||
| from rag.settings import TAG_FLD | |||||
| from rag.utils import num_tokens_from_string, encoder | |||||
| def llm_id2llm_type(llm_id): | |||||
| llm_id, _ = TenantLLMService.split_model_name_and_factory(llm_id) | |||||
| fnm = os.path.join(get_project_base_directory(), "conf") | |||||
| llm_factories = json.load(open(os.path.join(fnm, "llm_factories.json"), "r")) | |||||
| for llm_factory in llm_factories["factory_llm_infos"]: | |||||
| for llm in llm_factory["llm"]: | |||||
| if llm_id == llm["llm_name"]: | |||||
| return llm["model_type"].strip(",")[-1] | |||||
| def message_fit_in(msg, max_length=4000): | |||||
| def count(): | |||||
| nonlocal msg | |||||
| tks_cnts = [] | |||||
| for m in msg: | |||||
| tks_cnts.append( | |||||
| {"role": m["role"], "count": num_tokens_from_string(m["content"])}) | |||||
| total = 0 | |||||
| for m in tks_cnts: | |||||
| total += m["count"] | |||||
| return total | |||||
| c = count() | |||||
| if c < max_length: | |||||
| return c, msg | |||||
| msg_ = [m for m in msg[:-1] if m["role"] == "system"] | |||||
| if len(msg) > 1: | |||||
| msg_.append(msg[-1]) | |||||
| msg = msg_ | |||||
| c = count() | |||||
| if c < max_length: | |||||
| return c, msg | |||||
| ll = num_tokens_from_string(msg_[0]["content"]) | |||||
| ll2 = num_tokens_from_string(msg_[-1]["content"]) | |||||
| if ll / (ll + ll2) > 0.8: | |||||
| m = msg_[0]["content"] | |||||
| m = encoder.decode(encoder.encode(m)[:max_length - ll2]) | |||||
| msg[0]["content"] = m | |||||
| return max_length, msg | |||||
| m = msg_[1]["content"] | |||||
| m = encoder.decode(encoder.encode(m)[:max_length - ll2]) | |||||
| msg[1]["content"] = m | |||||
| return max_length, msg | |||||
| def kb_prompt(kbinfos, max_tokens): | |||||
| knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]] | |||||
| used_token_count = 0 | |||||
| chunks_num = 0 | |||||
| for i, c in enumerate(knowledges): | |||||
| used_token_count += num_tokens_from_string(c) | |||||
| chunks_num += 1 | |||||
| if max_tokens * 0.97 < used_token_count: | |||||
| knowledges = knowledges[:i] | |||||
| logging.warning(f"Not all the retrieval into prompt: {i+1}/{len(knowledges)}") | |||||
| break | |||||
| docs = DocumentService.get_by_ids([ck["doc_id"] for ck in kbinfos["chunks"][:chunks_num]]) | |||||
| docs = {d.id: d.meta_fields for d in docs} | |||||
| doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []}) | |||||
| for ck in kbinfos["chunks"][:chunks_num]: | |||||
| doc2chunks[ck["docnm_kwd"]]["chunks"].append((f"URL: {ck['url']}\n" if "url" in ck else "") + ck["content_with_weight"]) | |||||
| doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {}) | |||||
| knowledges = [] | |||||
| for nm, cks_meta in doc2chunks.items(): | |||||
| txt = f"Document: {nm} \n" | |||||
| for k, v in cks_meta["meta"].items(): | |||||
| txt += f"{k}: {v}\n" | |||||
| txt += "Relevant fragments as following:\n" | |||||
| for i, chunk in enumerate(cks_meta["chunks"], 1): | |||||
| txt += f"{i}. {chunk}\n" | |||||
| knowledges.append(txt) | |||||
| return knowledges | |||||
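A sketch of the input shape `kb_prompt` consumes; the chunk fields mirror the ones read above, while the ids and content are invented and `DocumentService.get_by_ids` is assumed to reach a working database:

```python
# Illustrative only: the doc_id/content are fabricated, and the meta fields
# come from DocumentService, so a configured RAGFlow backend is assumed.
from rag.prompts import kb_prompt

kbinfos = {
    "chunks": [
        {
            "doc_id": "doc-1",                       # hypothetical id
            "docnm_kwd": "handbook.pdf",
            "content_with_weight": "Refunds are processed within 14 days.",
            "url": "https://example.com/handbook",   # optional; prepended when present
        },
    ]
}

# One formatted block per document, ready to join into the system prompt.
for block in kb_prompt(kbinfos, max_tokens=8000):
    print(block)
```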
| def keyword_extraction(chat_mdl, content, topn=3): | |||||
| prompt = f""" | |||||
| Role: You're a text analyzer. | |||||
| Task: extract the most important keywords/phrases from a given piece of text content. | |||||
| Requirements: | |||||
| - Summarize the text content, and give top {topn} important keywords/phrases. | |||||
| - The keywords MUST be in the language of the given piece of text content. | |||||
| - The keywords are delimited by ENGLISH COMMA. | |||||
| - Keywords ONLY in output. | |||||
| ### Text Content | |||||
| {content} | |||||
| """ | |||||
| msg = [ | |||||
| {"role": "system", "content": prompt}, | |||||
| {"role": "user", "content": "Output: "} | |||||
| ] | |||||
| _, msg = message_fit_in(msg, chat_mdl.max_length) | |||||
| kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2}) | |||||
| if isinstance(kwd, tuple): | |||||
| kwd = kwd[0] | |||||
| kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL) | |||||
| if kwd.find("**ERROR**") >= 0: | |||||
| return "" | |||||
| return kwd | |||||
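A hedged sketch of how `keyword_extraction` is driven; the stub below only imitates the `chat`/`max_length` surface of `LLMBundle`, which is all the function touches:

```python
# FakeChatModel is a hypothetical stand-in for LLMBundle(tenant_id, LLMType.CHAT, llm_id);
# it only mimics the two members keyword_extraction uses.
from rag.prompts import keyword_extraction

class FakeChatModel:
    max_length = 8192

    def chat(self, system, history, gen_conf):
        # A real model returns a comma-separated keyword list.
        return "retrieval, augmentation, generation"

keywords = keyword_extraction(FakeChatModel(),
                              "RAG pipelines combine retrieval with generation.",
                              topn=3)
print(keywords)  # "retrieval, augmentation, generation"
```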
| def question_proposal(chat_mdl, content, topn=3): | |||||
| prompt = f""" | |||||
| Role: You're a text analyzer. | |||||
| Task: propose {topn} questions about a given piece of text content. | |||||
| Requirements: | |||||
| - Understand and summarize the text content, and propose top {topn} important questions. | |||||
| - The questions SHOULD NOT have overlapping meanings. | |||||
| - The questions SHOULD cover the main content of the text as much as possible. | |||||
| - The questions MUST be in the language of the given piece of text content. | |||||
| - One question per line. | |||||
| - Questions ONLY in output. | |||||
| ### Text Content | |||||
| {content} | |||||
| """ | |||||
| msg = [ | |||||
| {"role": "system", "content": prompt}, | |||||
| {"role": "user", "content": "Output: "} | |||||
| ] | |||||
| _, msg = message_fit_in(msg, chat_mdl.max_length) | |||||
| kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2}) | |||||
| if isinstance(kwd, tuple): | |||||
| kwd = kwd[0] | |||||
| kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL) | |||||
| if kwd.find("**ERROR**") >= 0: | |||||
| return "" | |||||
| return kwd | |||||
| def full_question(tenant_id, llm_id, messages): | |||||
| if llm_id2llm_type(llm_id) == "image2text": | |||||
| chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id) | |||||
| else: | |||||
| chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id) | |||||
| conv = [] | |||||
| for m in messages: | |||||
| if m["role"] not in ["user", "assistant"]: | |||||
| continue | |||||
| conv.append("{}: {}".format(m["role"].upper(), m["content"])) | |||||
| conv = "\n".join(conv) | |||||
| today = datetime.date.today().isoformat() | |||||
| yesterday = (datetime.date.today() - datetime.timedelta(days=1)).isoformat() | |||||
| tomorrow = (datetime.date.today() + datetime.timedelta(days=1)).isoformat() | |||||
| prompt = f""" | |||||
| Role: A helpful assistant | |||||
| Task and steps: | |||||
| 1. Generate a full user question that would follow the conversation. | |||||
| 2. If the user's question involves a relative date, you need to convert it into an absolute date based on the current date, which is {today}. For example: 'yesterday' would be converted to {yesterday}. | |||||
| Requirements & Restrictions: | |||||
| - Text generated MUST be in the same language as the original user's question. | |||||
| - If the user's latest question is already complete, don't do anything; just return the original question. | |||||
| - DON'T generate anything except a refined question. | |||||
| ###################### | |||||
| -Examples- | |||||
| ###################### | |||||
| # Example 1 | |||||
| ## Conversation | |||||
| USER: What is the name of Donald Trump's father? | |||||
| ASSISTANT: Fred Trump. | |||||
| USER: And his mother? | |||||
| ############### | |||||
| Output: What's the name of Donald Trump's mother? | |||||
| ------------ | |||||
| # Example 2 | |||||
| ## Conversation | |||||
| USER: What is the name of Donald Trump's father? | |||||
| ASSISTANT: Fred Trump. | |||||
| USER: And his mother? | |||||
| ASSISTANT: Mary Trump. | |||||
| USER: What's her full name? | |||||
| ############### | |||||
| Output: What's the full name of Donald Trump's mother Mary Trump? | |||||
| ------------ | |||||
| # Example 3 | |||||
| ## Conversation | |||||
| USER: What's the weather today in London? | |||||
| ASSISTANT: Cloudy. | |||||
| USER: What's about tomorrow in Rochester? | |||||
| ############### | |||||
| Output: What's the weather in Rochester on {tomorrow}? | |||||
| ###################### | |||||
| # Real Data | |||||
| ## Conversation | |||||
| {conv} | |||||
| ############### | |||||
| """ | |||||
| ans = chat_mdl.chat(prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.2}) | |||||
| ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) | |||||
| return ans if ans.find("**ERROR**") < 0 else messages[-1]["content"] | |||||
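`full_question` builds its own `LLMBundle` from the tenant and model id, so it needs a configured deployment; the ids below are placeholders, shown only to illustrate the expected message format:

```python
# Placeholders only: "tenant-id" and "model-id" must be a real tenant and a
# chat model registered for it; the conversation shows the expected shape.
from rag.prompts import full_question

messages = [
    {"role": "user", "content": "What's the weather today in London?"},
    {"role": "assistant", "content": "Cloudy."},
    {"role": "user", "content": "And tomorrow in Rochester?"},
]

# Rewrites the last turn into a standalone question, resolving "tomorrow"
# into an absolute date.
print(full_question("tenant-id", "model-id", messages))
```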
| def content_tagging(chat_mdl, content, all_tags, examples, topn=3): | |||||
| prompt = f""" | |||||
| Role: You're a text analyzer. | |||||
| Task: Tag (assign labels to) a given piece of text content based on the examples and the entire tag set. | |||||
| Steps: | |||||
| - Comprehend the tag/label set. | |||||
| - Comprehend the examples, each of which consists of text content and its assigned tags with relevance scores in JSON format. | |||||
| - Summarize the text content, and tag it with the top {topn} most relevant tags from the tag/label set, along with the corresponding relevance scores. | |||||
| Requirements: | |||||
| - The tags MUST be from the tag set. | |||||
| - The output MUST be in JSON format only; the key is the tag and the value is its relevance score. | |||||
| - The relevance score must range from 1 to 10. | |||||
| - JSON ONLY in output. | |||||
| # TAG SET | |||||
| {", ".join(all_tags)} | |||||
| """ | |||||
| for i, ex in enumerate(examples): | |||||
| prompt += """ | |||||
| # Example {} | |||||
| ### Text Content | |||||
| {} | |||||
| Output: | |||||
| {} | |||||
| """.format(i, ex["content"], json.dumps(ex[TAG_FLD], indent=2, ensure_ascii=False)) | |||||
| prompt += f""" | |||||
| # Real Data | |||||
| ### Text Content | |||||
| {content} | |||||
| """ | |||||
| msg = [ | |||||
| {"role": "system", "content": prompt}, | |||||
| {"role": "user", "content": "Output: "} | |||||
| ] | |||||
| _, msg = message_fit_in(msg, chat_mdl.max_length) | |||||
| kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.5}) | |||||
| if isinstance(kwd, tuple): | |||||
| kwd = kwd[0] | |||||
| kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL) | |||||
| if kwd.find("**ERROR**") >= 0: | |||||
| raise Exception(kwd) | |||||
| try: | |||||
| return json_repair.loads(kwd) | |||||
| except json_repair.JSONDecodeError: | |||||
| try: | |||||
| result = kwd.replace(prompt[:-1], '').replace('user', '').replace('model', '').strip() | |||||
| result = '{' + result.split('{')[1].split('}')[0] + '}' | |||||
| return json_repair.loads(result) | |||||
| except Exception as e: | |||||
| logging.exception(f"JSON parsing error: {result} -> {e}") | |||||
| raise e |
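And a similar stub-driven sketch for `content_tagging`; `TAG_FLD` is the field holding each example's tag dict, and the tags, example, and fake model response are invented:

```python
# Hypothetical stub and data, only to show the expected shapes; the real
# caller passes an LLMBundle and example chunks pulled from the tag knowledge base.
from rag.settings import TAG_FLD
from rag.prompts import content_tagging

class FakeChatModel:
    max_length = 8192

    def chat(self, system, history, gen_conf):
        # A real model returns a JSON object mapping tags to relevance scores.
        return '{"billing": 8, "refunds": 6}'

examples = [{"content": "How do I get my money back?", TAG_FLD: {"refunds": 9}}]
tags = content_tagging(FakeChatModel(),
                       "My invoice was charged twice this month.",
                       all_tags=["billing", "refunds", "shipping"],
                       examples=examples,
                       topn=2)
print(tags)  # {'billing': 8, 'refunds': 6}
```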
| from graphrag.light.graph_extractor import GraphExtractor as LightKGExt | from graphrag.light.graph_extractor import GraphExtractor as LightKGExt | ||||
| from graphrag.general.graph_extractor import GraphExtractor as GeneralKGExt | from graphrag.general.graph_extractor import GraphExtractor as GeneralKGExt | ||||
| from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache | from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache | ||||
| from rag.prompts import keyword_extraction, question_proposal, content_tagging | |||||
| CONSUMER_NO = "0" if len(sys.argv) < 2 else sys.argv[1] | CONSUMER_NO = "0" if len(sys.argv) < 2 else sys.argv[1] | ||||
| CONSUMER_NAME = "task_executor_" + CONSUMER_NO | CONSUMER_NAME = "task_executor_" + CONSUMER_NO | ||||
| from peewee import DoesNotExist | from peewee import DoesNotExist | ||||
| from api.db import LLMType, ParserType, TaskStatus | from api.db import LLMType, ParserType, TaskStatus | ||||
| from api.db.services.dialog_service import keyword_extraction, question_proposal, content_tagging | |||||
| from api.db.services.document_service import DocumentService | from api.db.services.document_service import DocumentService | ||||
| from api.db.services.llm_service import LLMBundle | from api.db.services.llm_service import LLMBundle | ||||
| from api.db.services.task_service import TaskService | from api.db.services.task_service import TaskService |
| version = "0.8.2" | version = "0.8.2" | ||||
| source = { registry = "https://mirrors.aliyun.com/pypi/simple" } | source = { registry = "https://mirrors.aliyun.com/pypi/simple" } | ||||
| sdist = { url = "https://mirrors.aliyun.com/pypi/packages/9d/fe/db74bd405d515f06657f11ad529878fd389576dca4812bea6f98d9b31574/datrie-0.8.2.tar.gz", hash = "sha256:525b08f638d5cf6115df6ccd818e5a01298cd230b2dac91c8ff2e6499d18765d" } | sdist = { url = "https://mirrors.aliyun.com/pypi/packages/9d/fe/db74bd405d515f06657f11ad529878fd389576dca4812bea6f98d9b31574/datrie-0.8.2.tar.gz", hash = "sha256:525b08f638d5cf6115df6ccd818e5a01298cd230b2dac91c8ff2e6499d18765d" } | ||||
| wheels = [ | |||||
| { url = "https://mirrors.aliyun.com/pypi/packages/44/02/53f0cf0bf0cd629ba6c2cc13f2f9db24323459e9c19463783d890a540a96/datrie-0.8.2-pp273-pypy_73-win32.whl", hash = "sha256:b07bd5fdfc3399a6dab86d6e35c72b1dbd598e80c97509c7c7518ab8774d3fda" }, | |||||
| ] | |||||
| [[package]] | [[package]] | ||||
| name = "decorator" | name = "decorator" | ||||
| { url = "https://mirrors.aliyun.com/pypi/packages/48/7d/0f2b09490b98cc6a902ac15dda8760c568b9c18cfe70e0ef7a16de64d53a/pycryptodomex-3.20.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7a7a8f33a1f1fb762ede6cc9cbab8f2a9ba13b196bfaf7bc6f0b39d2ba315a43" }, | { url = "https://mirrors.aliyun.com/pypi/packages/48/7d/0f2b09490b98cc6a902ac15dda8760c568b9c18cfe70e0ef7a16de64d53a/pycryptodomex-3.20.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7a7a8f33a1f1fb762ede6cc9cbab8f2a9ba13b196bfaf7bc6f0b39d2ba315a43" }, | ||||
| { url = "https://mirrors.aliyun.com/pypi/packages/b0/1c/375adb14b71ee1c8d8232904e928b3e7af5bbbca7c04e4bec94fe8e90c3d/pycryptodomex-3.20.0-cp35-abi3-win32.whl", hash = "sha256:c39778fd0548d78917b61f03c1fa8bfda6cfcf98c767decf360945fe6f97461e" }, | { url = "https://mirrors.aliyun.com/pypi/packages/b0/1c/375adb14b71ee1c8d8232904e928b3e7af5bbbca7c04e4bec94fe8e90c3d/pycryptodomex-3.20.0-cp35-abi3-win32.whl", hash = "sha256:c39778fd0548d78917b61f03c1fa8bfda6cfcf98c767decf360945fe6f97461e" }, | ||||
| { url = "https://mirrors.aliyun.com/pypi/packages/b2/e8/1b92184ab7e5595bf38000587e6f8cf9556ebd1bf0a583619bee2057afbd/pycryptodomex-3.20.0-cp35-abi3-win_amd64.whl", hash = "sha256:2a47bcc478741b71273b917232f521fd5704ab4b25d301669879e7273d3586cc" }, | { url = "https://mirrors.aliyun.com/pypi/packages/b2/e8/1b92184ab7e5595bf38000587e6f8cf9556ebd1bf0a583619bee2057afbd/pycryptodomex-3.20.0-cp35-abi3-win_amd64.whl", hash = "sha256:2a47bcc478741b71273b917232f521fd5704ab4b25d301669879e7273d3586cc" }, | ||||
| { url = "https://mirrors.aliyun.com/pypi/packages/e7/c5/9140bb867141d948c8e242013ec8a8011172233c898dfdba0a2417c3169a/pycryptodomex-3.20.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:1be97461c439a6af4fe1cf8bf6ca5936d3db252737d2f379cc6b2e394e12a458" }, | |||||
| { url = "https://mirrors.aliyun.com/pypi/packages/5e/6a/04acb4978ce08ab16890c70611ebc6efd251681341617bbb9e53356dee70/pycryptodomex-3.20.0-pp27-pypy_73-win32.whl", hash = "sha256:19764605feea0df966445d46533729b645033f134baeb3ea26ad518c9fdf212c" }, | |||||
| { url = "https://mirrors.aliyun.com/pypi/packages/eb/df/3f1ea084e43b91e6d2b6b3493cc948864c17ea5d93ff1261a03812fbfd1a/pycryptodomex-3.20.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e497413560e03421484189a6b65e33fe800d3bd75590e6d78d4dfdb7accf3b" }, | { url = "https://mirrors.aliyun.com/pypi/packages/eb/df/3f1ea084e43b91e6d2b6b3493cc948864c17ea5d93ff1261a03812fbfd1a/pycryptodomex-3.20.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e497413560e03421484189a6b65e33fe800d3bd75590e6d78d4dfdb7accf3b" }, | ||||
| { url = "https://mirrors.aliyun.com/pypi/packages/c9/f3/83ffbdfa0c8f9154bcd8866895f6cae5a3ec749da8b0840603cf936c4412/pycryptodomex-3.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e48217c7901edd95f9f097feaa0388da215ed14ce2ece803d3f300b4e694abea" }, | { url = "https://mirrors.aliyun.com/pypi/packages/c9/f3/83ffbdfa0c8f9154bcd8866895f6cae5a3ec749da8b0840603cf936c4412/pycryptodomex-3.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e48217c7901edd95f9f097feaa0388da215ed14ce2ece803d3f300b4e694abea" }, | ||||
| { url = "https://mirrors.aliyun.com/pypi/packages/c9/9d/c113e640aaf02af5631ae2686b742aac5cd0e1402b9d6512b1c7ec5ef05d/pycryptodomex-3.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d00fe8596e1cc46b44bf3907354e9377aa030ec4cd04afbbf6e899fc1e2a7781" }, | { url = "https://mirrors.aliyun.com/pypi/packages/c9/9d/c113e640aaf02af5631ae2686b742aac5cd0e1402b9d6512b1c7ec5ef05d/pycryptodomex-3.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d00fe8596e1cc46b44bf3907354e9377aa030ec4cd04afbbf6e899fc1e2a7781" }, |