### What problem does this PR solve?

### Type of change

- [x] Refactoring

tags/v0.20.0
| @@ -152,7 +152,6 @@ class EntityResolution(Extractor): | |||
| ) | |||
| async def _resolve_candidate(self, candidate_resolution_i: tuple[str, list[tuple[str, str]]], resolution_result: set[str], resolution_result_lock: trio.Lock): | |||
| gen_conf = {"temperature": 0.5} | |||
| pair_txt = [ | |||
| f'When determining whether two {candidate_resolution_i[0]}s are the same, you should only focus on critical properties and overlook noisy factors.\n'] | |||
| for index, candidate in enumerate(candidate_resolution_i[1]): | |||
| @@ -171,7 +170,7 @@ class EntityResolution(Extractor): | |||
| async with chat_limiter: | |||
| try: | |||
| with trio.move_on_after(120) as cancel_scope: | |||
| response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], gen_conf) | |||
| response = await trio.to_thread.run_sync(self._chat, text, [{"role": "user", "content": "Output:"}], {}) | |||
| if cancel_scope.cancelled_caught: | |||
| logging.warning("_resolve_candidate._chat timeout, skipping...") | |||
| return | |||
| @@ -90,11 +90,10 @@ class CommunityReportsExtractor(Extractor): | |||
| "relation_df": rela_df.to_csv(index_label="id") | |||
| } | |||
| text = perform_variable_replacements(self._extraction_prompt, variables=prompt_variables) | |||
| gen_conf = {"temperature": 0.3} | |||
| async with chat_limiter: | |||
| try: | |||
| with trio.move_on_after(80) as cancel_scope: | |||
| response = await trio.to_thread.run_sync( self._chat, text, [{"role": "user", "content": "Output:"}], gen_conf) | |||
| response = await trio.to_thread.run_sync( self._chat, text, [{"role": "user", "content": "Output:"}], {}) | |||
| if cancel_scope.cancelled_caught: | |||
| logging.warning("extract_community_report._chat timeout, skipping...") | |||
| return | |||
| @@ -105,10 +105,9 @@ class GraphExtractor(Extractor): | |||
| **self._prompt_variables, | |||
| self._input_text_key: content, | |||
| } | |||
| gen_conf = {"temperature": 0.3} | |||
| hint_prompt = perform_variable_replacements(self._extraction_prompt, variables=variables) | |||
| async with chat_limiter: | |||
| response = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf)) | |||
| response = await trio.to_thread.run_sync(lambda: self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], {})) | |||
| token_count += num_tokens_from_string(hint_prompt + response) | |||
| results = response or "" | |||
| @@ -118,7 +117,7 @@ class GraphExtractor(Extractor): | |||
| for i in range(self._max_gleanings): | |||
| history.append({"role": "user", "content": CONTINUE_PROMPT}) | |||
| async with chat_limiter: | |||
| response = await trio.to_thread.run_sync(lambda: self._chat("", history, gen_conf)) | |||
| response = await trio.to_thread.run_sync(lambda: self._chat("", history, {})) | |||
| token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response) | |||
| results += response or "" | |||
| @@ -171,9 +171,8 @@ class MindMapExtractor(Extractor): | |||
| self._input_text_key: text, | |||
| } | |||
| text = perform_variable_replacements(self._mind_map_prompt, variables=variables) | |||
| gen_conf = {"temperature": 0.5} | |||
| async with chat_limiter: | |||
| response = await trio.to_thread.run_sync(lambda: self._chat(text, [{"role": "user", "content": "Output:"}], gen_conf)) | |||
| response = await trio.to_thread.run_sync(lambda: self._chat(text, [{"role": "user", "content": "Output:"}], {})) | |||
| response = re.sub(r"```[^\n]*", "", response) | |||
| logging.debug(response) | |||
| logging.debug(self._todict(markdown_to_json.dictify(response))) | |||
| @@ -45,7 +45,7 @@ class KGSearch(Dealer): | |||
| ty2ents = trio.run(lambda: get_entity_type2sampels(idxnms, kb_ids)) | |||
| hint_prompt = PROMPTS["minirag_query2kwd"].format(query=question, | |||
| TYPE_POOL=json.dumps(ty2ents, ensure_ascii=False, indent=2)) | |||
| result = self._chat(llm, hint_prompt, [{"role": "user", "content": "Output:"}], {"temperature": .5}) | |||
| result = self._chat(llm, hint_prompt, [{"role": "user", "content": "Output:"}], {}) | |||
| try: | |||
| keywords_data = json_repair.loads(result) | |||
| type_keywords = keywords_data.get("answer_type_keywords", []) | |||
| @@ -107,7 +107,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval: | |||
| ), | |||
| } | |||
| ], | |||
| {"temperature": 0.3, "max_tokens": self._max_token}, | |||
| {"max_tokens": self._max_token}, | |||
| ) | |||
| cnt = re.sub( | |||
| "(······\n由于长度的原因,回答被截断了,要继续吗?|For the content length reason, it stopped, continue?)", | |||
| @@ -103,6 +103,7 @@ MAX_CONCURRENT_CHUNK_BUILDERS = int(os.environ.get('MAX_CONCURRENT_CHUNK_BUILDER | |||
| MAX_CONCURRENT_MINIO = int(os.environ.get('MAX_CONCURRENT_MINIO', '10')) | |||
| task_limiter = trio.Semaphore(MAX_CONCURRENT_TASKS) | |||
| chunk_limiter = trio.CapacityLimiter(MAX_CONCURRENT_CHUNK_BUILDERS) | |||
| embed_limiter = trio.CapacityLimiter(MAX_CONCURRENT_CHUNK_BUILDERS) | |||
| minio_limiter = trio.CapacityLimiter(MAX_CONCURRENT_MINIO) | |||
| kg_limiter = trio.CapacityLimiter(2) | |||
| WORKER_HEARTBEAT_TIMEOUT = int(os.environ.get('WORKER_HEARTBEAT_TIMEOUT', '120')) | |||
| @@ -442,7 +443,8 @@ async def embedding(docs, mdl, parser_config=None, callback=None): | |||
| cnts_ = np.array([]) | |||
| for i in range(0, len(cnts), EMBEDDING_BATCH_SIZE): | |||
| vts, c = await trio.to_thread.run_sync(lambda: mdl.encode([truncate(c, mdl.max_length-10) for c in cnts[i: i + EMBEDDING_BATCH_SIZE]])) | |||
| async with embed_limiter: | |||
| vts, c = await trio.to_thread.run_sync(lambda: mdl.encode([truncate(c, mdl.max_length-10) for c in cnts[i: i + EMBEDDING_BATCH_SIZE]])) | |||
| if len(cnts_) == 0: | |||
| cnts_ = vts | |||
| else: | |||