Просмотр исходного кода

Let ThreadPool exit gracefully. (#3653)

### What problem does this PR solve?

#3646

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.14.1
Kevin Hu 11 месяцев назад
Родитель
Сommit
0891a393d7
Аккаунт пользователя с таким Email не найден
5 измененных файлов: 39 добавлений и 41 удалений
  1. 0
    4
      agent/component/crawler.py
  2. 18
    18
      graphrag/index.py
  3. 18
    18
      graphrag/mind_map_extractor.py
  4. 1
    1
      rag/llm/chat_model.py
  5. 2
    0
      rag/svr/task_executor.py

+ 0
- 4
agent/component/crawler.py Просмотреть файл

@@ -65,7 +65,3 @@ class Crawler(ComponentBase, ABC):
elif self._param.extract_type == 'content':
result.extracted_content
return result.markdown



+ 18
- 18
graphrag/index.py Просмотреть файл

@@ -64,27 +64,27 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: list[str], callback, en
BATCH_SIZE=4
texts, graphs = [], []
cnt = 0
threads = []
max_workers = int(os.environ.get('GRAPH_EXTRACTOR_MAX_WORKERS', 50))
exe = ThreadPoolExecutor(max_workers=max_workers)
for i in range(len(chunks)):
tkn_cnt = num_tokens_from_string(chunks[i])
if cnt+tkn_cnt >= left_token_count and texts:
with ThreadPoolExecutor(max_workers=max_workers) as exe:
threads = []
for i in range(len(chunks)):
tkn_cnt = num_tokens_from_string(chunks[i])
if cnt+tkn_cnt >= left_token_count and texts:
for b in range(0, len(texts), BATCH_SIZE):
threads.append(exe.submit(ext, ["\n".join(texts[b:b+BATCH_SIZE])], {"entity_types": entity_types}, callback))
texts = []
cnt = 0
texts.append(chunks[i])
cnt += tkn_cnt
if texts:
for b in range(0, len(texts), BATCH_SIZE):
threads.append(exe.submit(ext, ["\n".join(texts[b:b+BATCH_SIZE])], {"entity_types": entity_types}, callback))
texts = []
cnt = 0
texts.append(chunks[i])
cnt += tkn_cnt
if texts:
for b in range(0, len(texts), BATCH_SIZE):
threads.append(exe.submit(ext, ["\n".join(texts[b:b+BATCH_SIZE])], {"entity_types": entity_types}, callback))

callback(0.5, "Extracting entities.")
graphs = []
for i, _ in enumerate(threads):
graphs.append(_.result().output)
callback(0.5 + 0.1*i/len(threads), f"Entities extraction progress ... {i+1}/{len(threads)}")

callback(0.5, "Extracting entities.")
graphs = []
for i, _ in enumerate(threads):
graphs.append(_.result().output)
callback(0.5 + 0.1*i/len(threads), f"Entities extraction progress ... {i+1}/{len(threads)}")

graph = reduce(graph_merge, graphs) if graphs else nx.Graph()
er = EntityResolution(llm_bdl)

+ 18
- 18
graphrag/mind_map_extractor.py Просмотреть файл

@@ -88,26 +88,26 @@ class MindMapExtractor:
prompt_variables = {}

try:
max_workers = int(os.environ.get('MINDMAP_EXTRACTOR_MAX_WORKERS', 12))
exe = ThreadPoolExecutor(max_workers=max_workers)
threads = []
token_count = max(self._llm.max_length * 0.8, self._llm.max_length - 512)
texts = []
res = []
cnt = 0
for i in range(len(sections)):
section_cnt = num_tokens_from_string(sections[i])
if cnt + section_cnt >= token_count and texts:
max_workers = int(os.environ.get('MINDMAP_EXTRACTOR_MAX_WORKERS', 12))
with ThreadPoolExecutor(max_workers=max_workers) as exe:
threads = []
token_count = max(self._llm.max_length * 0.8, self._llm.max_length - 512)
texts = []
cnt = 0
for i in range(len(sections)):
section_cnt = num_tokens_from_string(sections[i])
if cnt + section_cnt >= token_count and texts:
threads.append(exe.submit(self._process_document, "".join(texts), prompt_variables))
texts = []
cnt = 0
texts.append(sections[i])
cnt += section_cnt
if texts:
threads.append(exe.submit(self._process_document, "".join(texts), prompt_variables))
texts = []
cnt = 0
texts.append(sections[i])
cnt += section_cnt
if texts:
threads.append(exe.submit(self._process_document, "".join(texts), prompt_variables))

for i, _ in enumerate(threads):
res.append(_.result())

for i, _ in enumerate(threads):
res.append(_.result())

if not res:
return MindMapResult(output={"id": "root", "children": []})

+ 1
- 1
rag/llm/chat_model.py Просмотреть файл

@@ -366,7 +366,7 @@ class OllamaChat(Base):
keep_alive=-1
)
ans = response["message"]["content"].strip()
return ans, response["eval_count"] + response.get("prompt_eval_count", 0)
return ans, response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
except Exception as e:
return "**ERROR**: " + str(e), 0


+ 2
- 0
rag/svr/task_executor.py Просмотреть файл

@@ -492,6 +492,7 @@ def report_status():
logging.exception("report_status got exception")
time.sleep(30)


def analyze_heap(snapshot1: tracemalloc.Snapshot, snapshot2: tracemalloc.Snapshot, snapshot_id: int, dump_full: bool):
msg = ""
if dump_full:
@@ -508,6 +509,7 @@ def analyze_heap(snapshot1: tracemalloc.Snapshot, snapshot2: tracemalloc.Snapsho
msg += '\n'.join(stat.traceback.format())
logging.info(msg)


def main():
settings.init_settings()
background_thread = threading.Thread(target=report_status)

Загрузка…
Отмена
Сохранить