浏览代码

Perf: add limit to embedding druing KG. (#8881)

### What problem does this PR solve?

### Type of change

- [x] Performance Improvement
tags/v0.20.0
Kevin Hu 3 个月前
父节点
当前提交
7120b37882
没有帐户链接到提交者的电子邮件
共有 1 个文件被更改,包括 9 次插入5 次删除
  1. 9
    5
      graphrag/utils.py

+ 9
- 5
graphrag/utils.py 查看文件

return xxhash.xxh64((chunk["content_with_weight"] + chunk["kb_id"]).encode("utf-8")).hexdigest() return xxhash.xxh64((chunk["content_with_weight"] + chunk["kb_id"]).encode("utf-8")).hexdigest()




@timeout(1, 3)
@timeout(3, 3)
async def graph_node_to_chunk(kb_id, embd_mdl, ent_name, meta, chunks): async def graph_node_to_chunk(kb_id, embd_mdl, ent_name, meta, chunks):
chunk = { chunk = {
"id": get_uuid(), "id": get_uuid(),
chunks.append(chunk) chunks.append(chunk)




@timeout(3, 3)
def get_relation(tenant_id, kb_id, from_ent_name, to_ent_name, size=1): def get_relation(tenant_id, kb_id, from_ent_name, to_ent_name, size=1):
ents = from_ent_name ents = from_ent_name
if isinstance(ents, str): if isinstance(ents, str):
return res return res




@timeout(1, 3)
@timeout(3, 3)
async def graph_edge_to_chunk(kb_id, embd_mdl, from_ent_name, to_ent_name, meta, chunks): async def graph_edge_to_chunk(kb_id, embd_mdl, from_ent_name, to_ent_name, meta, chunks):
chunk = { chunk = {
"id": get_uuid(), "id": get_uuid(),
"available_int": 0, "available_int": 0,
"removed_kwd": "N" "removed_kwd": "N"
}) })

semaphore = trio.Semaphore(5)
async with trio.open_nursery() as nursery: async with trio.open_nursery() as nursery:
for node in change.added_updated_nodes: for node in change.added_updated_nodes:
node_attrs = graph.nodes[node] node_attrs = graph.nodes[node]
nursery.start_soon(graph_node_to_chunk, kb_id, embd_mdl, node, node_attrs, chunks)
async with semaphore:
nursery.start_soon(graph_node_to_chunk, kb_id, embd_mdl, node, node_attrs, chunks)
for from_node, to_node in change.added_updated_edges: for from_node, to_node in change.added_updated_edges:
edge_attrs = graph.get_edge_data(from_node, to_node) edge_attrs = graph.get_edge_data(from_node, to_node)
if not edge_attrs: if not edge_attrs:
# added_updated_edges could record a non-existing edge if both from_node and to_node participate in nodes merging. # added_updated_edges could record a non-existing edge if both from_node and to_node participate in nodes merging.
continue continue
nursery.start_soon(graph_edge_to_chunk, kb_id, embd_mdl, from_node, to_node, edge_attrs, chunks)
async with semaphore:
nursery.start_soon(graph_edge_to_chunk, kb_id, embd_mdl, from_node, to_node, edge_attrs, chunks)
now = trio.current_time() now = trio.current_time()
if callback: if callback:
callback(msg=f"set_graph converted graph change to {len(chunks)} chunks in {now - start:.2f}s.") callback(msg=f"set_graph converted graph change to {len(chunks)} chunks in {now - start:.2f}s.")

正在加载...
取消
保存