瀏覽代碼

Perf: skip redundant attribute check in tidy_graph (#8121)

### What problem does this PR solve?
Support skipping the attribute check in `tidy_graph` when the upstream
caller has already ensured that the required attributes are present.

### Type of change
- [X] Performance Improvement
tags/v0.19.1
Stephen Hu 4 月之前
父節點
當前提交
2337bbf6ca
沒有連結到貢獻者的電子郵件帳戶。
共有 2 個檔案被更改,包括 15 行新增和 13 行刪除
  1. 1
    1
      graphrag/general/index.py
  2. 14
    12
      graphrag/utils.py

+ 1
- 1
graphrag/general/index.py 查看文件

@@ -166,7 +166,7 @@ async def generate_subgraph(
)
if ignored_rels:
callback(msg=f"ignored {ignored_rels} relations due to missing entities.")
tidy_graph(subgraph, callback)
tidy_graph(subgraph, callback, check_attribute=False)

subgraph.graph["source_id"] = [doc_id]
chunk = {

+ 14
- 12
graphrag/utils.py 查看文件

@@ -157,30 +157,32 @@ def set_tags_to_cache(kb_ids, tags):
k = hasher.hexdigest()
REDIS_CONN.set(k, json.dumps(tags).encode("utf-8"), 600)

def tidy_graph(graph: nx.Graph, callback):
def tidy_graph(graph: nx.Graph, callback, check_attribute: bool = True):
"""
Ensure all nodes and edges in the graph have some essential attribute.
"""
def is_valid_node(node_attrs: dict) -> bool:
def is_valid_item(node_attrs: dict) -> bool:
valid_node = True
for attr in ["description", "source_id"]:
if attr not in node_attrs:
valid_node = False
break
return valid_node
purged_nodes = []
for node, node_attrs in graph.nodes(data=True):
if not is_valid_node(node_attrs):
purged_nodes.append(node)
for node in purged_nodes:
graph.remove_node(node)
if purged_nodes and callback:
callback(msg=f"Purged {len(purged_nodes)} nodes from graph due to missing essential attributes.")
if check_attribute:
purged_nodes = []
for node, node_attrs in graph.nodes(data=True):
if not is_valid_item(node_attrs):
purged_nodes.append(node)
for node in purged_nodes:
graph.remove_node(node)
if purged_nodes and callback:
callback(msg=f"Purged {len(purged_nodes)} nodes from graph due to missing essential attributes.")

purged_edges = []
for source, target, attr in graph.edges(data=True):
if not is_valid_node(attr):
purged_edges.append((source, target))
if check_attribute:
if not is_valid_item(attr):
purged_edges.append((source, target))
if "keywords" not in attr:
attr["keywords"] = []
for source, target in purged_edges:

Loading…
取消
儲存