### Related Issue: #6653 ### Environment: Using the nightly version, with an Elasticsearch database. ### Bug Description: When clicking the "Entity Resolution" button in KnowledgeGraph, the following errors occurred: graphrag/entity_resolution.py ``` list(sub_connect_graph.nodes) AttributeError ``` graphrag/general/extractor.py ``` node0_attrs[attr] = sorted(set(node0_attrs[attr].extend(node1_attrs[attr]))) TypeError: 'NoneType' object is not iterable ``` ``` for attr in ["keywords", "source_id"]: KeyError I think the "keywords" attribute belongs to edges, not nodes ``` graphrag/utils.py ``` settings.docStoreConn.delete() # Sync function called as async ``` ### Changes Made: Fixed the AttributeError in entity_resolution.py by properly handling graph nodes. Fixed the TypeError and KeyError in extractor.py by separating the node and edge attribute-merge operations. Corrected the async/sync mismatch in the document deletion call. tags/v0.18.0
| @@ -111,7 +111,7 @@ class EntityResolution(Extractor): | |||
| connect_graph.add_edges_from(resolution_result) | |||
| async with trio.open_nursery() as nursery: | |||
| for sub_connect_graph in nx.connected_components(connect_graph): | |||
| merging_nodes = list(sub_connect_graph.nodes) | |||
| merging_nodes = list(sub_connect_graph.nodes()) | |||
| nursery.start_soon(lambda: self._merge_graph_nodes(graph, merging_nodes, change)) | |||
| # Update pagerank | |||
| @@ -201,8 +201,7 @@ class Extractor: | |||
| # Merge two nodes, keep "entity_name", "entity_type", "page_rank" unchanged. | |||
| node1_attrs = graph.nodes[node1] | |||
| node0_attrs["description"] += f"{GRAPH_FIELD_SEP}{node1_attrs['description']}" | |||
| for attr in ["keywords", "source_id"]: | |||
| node0_attrs[attr] = sorted(set(node0_attrs[attr].extend(node1_attrs[attr]))) | |||
| node0_attrs["source_id"] = sorted(set(node0_attrs["source_id"].extend(node1_attrs.get("source_id", [])))) | |||
| for neighbor in graph.neighbors(node1): | |||
| change.removed_edges.add(get_from_to(node1, neighbor)) | |||
| if neighbor not in nodes_set: | |||
| @@ -213,8 +212,8 @@ class Extractor: | |||
| edge0_attrs = graph.get_edge_data(nodes[0], neighbor) | |||
| edge0_attrs["weight"] += edge1_attrs["weight"] | |||
| edge0_attrs["description"] += f"{GRAPH_FIELD_SEP}{edge1_attrs['description']}" | |||
| edge0_attrs["keywords"] = list(set(edge0_attrs["keywords"].extend(edge1_attrs["keywords"]))) | |||
| edge0_attrs["source_id"] = list(set(edge0_attrs["source_id"].extend(edge1_attrs["source_id"]))) | |||
| for attr in ["keywords", "source_id"]: | |||
| edge0_attrs[attr] = sorted(set(edge0_attrs[attr].extend(edge1_attrs.get(attr, [])))) | |||
| edge0_attrs["description"] = await self._handle_entity_relation_summary(f"({nodes[0]}, {neighbor})", edge0_attrs["description"]) | |||
| graph.add_edge(nodes[0], neighbor, **edge0_attrs) | |||
| else: | |||
| @@ -439,7 +439,7 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang | |||
| if change.removed_edges: | |||
| async with trio.open_nursery() as nursery: | |||
| for from_node, to_node in change.removed_edges: | |||
| nursery.start_soon(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id)) | |||
| nursery.start_soon(lambda: trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id))) | |||
| now = trio.current_time() | |||
| if callback: | |||
| callback(msg=f"set_graph removed {len(change.removed_nodes)} nodes and {len(change.removed_edges)} edges from index in {now - start:.2f}s.") | |||