### Related Issue: #6653

### Environment:
Using nightly version
Elasticsearch database

### Bug Description:
When clicking the "Entity Resolution" button in KnowledgeGraph, the following errors were encountered:

graphrag/entity_resolution.py
```
list(sub_connect_graph.nodes)
AttributeError
```

graphrag/general/extractor.py
```
node0_attrs[attr] = sorted(set(node0_attrs[attr].extend(node1_attrs[attr])))
TypeError: 'NoneType' object is not iterable
```
```
for attr in ["keywords", "source_id"]:
KeyError
I think attribute "keywords" is in edges not nodes
```

graphrag/utils.py
```
settings.docStoreConn.delete()  # Sync function called as async
```

### Changes Made:
- Fixed the AttributeError in entity_resolution.py by properly handling graph nodes.
- Fixed the TypeError and KeyError in extractor.py by separating the operations.
- Corrected the async/sync mismatch in the document deletion call.

tags/v0.18.0
| connect_graph.add_edges_from(resolution_result) | connect_graph.add_edges_from(resolution_result) | ||||
| async with trio.open_nursery() as nursery: | async with trio.open_nursery() as nursery: | ||||
| for sub_connect_graph in nx.connected_components(connect_graph): | for sub_connect_graph in nx.connected_components(connect_graph): | ||||
| merging_nodes = list(sub_connect_graph.nodes) | |||||
| merging_nodes = list(sub_connect_graph.nodes()) | |||||
| nursery.start_soon(lambda: self._merge_graph_nodes(graph, merging_nodes, change)) | nursery.start_soon(lambda: self._merge_graph_nodes(graph, merging_nodes, change)) | ||||
| # Update pagerank | # Update pagerank |
| # Merge two nodes, keep "entity_name", "entity_type", "page_rank" unchanged. | # Merge two nodes, keep "entity_name", "entity_type", "page_rank" unchanged. | ||||
| node1_attrs = graph.nodes[node1] | node1_attrs = graph.nodes[node1] | ||||
| node0_attrs["description"] += f"{GRAPH_FIELD_SEP}{node1_attrs['description']}" | node0_attrs["description"] += f"{GRAPH_FIELD_SEP}{node1_attrs['description']}" | ||||
| for attr in ["keywords", "source_id"]: | |||||
| node0_attrs[attr] = sorted(set(node0_attrs[attr].extend(node1_attrs[attr]))) | |||||
| node0_attrs["source_id"] = sorted(set(node0_attrs["source_id"].extend(node1_attrs.get("source_id", [])))) | |||||
| for neighbor in graph.neighbors(node1): | for neighbor in graph.neighbors(node1): | ||||
| change.removed_edges.add(get_from_to(node1, neighbor)) | change.removed_edges.add(get_from_to(node1, neighbor)) | ||||
| if neighbor not in nodes_set: | if neighbor not in nodes_set: | ||||
| edge0_attrs = graph.get_edge_data(nodes[0], neighbor) | edge0_attrs = graph.get_edge_data(nodes[0], neighbor) | ||||
| edge0_attrs["weight"] += edge1_attrs["weight"] | edge0_attrs["weight"] += edge1_attrs["weight"] | ||||
| edge0_attrs["description"] += f"{GRAPH_FIELD_SEP}{edge1_attrs['description']}" | edge0_attrs["description"] += f"{GRAPH_FIELD_SEP}{edge1_attrs['description']}" | ||||
| edge0_attrs["keywords"] = list(set(edge0_attrs["keywords"].extend(edge1_attrs["keywords"]))) | |||||
| edge0_attrs["source_id"] = list(set(edge0_attrs["source_id"].extend(edge1_attrs["source_id"]))) | |||||
| for attr in ["keywords", "source_id"]: | |||||
| edge0_attrs[attr] = sorted(set(edge0_attrs[attr].extend(edge1_attrs.get(attr, [])))) | |||||
| edge0_attrs["description"] = await self._handle_entity_relation_summary(f"({nodes[0]}, {neighbor})", edge0_attrs["description"]) | edge0_attrs["description"] = await self._handle_entity_relation_summary(f"({nodes[0]}, {neighbor})", edge0_attrs["description"]) | ||||
| graph.add_edge(nodes[0], neighbor, **edge0_attrs) | graph.add_edge(nodes[0], neighbor, **edge0_attrs) | ||||
| else: | else: |
| if change.removed_edges: | if change.removed_edges: | ||||
| async with trio.open_nursery() as nursery: | async with trio.open_nursery() as nursery: | ||||
| for from_node, to_node in change.removed_edges: | for from_node, to_node in change.removed_edges: | ||||
| nursery.start_soon(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id)) | |||||
| nursery.start_soon(lambda: trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id))) | |||||
| now = trio.current_time() | now = trio.current_time() | ||||
| if callback: | if callback: | ||||
| callback(msg=f"set_graph removed {len(change.removed_nodes)} nodes and {len(change.removed_edges)} edges from index in {now - start:.2f}s.") | callback(msg=f"set_graph removed {len(change.removed_nodes)} nodes and {len(change.removed_edges)} edges from index in {now - start:.2f}s.") |