@comment{
  Workshop paper in the IPDPSW 2023 proceedings (conference dates are recorded in the note field).
  The citation key looks like an export-generated ID; it is kept as-is so existing citations keep resolving.
  Authors are stored in unbraced Last, First form so styles can parse and abbreviate given names.
  The proceedings title lives only in booktitle; no series field is set because no distinct series name is known.
  NOTE(review): address holds a country rather than a publisher city; confirm the publisher location before relying on it.
}
@inproceedings{f9757fc4a56c49359772b92e37316e3c,
  author    = {Cappelletti, Luca and Fontana, Tommaso and Reese, Justin and Bader, David A.},
  title     = {Billion-scale Detection of Isomorphic Nodes},
  booktitle = {2023 {IEEE} International Parallel and Distributed Processing Symposium Workshops, {IPDPSW} 2023},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2023},
  pages     = {230--233},
  doi       = {10.1109/IPDPSW59300.2023.00046},
  language  = {English (US)},
  keywords  = {graph, node isomorphism, parallel algorithm},
  abstract  = {This paper presents an algorithm for detecting attributed high-degree node isomorphism. High-degree isomorphic nodes seldom happen by chance and often represent duplicated entities or data processing errors. By definition, isomorphic nodes are topologically indistinguishable and can be problematic in graph ML tasks. The algorithm employs a parallel, 'degree-bounded' approach that fingerprints each node's local properties through a hash, which constrains the search to nodes within hash-defined buckets, thus minimising the number of comparisons. This method scales on graphs with billions of nodes and edges. Finally, we provide isomorphic node oddities identified in real-world data.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 2023 IEEE International Parallel and Distributed Processing Symposium Workshops, IPDPSW 2023 ; Conference date: 15-05-2023 Through 19-05-2023},
}