% Conference paper: "Statistically rigorous testing of clustering implementations"
% (AITest 2019 proceedings, pp. 91--98). Citation key is kept unchanged so existing
% \cite commands continue to resolve.
%
% Cleanup relative to the exported record:
%   - the series field was removed: it repeated the booktitle verbatim and is not a
%     real series name, so styles that print series showed the proceedings title twice;
%   - the publisher copyright boilerplate was moved out of the printed note field into
%     a copyright field, which the standard styles do not print;
%   - author names use the unambiguous "Last, First" form;
%   - acronyms in booktitle/note are brace-protected against style recasing.
%
% NOTE(review): address holds a country ("United States"), not the publisher's city,
% which is what the address field is meant to carry. The correct city is not derivable
% from this record -- confirm and replace.
% NOTE(review): the conference dates in the note are kept exactly as exported;
% presumably DD-MM-YYYY -- confirm before reformatting.
@inproceedings{ef4ab32ce7044c41a8ff3579211ccef9,
  author    = {Yin, Xin and Musco, Vincenzo and Neamtiu, Iulian and Roshan, Usman},
  title     = {Statistically rigorous testing of clustering implementations},
  booktitle = {Proceedings - 2019 {IEEE} International Conference on Artificial Intelligence Testing, {AITest} 2019},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {91--98},
  year      = {2019},
  month     = may,
  day       = {17},
  doi       = {10.1109/AITest.2019.000-1},
  note      = {1st {IEEE} International Conference on Artificial Intelligence Testing, {AITest} 2019; conference date: 04-04-2019 through 09-04-2019},
  copyright = {{\textcopyright} 2019 IEEE},
  keywords  = {Clustering, Machine Learning, Statistics, Testing},
  language  = {English (US)},
  abstract  = {Clustering is a widely-used and well-studied AI branch, but defining clustering correctness, as well as verifying and validating clustering implementations, remains a challenge. To address this, we propose a statistically rigorous approach that couples differential clustering with statistical hypothesis testing, namely we conduct statistical hypothesis testing on the outcome (distribution) of differential clustering to reveal problematic outcomes. We employed this approach on widely-used clustering algorithms implemented in popular ML toolkits; the toolkits were tasked with clustering datasets from the Penn Machine Learning Benchmark. The results indicate that there are statistically significant differences in clustering outcomes in a variety of scenarios where users might not expect clustering outcome variation.},
}