% Conference paper: "Statistically rigorous testing of clustering
% implementations" (AITest 2019 proceedings, pages 91--98).
% The citation key is an opaque exported identifier; it is kept unchanged so
% existing \cite commands continue to resolve.
% `day` is not a standard BibTeX field; styles that do not know it ignore it.
% NOTE(review): `address` holds a country rather than a publisher city --
% confirm the intended value before relying on it in printed output.
@inproceedings{ef4ab32ce7044c41a8ff3579211ccef9,
  author    = {Yin, Xin and Musco, Vincenzo and Neamtiu, Iulian and Roshan, Usman},
  title     = {Statistically rigorous testing of clustering implementations},
  booktitle = {Proceedings - 2019 {IEEE} International Conference on Artificial Intelligence Testing, {AITest} 2019},
  pages     = {91--98},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2019},
  month     = may,
  day       = {17},
  doi       = {10.1109/AITest.2019.000-1},
  language  = {English (US)},
  keywords  = {Clustering, Machine Learning, Statistics, Testing},
  abstract  = {Clustering is a widely-used and well-studied AI branch, but defining clustering correctness, as well as verifying and validating clustering implementations, remains a challenge. To address this, we propose a statistically rigorous approach that couples differential clustering with statistical hypothesis testing, namely we conduct statistical hypothesis testing on the outcome (distribution) of differential clustering to reveal problematic outcomes. We employed this approach on widely-used clustering algorithms implemented in popular ML toolkits; the toolkits were tasked with clustering datasets from the Penn Machine Learning Benchmark. The results indicate that there are statistically significant differences in clustering outcomes in a variety of scenarios where users might not expect clustering outcome variation.},
  note      = {Funding Information: This material is based upon work supported by the NSF Grant No. CCF-1629186. Research was sponsored by the Army Research Laboratory and was accomplished under Cooperative Agreement Number W911NF-13-2-0045 (ARL Cyber Security CRA). The views and conclusions contained in this document are those of the authors and should not be interpreted as representing the official policies, either expressed or implied, of the Army Research Laboratory or the U.S. Government. The U.S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation here on. Publisher Copyright: {\textcopyright} 2019 IEEE.; 1st IEEE International Conference on Artificial Intelligence Testing, AITest 2019 ; Conference date: 04-04-2019 Through 09-04-2019},
}