@inproceedings{346d7fb1997548e3bbe0ed613be72930,
  % Key kept as-is (Pure export hash) so existing \cite commands keep working.
  title     = {Learning and generalization of one-hidden-layer neural networks, going beyond standard {Gaussian} data},
  abstract  = {This paper analyzes the convergence and generalization of training a one-hidden-layer neural network when the input features follow the Gaussian mixture model consisting of a finite number of Gaussian distributions. Assuming the labels are generated from a teacher model with an unknown ground truth weight, the learning problem is to estimate the underlying teacher model by minimizing a non-convex risk function over a student neural network. With a finite number of training samples, referred to the sample complexity, the iterations are proved to converge linearly to a critical point with guaranteed generalization error. In addition, for the first time, this paper characterizes the impact of the input distributions on the sample complexity and the learning rate.},
  keywords  = {Gaussian mixture model, convergence, generalization, neural networks, sample complexity},
  % Names in unambiguous "Last, First" form so BibTeX splits them correctly.
  author    = {Li, Hongkang and Zhang, Shuai and Wang, Meng},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE.; 56th Annual Conference on Information Sciences and Systems, CISS 2022 ; Conference date: 09-03-2022 Through 11-03-2022},
  year      = {2022},
  doi       = {10.1109/CISS53076.2022.9751184},
  language  = {English (US)},
  series    = {2022 56th Annual Conference on Information Sciences and Systems, CISS 2022},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {37--42},
  booktitle = {2022 56th Annual Conference on Information Sciences and Systems, CISS 2022},
  % NOTE(review): "address" here is the publisher's country (Pure export
  % convention), not the conference venue — confirm before repurposing.
  address   = {United States},
}