% Conference paper (ICDM 2020, pp. 1316--1321) introducing the LILA framework.
% The citation key is an opaque export identifier; it is kept unchanged so
% existing \cite commands keep resolving.
% Conference name and dates live in eventtitle/eventdate (ISO 8601 range) and
% the copyright line in `copyright`, instead of being printed through `note`.
% Classic BibTeX styles ignore these fields; biblatex reads the event fields.
% NOTE(review): `address` holds a country rather than the publisher's city --
% confirm the intended value before relying on it in rendered output.
@inproceedings{9bdd632a3b1945fb98c6f01dcc98ff7f,
  author     = {Wang, Wentao and Derr, Tyler and Ma, Yao and Wang, Suhang and Liu, Hui and Liu, Zitao and Tang, Jiliang},
  title      = {Learning from incomplete labeled data via adversarial data generation},
  editor     = {Plant, Claudia and Wang, Haixun and Cuzzocrea, Alfredo and Zaniolo, Carlo and Wu, Xindong},
  booktitle  = {Proceedings - 20th {IEEE} International Conference on Data Mining, {ICDM} 2020},
  series     = {Proceedings - {IEEE} International Conference on Data Mining, {ICDM}},
  eventtitle = {20th {IEEE} International Conference on Data Mining, {ICDM} 2020},
  eventdate  = {2020-11-17/2020-11-20},
  pages      = {1316--1321},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  address    = {United States},
  year       = {2020},
  month      = nov,
  doi        = {10.1109/ICDM50108.2020.00170},
  language   = {English (US)},
  keywords   = {Active learning, Generative model, Incomplete labeled data},
  copyright  = {{\textcopyright} 2020 IEEE},
  abstract   = {Positive and unlabeled (PU) learning aims to obtain a well-performed classifier via an incomplete binary training set, in which only a part of labels of one category is known while the rest are unknown. However, in many real-world applications such as image recognition, the collected data samples often involve more than two categories. Moreover, only a small portion of the collected samples might have associated labels due to some practical reasons, and these labeled samples cannot always cover all the categories. We refer to this type of data as incomplete labeled data. In this paper, we first formally define the incomplete labeled data learning problem and then aim to tackle it via adversarial data generation. Specifically, we propose a novel generative framework LILA, which can produce synthetic labeled samples for both partially labeled categories and unlabeled categories. To enforce that the generated samples for unlabeled categories can associate with correct labels, we integrate two active learning processes into the LILA framework for selecting unlabeled samples in the collected sample set to query their labels effectively. After LILA has been well trained, a classifier can be trained on the balanced augmented data set consisting of both generated and original labeled samples. Extensive experiments on real image data demonstrate the effectiveness of our proposed framework. We release the implementation of the proposed framework via https://github.com/wentao-repo/LILA.},
}