% Conference-paper (inproceedings) record; the citation key is the exporter's
% opaque hash and is kept verbatim so existing \cite commands keep resolving.
%
% - Names are stored in "Last, First" form; braces keep the compound parts
%   "Nhat Hai" and "Van den Poel" together as single name tokens.
% - There is deliberately no `series` field: it only repeated `booktitle`
%   verbatim, which makes styles that print both emit the proceedings title twice.
% - The conference dates (ISO 8601 range) and the publisher copyright line live
%   in `eventdate` and `copyright` instead of a free-text `note`, so they are no
%   longer printed verbatim by classic styles. `copyright`, `language`,
%   `keywords` and `abstract` are not standard fields of this entry type and are
%   kept as metadata only.
% - NOTE(review): `address` holds a country rather than the publisher's city;
%   confirm the city and replace the value.
@inproceedings{95c8bbf6686f4f8c91fa5c0030abf5f4,
  author    = {Lai, Phung and Phan, {Nhat Hai} and Sun, Tong and Jain, Rajiv and Dernoncourt, Franck and Gu, Jiuxiang and Barmpalios, Nikolaos},
  title     = {User-Entity Differential Privacy in Learning Natural Language Models},
  booktitle = {Proceedings - 2022 {IEEE} International Conference on Big Data, Big Data 2022},
  editor    = {Tsumoto, Shusaku and Ohsawa, Yukio and Chen, Lei and {Van den Poel}, Dirk and Hu, Xiaohua and Motomura, Yoichi and Takagi, Takuya and Wu, Lingfei and Xie, Ying and Abe, Akihiro and Raghavan, Vijay},
  pages     = {1465--1474},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2022},
  doi       = {10.1109/BigData55660.2022.10020247},
  eventdate = {2022-12-17/2022-12-20},
  copyright = {{\textcopyright} 2022 IEEE},
  language  = {English (US)},
  keywords  = {Differential privacy, entities, natural language models, user identity},
  abstract  = {In this paper, we introduce a novel concept of user-entity differential privacy (UeDP) to provide formal privacy protection simultaneously to both sensitive entities in textual data and data owners in learning natural language models (NLMs). To preserve UeDP, we developed a novel algorithm, called UeDP-Alg, optimizing the trade-off between privacy loss and model utility with a tight sensitivity bound derived from seamlessly combining user and sensitive entity sampling processes. An extensive theoretical analysis and evaluation show that our UeDP-Alg outperforms baseline approaches in model utility under the same privacy budget consumption on several NLM tasks, using benchmark datasets.},
}