% ICNC 2025 conference paper by Zhang, Wu and Hou on cross-layer scheduling of MapReduce workflows.
% Proper nouns in the title are brace-protected so sentence-casing styles keep their capitals.
% The exporter's copyright / conference-date text is kept verbatim in "annote", which standard
% styles do not print; "series" is omitted because it only repeated "booktitle".
% NOTE(review): "address" holds a country rather than the publisher's city -- confirm and refine.
@inproceedings{4b5f9394dff1486786cb9511eff2cf40,
author = "Zhang, Yijie and Wu, Chase Q. and Hou, Aiqin",
title = "Cross-layer Scheduling for {MapReduce}-based Big Data Workflows in Heterogeneous {Hadoop} Systems",
booktitle = "2025 International Conference on Computing, Networking and Communications, ICNC 2025",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
address = "United States",
year = "2025",
pages = "350--355",
doi = "10.1109/ICNC64010.2025.10993951",
language = "English (US)",
keywords = "Big data, Hadoop, cross-layer scheduling",
abstract = "The performance of big data workflows depends on both the workflow mapping scheme, which determines task assignment and container allocation in Hadoop, and the on-node scheduling policy, which governs resource allocation and container provisioning. Most research on big data workflow scheduling focuses solely on workflow mapping, achieving only limited success. We conduct an in-depth investigation into the impact of node-level scheduling on overall workflow performance and explore the benefits of combining these two levels of scheduling (workflow- and node-level). We formulate a generic problem that considers cross-layer scheduling to minimize the end-to-end delay of MapReduce-based big data workflows in the Hadoop system. The efficacy of our proposed solution, compared with existing methods, is demonstrated through extensive simulations and proof-of-concept experiments using real-life big data workflows deployed on a real-life cluster.",
annote = "Publisher Copyright: {\textcopyright} 2025 IEEE.; 2025 International Conference on Computing, Networking and Communications, ICNC 2025 ; Conference date: 17-02-2025 Through 20-02-2025",
}