% INFOCOM 2018 paper on Logical-block Affinity Scheduling (LAS); record cleaned from a portal export.
% The printed note field carries only the conference name and dates. The funding statement,
% copyright line and a stray paragraph from the export are kept in the funding, copyright and
% internal-note fields, which standard styles do not print.
% NOTE(review): the author spelling "En Tail" is kept as exported -- confirm against the DOI record.
% NOTE(review): address holds a country rather than a publisher city -- confirm and refine.
@inproceedings{1309eb52105c458fb22b9ef89eac54cd,
  author        = {Bao, Liang and Wu, Chase Q. and Qi, Haiyang and Chen, Weizhao and Zhang, Xin and Han, Weina and Wei, Wei and Tail, En and Wang, Hao and Zhai, Jiahao and Chen, Xiang},
  title         = {{LAS}: Logical-Block Affinity Scheduling in Big Data Analytics Systems},
  booktitle     = {{INFOCOM} 2018 - {IEEE} Conference on Computer Communications},
  series        = {Proceedings - IEEE INFOCOM},
  pages         = {522--530},
  publisher     = {Institute of Electrical and Electronics Engineers Inc.},
  address       = {United States},
  year          = {2018},
  month         = oct,
  day           = {8},
  doi           = {10.1109/INFOCOM.2018.8486297},
  language      = {English (US)},
  note          = {2018 IEEE Conference on Computer Communications, INFOCOM 2018; Conference date: 15-04-2018 through 19-04-2018},
  abstract      = {Parallel computing combined with distributed data storage and management has been widely adopted by most big data analytics systems. Scheduling computing tasks to improve data locality is crucial to the performance of such systems. While existing schedulers target near-data scheduling on top of physical data blocks, these systems face a new scheduling problem where computing tasks process table-based datasets directly and access large physical blocks indirectly through their indices stored in associated small logical blocks. This new problem invalidates the basic assumption made by many existing algorithms on near-data scheduling. In this paper, we propose a Logical-block Affinity Scheduling (LAS) algorithm to coordinate the near-data scheduling of computing tasks and the placement of logical blocks for a desired balance between data-locality and load-balancing to maximize system throughput. The proposed algorithm is implemented and evaluated using a well-known big data benchmark and a practical production system deployed in public clouds. Extensive experimental results illustrate the performance superiority of LAS over three existing scheduling algorithms.},
  funding       = {This work is supported by the National Natural Science Foundation of China under Grant No. 61202040 and 61772392 with XiDian University and Grant No. 61472320 and U1609202 with Northwest University, P.R. China. This research is also partially sponsored by the U.S. National Science Foundation under Grant No. CNS-1560698 with New Jersey Institute of Technology.},
  copyright     = {Publisher Copyright: {\textcopyright} 2018 IEEE.},
  internal-note = {Text formerly in the note field under the label Funding Information: It is of our future interest to refine our system and cost models to improve the accuracy of performance modeling. We will investigate the user and system dynamics in shared environments and design new approaches to account for such dynamics in our algorithm design. We will also explore to integrate the proposed scheduling algorithm into the major Hadoop/Spark releases to add an additional level of intelligence to widely deployed big data systems combining parallel computing and distributed data management.},
}