% Conference paper (ICPADS 2023) on the HDPS job scheduling algorithm for YARN.
% Acronyms and proper nouns in the title are brace-protected so that
% sentence-casing bibliography styles do not lowercase them.
% NOTE(review): `address` holds a country rather than the publisher's city;
% the value is kept as exported -- confirm and replace with the city if known.
@inproceedings{17d0f12b789747d7a68b2f7d3139c051,
  author    = {Du, Nana and Ji, Yudong and Hou, Aiqin and Wu, Chase and Nie, Weike},
  title     = {Dynamic Priority Job Scheduling on a {Hadoop} {YARN} Platform},
  booktitle = {Proceedings - 2023 {IEEE} 29th International Conference on Parallel and Distributed Systems, {ICPADS} 2023},
  series    = {Proceedings of the International Conference on Parallel and Distributed Systems - ICPADS},
  pages     = {412--419},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2023},
  doi       = {10.1109/ICPADS60453.2023.00069},
  language  = {English (US)},
  keywords  = {Data Locality, Hadoop, Job Scheduling, MapReduce, YARN},
  abstract  = {In Hadoop's big data processing systems, YARN is responsible for resource management and job scheduling. The built-in job scheduling algorithms in YARN are simple to execute, but have some limitations such as job starvation, excessive server load, and load imbalance. In this paper, we propose a new Hybrid Dynamic Priority job Scheduling algorithm (HDPS) to address these limitations. HDPS dynamically adjusts the priority of a job as its waiting time increases to prevent job starvation. It also features a task assignment strategy designed specifically to address data locality by considering the available resources of servers and the distribution of data blocks stored on servers to reduce data transfer time and improve job execution efficiency. We implement and integrate HDPS into YARN and conduct experiments in a real Hadoop system using built-in benchmark test cases of Hadoop. Experimental results show that HDPS exhibits comprehensive superior performance over existing algorithms in terms of execution efficiency and load balance.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 29th IEEE International Conference on Parallel and Distributed Systems, ICPADS 2023 ; Conference date: 17-12-2023 Through 21-12-2023},
}