% Workshop paper by Rohloff and Schantz (2010) on building the SHARD
% triple-store with the MapReduce software framework.
% - Author names use the unambiguous "Last, First" form.
% - Braced words in the title keep their capitalisation under styles that
%   sentence-case titles.
% - No series field: the workshop name is recorded once, in booktitle.
@inproceedings{b0fc6cbf01ad41559e2713039fdc1ae2,
  author    = {Rohloff, Kurt and Schantz, Richard E.},
  title     = {High-performance, massively scalable distributed systems using the {MapReduce} software framework: The {SHARD} Triple-Store},
  booktitle = {Workshop on Programming Support Innovations for Emerging Distributed Applications, PSI EtA - PsiH 2010},
  year      = {2010},
  doi       = {10.1145/1940747.1940751},
  isbn      = {9781450305440},
  language  = {English (US)},
  keywords  = {Distributed computing, Graph data, MapReduce, Performance evaluation, Programming, SPARQL, Semantic Web, Systems},
  abstract  = {In this paper we discuss the use of the MapReduce software framework to address the challenge of constructing high-performance, massively-scalable distributed systems. We discuss several design considerations associated with constructing complex distributed systems using the MapReduce software framework, including the difficulty of scalably building indexes. We focus on Hadoop, the most popular MapReduce implementation. Our discussion and analysis are motivated by our construction of SHARD, a massively scalable, high-performance and robust triple-store technology on top of Hadoop. We provide a general approach to construct an information system from the MapReduce software framework that responds to data queries. We provide experimental results generated of an early version of SHARD. We close with a discussion of hypothetical MapReduce alternatives that can be used for the construction of more scalable distributed computing systems.},
  note      = {SPLASH Workshop on Programming Support Innovations for Emerging Distributed Applications, PSI EtA - PsiH 2010 ; Conference date: 17-10-2010 Through 21-10-2010},
}