% Chapter (pp. 3--22) of an edited Springer volume. Typed as @incollection
% because the chapter authors differ from the volume editors and the volume
% title is carried in `booktitle`.
@incollection{Chatzimilioudis2015,
  author    = {Chatzimilioudis, Georgios and Konstantinidis, Andreas and Zeinalipour-Yazti, Demetrios},
  title     = {Nearest Neighbor Queries on Big Data},
  editor    = {Pedrycz, Witold and Chen, Shyi-Ming},
  booktitle = {Information Granularity, Big Data, and Computational Intelligence},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2015},
  pages     = {3--22},
  isbn      = {978-3-319-08254-7},
  doi       = {10.1007/978-3-319-08254-7_1},
  url       = {https://doi.org/10.1007/978-3-319-08254-7_1},
  abstract  = {k Nearest Neighbor (kNN) search is one of the simplest non-parametric learning approaches, mainly used for classification and regression. kNN identifies the k nearest neighbors to a given node given a distance metric. A new challenging kNN task is to identify the k nearest neighbors for all nodes simultaneously; also known as All kNN (AkNN) search. Similarly, the Continuous All kNN (CAkNN) search answers an AkNN search in real-time on streaming data. Although such techniques find immediate application in computational intelligence tasks, among others, they have not been efficiently optimized to this date. We study specialized scalable solutions for AkNN and CAkNN processing as demanded by the volume--velocity--variety of data in the Big Data era. We present an algorithm, coined Proximity, which does not require any additional infrastructure or specialized hardware, and its efficiency is mainly attributed to our smart search space sharing technique. Its implementation is based on a novel data structure, coined k+-heap. Proximity, being parameter-free, performs efficiently in the face of high velocity and skewed data. In our analytical studies, we found that Proximity provides better time complexity compared to existing approaches and is very well suited for large scale scenarios.},
}