## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# bigmemory is checked with requireNamespace() so that a missing package
# produces a short notice and ends knitting early instead of failing at
# library(bigmemory) below.
if (!requireNamespace("bigmemory", quietly = TRUE)) {
  cat("This vignette requires the 'bigmemory' package.\n")
  knitr::knit_exit()
}

library(bigKNN)
library(bigmemory)

## ----helpers, include=FALSE---------------------------------------------------
# Reshape a k-nearest-neighbour result into a long data frame with one row per
# (query, rank) pair.
#
# result    : list-like object; this helper reads `result$k` and row `i` of
#             `result$index` and `result$distance`.
# query_ids : labels for the query rows; one group of rows is produced per
#             element.
# ref_ids   : labels for the reference rows; subscripted by the values stored
#             in `result$index`.
#
# Returns a data frame with columns query, rank, neighbor and distance
# (distance rounded to 4 significant digits for display).
knn_table <- function(result, query_ids, ref_ids) {
  do.call(rbind, lapply(seq_along(query_ids), function(i) {
    data.frame(
      query = query_ids[i],
      rank = seq_len(result$k),
      neighbor = ref_ids[result$index[i, ]],
      distance = signif(result$distance[i, ], 4),
      row.names = NULL
    )
  }))
}

# Extract the matches belonging to query `i` from a radius-search result.
#
# result  : list-like object; this helper reads `result$offset`,
#           `result$index` and `result$distance`. Elements
#           offset[i] .. offset[i + 1] - 1 of the index/distance vectors are
#           taken as the matches for query `i`.
# i       : position of the query of interest.
# ref_ids : labels for the reference rows; subscripted by the values stored
#           in `result$index`.
#
# Returns a data frame with columns neighbor and distance (distance rounded to
# 4 significant digits); zero rows when the query has no matches.
radius_slice <- function(result, i, ref_ids) {
  start <- result$offset[i]
  end <- result$offset[i + 1L] - 1L

  # An empty slice has start > end; return early so `start:end` does not
  # count backwards and pick up entries from neighbouring queries.
  if (start > end) {
    return(data.frame(neighbor = character(0), distance = numeric(0)))
  }

  data.frame(
    neighbor = ref_ids[result$index[start:end]],
    distance = signif(result$distance[start:end], 4),
    row.names = NULL
  )
}

## ----create-reference---------------------------------------------------------
reference_points <- data.frame(
  id = paste0("p", 1:6),
  x1 = c(1, 2, 1, 2, 3, 4),
  x2 = c(1, 1, 2, 2, 2, 3)
)

query_points <- data.frame(
  id = c("q1", "q2"),
  x1 = c(1.2, 2.8),
  x2 = c(1.1, 2.2)
)

# Only the coordinate columns go into the matrices; the id column is kept in
# the data frames and used later to label results.
reference <- as.big.matrix(as.matrix(reference_points[c("x1", "x2")]))
query_matrix <- as.matrix(query_points[c("x1", "x2")])

reference_points
query_points

## ----self-knn-----------------------------------------------------------------
self_knn <- knn_bigmatrix(reference, k = 2)
self_knn

## ----self-knn-components------------------------------------------------------
self_knn$index
round(self_knn$distance, 3)

## ----self-knn-table-----------------------------------------------------------
knn_table(
  self_knn,
  query_ids = reference_points$id,
  ref_ids = reference_points$id
)

## ----query-knn----------------------------------------------------------------
query_knn <- knn_bigmatrix(
  reference,
  query = query_matrix,
  k = 3,
  exclude_self = FALSE
)

query_knn
knn_table(
  query_knn,
  query_ids = query_points$id,
  ref_ids = reference_points$id
)

## ----radius-search------------------------------------------------------------
radius_result <- radius_bigmatrix(
  reference,
  query = query_matrix,
  radius = 1.15,
  exclude_self = FALSE
)

radius_result
radius_result$n_match
radius_result$offset

## ----radius-slices------------------------------------------------------------
radius_slice(radius_result, 1, reference_points$id)
radius_slice(radius_result, 2, reference_points$id)

## ----radius-counts------------------------------------------------------------
count_within_radius_bigmatrix(
  reference,
  query = query_matrix,
  radius = 1.15,
  exclude_self = FALSE
)

## ----metric-comparison--------------------------------------------------------
# Run the same 1-nearest-neighbour query once per metric and stack the
# per-metric rows into a single comparison table.
metric_summary <- do.call(rbind, lapply(
  c("euclidean", "sqeuclidean", "cosine"),
  function(metric) {
    result <- knn_bigmatrix(
      reference,
      query = query_matrix,
      k = 1,
      metric = metric,
      exclude_self = FALSE
    )

    data.frame(
      metric = metric,
      query = query_points$id,
      nearest = reference_points$id[result$index[, 1]],
      distance = signif(result$distance[, 1], 4),
      row.names = NULL
    )
  }
))

metric_summary