% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/matching_core.R
\name{greedy_couples}
\alias{greedy_couples}
\title{Fast approximate matching using greedy algorithm}
\usage{
greedy_couples(
  left,
  right = NULL,
  vars = NULL,
  distance = "euclidean",
  weights = NULL,
  scale = FALSE,
  auto_scale = FALSE,
  max_distance = Inf,
  calipers = NULL,
  block_id = NULL,
  ignore_blocks = FALSE,
  require_full_matching = FALSE,
  strategy = c("row_best", "sorted", "pq"),
  return_unmatched = TRUE,
  return_diagnostics = FALSE,
  parallel = FALSE,
  replace = FALSE,
  ratio = 1L,
  check_costs = TRUE
)
}
\arguments{
\item{left}{Data frame of "left" units (e.g., treated, cases)}

\item{right}{Data frame of "right" units (e.g., control, controls)}

\item{vars}{Variable names to use for distance computation}

\item{distance}{Distance metric: "euclidean", "manhattan", "mahalanobis",
or a custom function}

\item{weights}{Optional named vector of variable weights}

\item{scale}{Scaling method: FALSE (none), "standardize", "range", or "robust"}

\item{auto_scale}{If TRUE, automatically check variable health and select
scaling method (default: FALSE)}

\item{max_distance}{Maximum allowed distance (pairs exceeding this are forbidden)}

\item{calipers}{Named list of per-variable maximum absolute differences}

\item{block_id}{Column name containing block IDs (for stratified matching)}

\item{ignore_blocks}{If TRUE, ignore block_id even if present}

\item{require_full_matching}{If TRUE, error if any units remain unmatched}

\item{strategy}{Greedy strategy:
\itemize{
\item "row_best": For each row, find best available column (default)
\item "sorted": Sort all pairs by distance, greedily assign
\item "pq": Use priority queue (good for very large problems)
}}

\item{return_unmatched}{Include unmatched units in output}

\item{return_diagnostics}{Include detailed diagnostics in output}

\item{parallel}{Enable parallel processing for blocked matching.
Requires 'future' and 'future.apply' packages. Can be:
\itemize{
\item \code{FALSE}: Sequential processing (default)
\item \code{TRUE}: Auto-configure parallel backend
\item Character: Specify future plan (e.g., "multisession", "multicore")
}}

\item{replace}{If TRUE, allow matching with replacement (same right unit
can be matched to multiple left units). Default: FALSE.}

\item{ratio}{Integer, number of right units to match per left unit.
Default: 1 (one-to-one matching). For k:1 matching, set ratio = k.}

\item{check_costs}{If TRUE, check distance distribution for potential problems
and provide helpful warnings before matching (default: TRUE)}
}
\value{
A list with class "matching_result" (same structure as match_couples)
}
\description{
Performs fast matching (one-to-one by default) using greedy strategies. Does not guarantee
optimal total distance but is much faster than \code{\link[=match_couples]{match_couples()}} for large
datasets. Supports blocking, distance constraints, and various distance metrics.
}
\details{
Greedy strategies do not guarantee optimal total distance but are much faster:
\itemize{
\item "row_best": O(n*m) time, simple and often produces good results
\item "sorted": O(n*m*log(n*m)) time, better quality but slower
\item "pq": O(n*m*log(n*m)) time, memory-efficient for large problems
}

Use greedy_couples when:
\itemize{
\item Dataset is very large (> 10,000 x 10,000)
\item Approximate solution is acceptable
\item Speed is more important than optimality
}
}
\examples{
# Basic greedy matching
left <- data.frame(id = 1:100, x = rnorm(100))
right <- data.frame(id = 101:200, x = rnorm(100))
result <- greedy_couples(left, right, vars = "x")

# Compare to optimal
result_opt <- match_couples(left, right, vars = "x")
result_greedy <- greedy_couples(left, right, vars = "x")
result_greedy$info$total_distance / result_opt$info$total_distance  # Quality ratio

}
