#' Test Whether a Vector Looks Like a 0/1 Indicator
#'
#' Missing values are ignored. Because of that, an empty or all-\code{NA}
#' numeric vector also yields \code{TRUE}.
#'
#' @param x A vector to check.
#' @return A single logical: \code{TRUE} when \code{x} is numeric and every
#'   non-missing value is 0 or 1, otherwise \code{FALSE}.
#' @keywords internal
.is_binary_like <- function(x) {
  # Logical, character and factor input can never be binary-like here.
  if (!is.numeric(x)) {
    return(FALSE)
  }

  # A 0/1 indicator has at most two distinct non-missing values.
  distinct_values <- na.omit(unique(x))
  if (length(distinct_values) > 2) {
    return(FALSE)
  }

  observed <- na.omit(x)
  all(observed %in% c(0, 1))
}


#' Discretize Numeric Vector into Terciles
#'
#' Converts a numeric vector into an ordered factor with three levels
#' (low, medium, high). Each non-missing value is ranked, the rank is divided
#' by the number of non-missing values, and the result is cut at 1/3 and 2/3
#' (upper bounds inclusive).
#'
#' Ties are broken deterministically by order of appearance: among equal
#' values, the one that occurs first in \code{x} receives the lower rank.
#' The ordering of distinct values is never altered, whatever their
#' magnitude.
#'
#' @param x Numeric vector to discretize. Missing values are allowed and
#'   stay missing in the result.
#'
#' @return An ordered factor with levels "low", "medium", "high" and the
#'   same length as \code{x}. If \code{x} has no non-missing values, every
#'   element of the result is \code{NA}.
#'
#' @export
#' @examples
#' x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)
#' disc_terciles(x)
disc_terciles <- function(x) {
  labs <- c("low", "medium", "high")
  n <- sum(!is.na(x))
  if (n == 0) {
    return(factor(rep(NA_character_, length(x)), levels = labs, ordered = TRUE))
  }

  # ties.method = "first" already breaks ties by position, so no jitter is
  # added to the data: an absolute jitter can exceed the gaps between
  # small-magnitude values and reorder values that are genuinely distinct.
  r <- rank(x, ties.method = "first", na.last = "keep")

  # Non-missing ranks run from 1 to n, so r / n lies in (0, 1].
  pr <- r / n
  cut(
    pr,
    breaks = c(0, 1 / 3, 2 / 3, 1),
    labels = labs,
    include.lowest = TRUE,
    right = TRUE,
    ordered_result = TRUE
  )
}


#' Standardize Continuous Columns
#'
#' Standardizes selected numeric columns using z-score (mean/SD) or robust
#' (median/MAD) methods and returns a modified copy; \code{DT} itself is
#' left untouched. Requested columns that are absent, non-numeric, or
#' binary-like (0/1) are left unchanged and get no entry in \code{scalers}.
#'
#' A scale estimate that is zero or \code{NA} is replaced by 1, so such a
#' column is only centered. Missing values are ignored when the center and
#' scale are estimated.
#'
#' @param DT A data.table or data.frame.
#' @param cols Character vector of column names to standardize. A name that
#'   appears more than once is processed only once.
#' @param method Character; either "zscore" or "robust".
#' @param center Logical; whether to center the data. If \code{FALSE} the
#'   recorded center is 0.
#' @param scale Logical; whether to scale the data. If \code{FALSE} the
#'   recorded scale is 1.
#'
#' @return A list with components:
#'   \item{DT}{A standardized copy of \code{DT}.}
#'   \item{scalers}{A named list with one element per standardized column,
#'     each a list with \code{method}, \code{center} and \code{scale} (the
#'     values actually applied). The requested \code{cols} are stored in the
#'     attribute \code{"cols"}.}
#'
#' @export
standardize_continuous <- function(
    DT, cols,
    method = c("zscore", "robust"),
    center = TRUE, scale = TRUE
) {
  method <- match.arg(method)
  DT2 <- data.table::copy(DT)
  scalers <- list()

  # Pick the location/spread estimators once instead of per column.
  if (method == "zscore") {
    center_fun <- function(v) mean(v, na.rm = TRUE)
    scale_fun <- function(v) stats::sd(v, na.rm = TRUE)
  } else {
    center_fun <- function(v) stats::median(v, na.rm = TRUE)
    scale_fun <- function(v) stats::mad(v, constant = 1.4826, na.rm = TRUE)
  }

  # unique(): a name listed twice would otherwise be standardized twice, and
  # the recorded scaler would be overwritten with the parameters of the
  # already-standardized data instead of the original ones.
  for (nm in unique(cols)) {
    if (!nm %in% names(DT2)) next
    x <- DT2[[nm]]
    if (!is.numeric(x) || .is_binary_like(x)) next

    loc <- if (center) center_fun(x) else 0
    spread <- if (scale) scale_fun(x) else 1
    # Zero or undefined spread: fall back to 1 to avoid dividing by 0 / NA.
    if (is.na(spread) || spread == 0) spread <- 1

    DT2[[nm]] <- (x - loc) / spread
    scalers[[nm]] <- list(method = method, center = loc, scale = spread)
  }

  attr(scalers, "cols") <- cols
  list(DT = DT2, scalers = scalers)
}


#' Standardize Continuous Columns In Place
#'
#' Standardizes selected numeric columns of a data.table in place using
#' a z-score transformation. The function modifies \code{DT} by reference
#' and stores the means and standard deviations in an attribute called
#' \code{"standardization"}, which is also set by reference so that it is
#' visible on the caller's object.
#'
#' Missing values are ignored when means and standard deviations are
#' computed. A column whose standard deviation is zero or \code{NA} is
#' divided by 1, i.e. it is only centered.
#'
#' @param DT A \code{data.table}. It is modified by reference.
#' @param cols Character vector of column names to standardize. Columns
#'   that are not present in \code{DT} or are not numeric are silently skipped.
#' @param center Logical; whether to subtract the column mean.
#' @param scale Logical; whether to divide by the column standard deviation.
#'
#' @return The modified \code{data.table} \code{DT} (invisibly), with an
#'   attribute \code{"standardization"}: a list with \code{means} (all 0 when
#'   \code{center = FALSE}), \code{sds} (the computed standard deviations,
#'   all 1 when \code{scale = FALSE}) and \code{cols}, the names of the
#'   standardized columns.
#'
#' @examples
#' \donttest{
#' library(data.table)
#' DT <- data.table(x = rnorm(10), y = runif(10), z = 0:9)
#' standardize_continuous_in_place(DT, c("x", "y"))
#' attr(DT, "standardization")
#' }
#' @export
standardize_continuous_in_place <- function(DT, cols,
                                            center = TRUE, scale = TRUE) {
  if (!data.table::is.data.table(DT)) {
    stop("DT must be a data.table")
  }

  # intersect() drops unknown names and duplicates in one step.
  cols <- intersect(cols, names(DT))
  is_num <- vapply(cols, function(nm) is.numeric(DT[[nm]]), logical(1))
  cols_num <- cols[is_num]

  means <- vapply(
    cols_num, function(nm) mean(DT[[nm]], na.rm = TRUE), numeric(1)
  )
  sds <- vapply(
    cols_num, function(nm) stats::sd(DT[[nm]], na.rm = TRUE), numeric(1)
  )
  if (!scale) sds[] <- 1
  if (!center) means[] <- 0

  for (nm in cols_num) {
    sd_nm <- sds[[nm]]
    # sd() is NA with fewer than two non-missing values; without the is.na()
    # guard the comparison below would fail inside if().
    divisor <- if (is.na(sd_nm) || sd_nm == 0) 1 else sd_nm
    # set() takes plain values, so columns that happen to be called "nm",
    # "means" or "divisor" cannot shadow the local variables.
    data.table::set(DT, j = nm, value = (DT[[nm]] - means[[nm]]) / divisor)
  }

  # attr<- would attach the attribute to a copy local to this function;
  # setattr() modifies the caller's data.table itself.
  data.table::setattr(
    DT, "standardization",
    list(means = means, sds = sds, cols = cols_num)
  )
  invisible(DT)
}
