#' Train a Random Naive Bayes Model via Bootstrap + Random Subspace (Mixed Types)
#'
#' Fits an ensemble Naive Bayes classifier by repeating (i) stratified bootstrap
#' resampling of rows and (ii) random feature-subset selection, then aggregates
#' predictions by posterior averaging.
#'
#' Numeric predictors use Gaussian likelihoods; categorical predictors
#' (factor/character/logical) use multinomial likelihoods with Laplace smoothing.
#'
#' @param data A data.frame containing predictors and the response.
#' @param response Name of the response column (string).
#' @param n_iter Positive integer; number of bootstrap iterations.
#' @param feature_fraction Numeric in (0,1]; fraction of features sampled each iteration.
#' @param cores Positive integer; number of parallel workers.
#' @param laplace Numeric >= 0; Laplace smoothing parameter for categorical features.
#'
#' @return An object of class \code{"random_gaussian_nb"} containing the fitted
#' bootstrap ensemble and training metadata.
#'
#' @details
#' Numeric predictors use Gaussian likelihoods; categorical predictors
#' (factor/character/logical) use multinomial likelihoods with Laplace smoothing.
#'
#' The following S3 methods are available for this class:
#' \describe{
#'   \item{\code{print(x, ...)}}{Returns \code{x} invisibly (called for side effects).}
#'   \item{\code{summary(object, ...)}}{Returns \code{object} invisibly (prints a summary).}
#'   \item{\code{str(object, ...)}}{Returns \code{object} invisibly (prints a compact structure).}
#'   \item{\code{nobs(object, ...)}}{Returns an integer: number of training observations.}
#'   \item{\code{fitted(object, ...)}}{Returns a factor of fitted class labels for the training data.}
#'   \item{\code{plot(x, ...)}}{Returns \code{x} invisibly (called for its side effects).}
#' }
#'
#' @importFrom parallel makeCluster stopCluster parLapply
#' @importFrom stats sd setNames
#' @export

random_gaussian_nb <- function(data,
                               response,
                               n_iter = 100,
                               feature_fraction = 0.5,
                               cores = 1,
                               laplace = 1) {

  ## ---------- checks ----------
  if (!is.data.frame(data))
    stop("`data` must be a data.frame.")

  if (length(response) != 1L || is.na(response) || !is.character(response))
    stop("`response` must be a single non-missing character string.")

  if (is.null(names(data)))
    stop("`data` must have column names (including the response column).")

  if (!(response %in% names(data)))
    stop("Response column '", response, "' not found in `data`.")

  if (length(n_iter) != 1L || is.na(n_iter) || !is.numeric(n_iter) ||
      n_iter < 1 || n_iter != as.integer(n_iter))
    stop("`n_iter` must be a positive integer.")
  n_iter <- as.integer(n_iter)

  if (length(feature_fraction) != 1L || is.na(feature_fraction) ||
      !is.numeric(feature_fraction) || feature_fraction <= 0 || feature_fraction > 1)
    stop("`feature_fraction` must be in (0, 1].")

  if (length(cores) != 1L || is.na(cores) || !is.numeric(cores) ||
      cores < 1 || cores != as.integer(cores))
    stop("`cores` must be an integer >= 1.")
  cores <- as.integer(cores)

  if (length(laplace) != 1L || is.na(laplace) || !is.numeric(laplace) || laplace < 0)
    stop("`laplace` must be a single numeric value >= 0.")

  ## ---------- data prep ----------
  y <- factor(data[[response]])
  X <- data[, setdiff(names(data), response), drop = FALSE]

  if (ncol(X) < 1L) stop("No predictor columns found after removing the response column.")
  if (nrow(X) < 2L) stop("Need at least 2 rows to fit the model.")
  if (length(levels(y)) < 2L) stop("Response must have at least 2 classes.")
  if (anyNA(X) || anyNA(y)) stop("Missing values detected. Please impute/remove NAs first.")

  # classify predictor types
  is_num <- vapply(X, is.numeric, logical(1))
  is_cat <- !is_num

  # coerce categorical to factor (character/logical -> factor) for stable levels
  if (any(is_cat)) {
    X[is_cat] <- lapply(X[is_cat], function(col) {
      if (is.factor(col)) col else factor(col)
    })
  }

  n <- nrow(X)
  p <- ncol(X)
  classes <- levels(y)

  # number of features per model
  m <- function() max(1L, floor(feature_fraction * p))

  # stratified bootstrap indices (keeps class sizes)
  strat_boot <- function(y_vec) {
    idx_by_class <- split(seq_along(y_vec), y_vec)
    unlist(lapply(idx_by_class, function(idx) sample(idx, length(idx), replace = TRUE)),
           use.names = FALSE)
  }

  # store training levels (optional but useful for predict())
  train_levels <- NULL
  if (any(is_cat)) train_levels <- lapply(X[is_cat], levels)

  ## ---------- one bootstrap + random-subspace fit ----------
  # --- สร้าง task list ล่วงหน้า (สุ่มใน main process เท่านั้น) ---
  tasks <- lapply(seq_len(n_iter), function(i) {
    list(
      feat_idx = sample.int(p, m(), replace = FALSE),
      rows     = strat_boot(y)
    )
  })

  # --- fit_one รับ task (ไม่มี RNG ภายใน) ---
  fit_one <- function(task) {
    feats <- names(X)[task$feat_idx]
    rows  <- task$rows

    Xb <- X[rows, feats, drop = FALSE]
    yb <- y[rows]

    num_feats <- feats[is_num[feats]]
    cat_feats <- feats[is_cat[feats]]

    ## numeric parameters (Gaussian)
    mu <- sigma <- NULL
    if (length(num_feats) > 0L) {
      mu    <- stats::setNames(vector("list", length(classes)), classes)
      sigma <- stats::setNames(vector("list", length(classes)), classes)

      for (cl in classes) {
        Xc <- Xb[yb == cl, num_feats, drop = FALSE]
        mu[[cl]] <- colMeans(Xc)

        s <- vapply(Xc, stats::sd, numeric(1))
        s[is.na(s)] <- 0
        sigma[[cl]] <- pmax(s, .Machine$double.eps)
      }
    }

    ## categorical parameters (Multinomial + Laplace)
    catprob <- levels_map <- NULL
    if (length(cat_feats) > 0L) {
      catprob    <- stats::setNames(vector("list", length(cat_feats)), cat_feats)
      levels_map <- stats::setNames(vector("list", length(cat_feats)), cat_feats)

      for (f in cat_feats) {
        levs <- levels(X[[f]])
        levels_map[[f]] <- levs

        by_class <- stats::setNames(vector("list", length(classes)), classes)
        for (cl in classes) {
          xcl <- Xb[yb == cl, f, drop = TRUE]
          tab <- table(xcl)

          counts <- as.numeric(tab[levs])
          counts[is.na(counts)] <- 0

          denom <- sum(counts) + laplace * length(levs)
          prob  <- (counts + laplace) / denom
          names(prob) <- levs
          by_class[[cl]] <- prob
        }
        catprob[[f]] <- by_class
      }
    }

    ## priors
    prior <- table(yb) / length(yb)
    prior <- prior[classes]
    prior[is.na(prior)] <- 0

    list(
      feats      = feats,
      num_feats  = num_feats,
      cat_feats  = cat_feats,
      mu         = mu,
      sigma      = sigma,
      catprob    = catprob,
      levels_map = levels_map,
      prior      = prior
    )
  }

  # --- run ensemble (parallel/serial) ---
  if (cores > 1L) {
    cl <- parallel::makeCluster(cores)
    on.exit(try(parallel::stopCluster(cl), silent = TRUE), add = TRUE)

    parallel::clusterExport(
      cl,
      varlist = c("X", "y", "classes", "is_num", "is_cat",
                  "p", "m", "strat_boot", "laplace", "fit_one"),
      envir = environment()
    )

    models <- parallel::parLapply(cl, X = tasks, fun = fit_one)
  } else {
    models <- lapply(tasks, fit_one)
  }


  ## ---------- output ----------
  out <- list(
    .models          = models,
    .classes         = classes,
    n_iter           = n_iter,
    feature_fraction = feature_fraction,
    cores            = cores,
    laplace          = laplace,
    is_num           = is_num,
    is_cat           = is_cat,
    train_levels     = train_levels,
    X_train          = X,
    y_train          = y
  )
  class(out) <- "random_gaussian_nb"
  out
}
