% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mixMN.R
\name{mixMN}
\alias{mixMN}
\title{Estimate a single-layer MGM network with bootstrap centrality, bridge metrics, clustering,
and (optionally) community score loadings}
\usage{
mixMN(
  data,
  reps = 100,
  scale = TRUE,
  lambdaSel = c("CV", "EBIC"),
  lambdaFolds = 5,
  lambdaGam = 0.25,
  alphaSeq = 1,
  alphaSel = "CV",
  alphaFolds = 5,
  alphaGam = 0.25,
  k = 2,
  ruleReg = "AND",
  threshold = "LW",
  overparameterize = FALSE,
  thresholdCat = TRUE,
  quantile_level = 0.95,
  covariates = NULL,
  exclude_from_cluster = NULL,
  treat_singletons_as_excluded = FALSE,
  seed_model = NULL,
  seed_boot = NULL,
  cluster_method = c("louvain", "fast_greedy", "infomap", "walktrap", "edge_betweenness"),
  compute_loadings = TRUE,
  boot_what = c("general_index", "bridge_index", "excluded_index", "community",
    "loadings"),
  save_data = FALSE,
  progress = TRUE
)
}
\arguments{
\item{data}{A \code{data.frame} (n x p) with variables in columns.
Variables may be numeric, integer, logical, or factors.
Character and Date/POSIXt variables are not supported and must be converted
prior to model fitting.
Variable types are internally mapped to MGM types as follows:
continuous numeric (double) variables are treated as Gaussian;
integer variables are treated as Poisson unless they take only values
in \{0,1\}, in which case they are treated as binary categorical;
factors and logical variables are treated as categorical.
Binary categorical variables (two-level
factors and logical variables) are internally recoded to \{0,1\} for model
fitting. The original input data are not modified.}

\item{reps}{Integer (>= 0). Number of bootstrap replications.}

\item{scale}{Logical; if \code{TRUE} (default), Gaussian variables
(\code{type == "g"}) are z-standardized internally by \code{mgm()}. Use
\code{scale = FALSE} if your data are already standardized.}

\item{lambdaSel}{Method for lambda selection: \code{"CV"} or \code{"EBIC"}.}

\item{lambdaFolds}{Number of folds for CV (if \code{lambdaSel = "CV"}).}

\item{lambdaGam}{EBIC gamma parameter (if \code{lambdaSel = "EBIC"}).}

\item{alphaSeq}{Alpha parameters of the elastic net penalty (values between 0 and 1).}

\item{alphaSel}{Method for selecting the alpha parameter: \code{"CV"} or \code{"EBIC"}.}

\item{alphaFolds}{Number of folds for CV (if \code{alphaSel = "CV"}).}

\item{alphaGam}{EBIC gamma parameter (if \code{alphaSel = "EBIC"}).}

\item{k}{Integer (>= 1). Order of modeled interactions.}

\item{ruleReg}{Rule to combine neighborhood estimates: \code{"AND"} or \code{"OR"}.}

\item{threshold}{Threshold below which edge weights are set to zero.
Available options are \code{"LW"}, \code{"HW"}, or \code{"none"}.
\code{"LW"} applies the threshold proposed by Loh & Wainwright;
\code{"HW"} applies the threshold proposed by Haslbeck & Waldorp;
\code{"none"} disables thresholding. Defaults to \code{"LW"}.}

\item{overparameterize}{Logical; controls how categorical interactions are
parameterized in the neighborhood regressions. If \code{TRUE}, categorical
interactions are represented using a fully over-parameterized design matrix
(i.e., all category combinations are explicitly modeled). If \code{FALSE},
the standard \code{glmnet} parameterization is used, where one category
serves as reference. For continuous variables, both parameterizations are
equivalent. The default is \code{FALSE}. The over-parameterized option may
be advantageous when distinguishing pairwise from higher-order interactions.}

\item{thresholdCat}{Logical; if \code{FALSE} thresholds of categorical
variables are set to zero.}

\item{quantile_level}{Level of the central bootstrap quantile region (default \code{0.95}).
Must be a single number between 0 and 1.}

\item{covariates}{Character vector. Variables used as adjustment covariates
in model estimation.}

\item{exclude_from_cluster}{Character vector. Nodes excluded from community
detection (in addition to \code{covariates}).}

\item{treat_singletons_as_excluded}{Logical; if \code{TRUE}, singleton
communities (size 1) are treated as excluded nodes when computing
bridge metrics.}

\item{seed_model}{Optional integer seed for reproducibility of the initial
MGM fit.}

\item{seed_boot}{Optional integer seed passed to \code{future.apply} for
reproducibility of bootstrap replications.}

\item{cluster_method}{Community detection algorithm used on the network:
\code{"louvain"}, \code{"fast_greedy"}, \code{"infomap"},
\code{"walktrap"}, or \code{"edge_betweenness"}.}

\item{compute_loadings}{Logical; if \code{TRUE} (default), compute community loadings
(\code{EGAnet::net.loads}). Only supported for Gaussian, Poisson, and binary
categorical nodes; otherwise loadings are skipped and the reason is
stored in \code{community_loadings$reason}.}

\item{boot_what}{Character vector specifying which quantities to bootstrap.
Valid options are:
 \code{"general_index"} (centrality indices),
 \code{"bridge_index"} (bridge metrics for nodes in communities),
 \code{"excluded_index"} (bridge metrics for nodes treated as excluded),
 \code{"community"} (community memberships),
 \code{"loadings"} (community loadings, only if \code{compute_loadings = TRUE}),
 and \code{"none"} (skip all node-level bootstrap: only edge-weight
 bootstrap is performed if \code{reps > 0}).}

\item{save_data}{Logical; if \code{TRUE}, store the original data in the output object.}

\item{progress}{Logical; if \code{TRUE} (default), show a bootstrap progress bar.}
}
\value{
An object of class \code{c("mixmashnet", "mixMN_fit")}, that is a list with
the following top-level components:
\describe{
  \item{\code{call}}{
    The matched function call.
  }
  \item{\code{settings}}{
    List of main settings used in the call, including
    \code{reps}, \code{cluster_method},
    \code{covariates}, \code{exclude_from_cluster},
    \code{treat_singletons_as_excluded}, and \code{boot_what}.
  }
 \item{\code{data_info}}{
    List with information derived from the input data used for model setup:
    \code{mgm_type_level} (data frame with one row per variable, reporting
    the original R class and the inferred MGM \code{type} and \code{level},
    as used in the call to \code{mgm::mgm}),
    and \code{binary_recode_map} (named list describing the mapping from
    original binary labels to the internal \{0,1\} coding used for model fitting).
  }
  \item{\code{model}}{
    List with:
    \code{mgm} (the fitted \code{mgm} object),
    \code{nodes} (character vector of all node names),
    \code{n} (number of observations),
    \code{p} (number of variables), and
    \code{data} (if \code{save_data = TRUE}).
  }
  \item{\code{graph}}{
    List describing the graph:
    \code{igraph} (an \pkg{igraph} object built on
    \code{keep_nodes_graph}, with edge attributes
    \code{weight}, \code{abs_weight}, \code{sign} and vertex attribute
    \code{membership} for communities),
    \code{keep_nodes_graph} (nodes retained in the graph and all node-level
    metrics), and \code{keep_nodes_cluster} (nodes used for community
    detection).
  }
  \item{\code{communities}}{
    List describing community structure with:
    \code{original_membership} (integer vector of community labels on
    \code{keep_nodes_cluster}),
    \code{groups} (factor of community labels actually used for bridge
    metrics, optionally with singletons treated as excluded),
    \code{palette} (named vector of colors per community), and
    \code{boot_memberships} (list of bootstrap memberships if
    \code{"community"} is requested in \code{boot_what}, otherwise an empty
    list).
  }
  \item{\code{statistics}}{
    List with node- and edge-level summaries:
    \code{node} is a list with:
    \code{true} (data frame with one row per node in
    \code{keep_nodes_graph}, containing the node name and metrics
    \code{strength}, \code{ei1}, \code{closeness}, \code{betweenness},
    \code{bridge_strength}, \code{bridge_betweenness}, \code{bridge_closeness},
    \code{bridge_ei1}, \code{bridge_ei2}, and for nodes treated as excluded
    from communities also
    \code{bridge_strength_excluded},
    \code{bridge_betweenness_excluded},
    \code{bridge_closeness_excluded},
    \code{bridge_ei1_excluded}, \code{bridge_ei2_excluded});
    \code{boot} (list of bootstrap matrices for each metric, each of
    dimension \code{reps x length(keep_nodes_graph)}, possibly \code{NULL}
    if the metric was not requested or if \code{reps = 0}); and
    \code{quantile_region} (list of quantile regions for each node metric, one
    \code{p x 2} matrix per metric, with columns corresponding to the lower and upper
    quantile bounds implied by \code{quantile_level}, or \code{NULL} if no bootstrap was performed).

    \code{edge} is a list with:
    \code{true} (data frame with columns \code{edge} and \code{weight} for
    all unique undirected edges among \code{keep_nodes_graph});
    \code{boot} (matrix of bootstrap edge weights of dimension
    \code{n_edges x reps}); and
    \code{quantile_region} (matrix of quantile regions for edge weights,
    \code{n_edges x 2}, with columns corresponding to the lower and upper
    bootstrap quantile bounds, or \code{NULL} if \code{reps = 0}).
  }
  \item{\code{community_loadings}}{
    List containing community-loading information (based on
    \code{EGAnet::net.loads}) for later community-score computation on new
    data:
    \code{nodes} (nodes used for loadings),
    \code{wc} (integer community labels aligned with \code{nodes}),
    \code{true} (matrix of standardized loadings, nodes x communities,
    or \code{NULL} if loadings were not computed),
    \code{boot} (list of bootstrap loading matrices, one per replication,
    or \code{NULL} if not bootstrapped),
    \code{available} (logical indicating whether loadings were computed),
    \code{reason} (character string explaining why loadings were not computed,
        or \code{NULL} if \code{available = TRUE}),
    \code{non_scorable_nodes} (character vector of nodes in the community
        subgraph that prevented loadings from being computed (e.g., categorical variables
        with >2 levels), otherwise empty).
    }
  }
}
\description{
Estimates a single-layer Mixed Graphical Model (MGM) network on the original data,
using the estimation framework implemented in the \pkg{mgm} package, and performs
non-parametric bootstrap (row resampling) to compute centrality indices, bridge
metrics, clustering stability, and quantile regions for node metrics
and edge weights.
Optionally, the function computes community score loadings (for later prediction
on new data) and can bootstrap the corresponding loadings.
}
\details{
This function does \strong{not} call \code{future::plan()}. To enable
parallel bootstrap, set a plan (e.g. \code{future::plan(multisession)})
before calling \code{mixMN()}. If \code{boot_what} is \code{"none"} and
\code{reps > 0}, node-level metrics are not bootstrapped but edge-weight
bootstrap and corresponding quantile regions are still computed.
}
\examples{
data(bacteremia)
df <- bacteremia[, !names(bacteremia) \%in\% "BloodCulture"]

fit <- mixMN(
  data = df,
  lambdaSel = "EBIC",
  lambdaGam = 0.25,
  reps = 0,
  seed_model = 42,
  compute_loadings = FALSE,
  progress = FALSE
)
fit

# Plot the estimated network
set.seed(1)
plot(fit)

\donttest{
fit_b <- mixMN(
  data = df,
  lambdaSel = "EBIC",
  lambdaGam = 0.25,
  reps = 5,
  seed_model = 42,
  seed_boot =42,
  boot_what = "community",
  compute_loadings = FALSE,
  progress = FALSE
)

# Plot the membership stability
plot(fit_b, what = "stability", cutoff = 0.7)
}

}
\references{
Haslbeck, J. M. B., & Waldorp, L. J. (2020).
mgm: Estimating Time-Varying Mixed Graphical Models in High-Dimensional Data.
\emph{Journal of Statistical Software}, 93(8).
\doi{10.18637/jss.v093.i08}

Loh, P. L., & Wainwright, M. J. (2012).
Structure estimation for discrete graphical models:
Generalized covariance matrices and their inverses.
\emph{Advances in Neural Information Processing Systems}, 25.
}
