#' Generate Synthetic Group Factor Model Data
#'
#' @description
#' Generates synthetic time series data with a multi-group factor structure,
#' along with associated covariates. Useful for Monte Carlo simulations of
#' the \code{\link{FACT}} and \code{\link{COR}} algorithms.
#'
#' @param seed Integer. Random seed for reproducibility. Default: \code{1}.
#' @param T Integer. Number of time periods (rows in \code{Y}). Default: \code{100}.
#' @param N Integer vector of length \code{M}. Number of time series per group,
#'   such that \code{sum(N)} equals the total number of series.
#'   Default: \code{c(100, 100, 100, 100)}.
#' @param r0 Integer. Number of global factors shared across all groups.
#'   Default: \code{2}.
#' @param r Integer vector of length \code{M}. Number of local (group-specific)
#'   factors for each group. Default: \code{c(2, 2, 2, 2)}.
#' @param M Integer. Number of groups. Default: \code{4}.
#' @param sigma Numeric. Standard deviation of the idiosyncratic noise.
#'   Default: \code{1}.
#' @param p Integer. Number of covariates (columns in \code{X}). Default: \code{10}.
#' @param mu Numeric. Controls separation between group covariate distributions
#'   when \code{type_X = "Gaussian"}. Larger values yield better-separated groups.
#'   Default: \code{3}.
#' @param type_X Character. Distribution for generating covariates:
#'   \describe{
#'     \item{\code{"Uniform"}}{Groups differ by support on the real line (default).}
#'     \item{\code{"Gaussian"}}{Groups differ by mean shifts.}
#'   }
#' @param type_F Character. Correlation structure for local factors:
#'   \describe{
#'     \item{\code{"Independent"}}{Local factors are independent across groups (default).
#'       Each follows an AR(1) process.}
#'     \item{\code{"Correlated"}}{Local factors share a common correlation structure
#'       across groups.}
#'   }
#' @param type_noise Character. Distribution for idiosyncratic errors:
#'   \describe{
#'     \item{\code{"Gaussian"}}{Normal errors (default).}
#'     \item{\code{"t3"}}{Heavy-tailed errors from a t-distribution with 3 degrees
#'       of freedom, scaled to have the same variance.}
#'   }
#'
#' @return A list containing:
#'   \describe{
#'     \item{\code{Y}}{A \eqn{T \times N} numeric matrix of time series,
#'       where \eqn{N = \sum N_m}.}
#'     \item{\code{X}}{A \eqn{N \times p} numeric matrix of covariates.}
#'     \item{\code{G0}}{The \eqn{T \times r_0} matrix of true global factors.}
#'     \item{\code{r0}}{Number of global factors.}
#'     \item{\code{r}}{Vector of local factor counts per group.}
#'     \item{\code{group}}{Integer vector of length \eqn{N} indicating
#'       true group membership (values 1 through \code{M}).}
#'   }
#'
#' @details
#' The data generating process follows a group factor model:
#' \deqn{Y_m = G \Lambda_m' + F_m \Gamma_m' + E_m, \quad m = 1, \ldots, M}
#'
#' where:
#' \itemize{
#'   \item \eqn{G}: \eqn{T \times r_0} matrix of global factors (shared across groups)
#'   \item \eqn{\Lambda_m}: \eqn{N_m \times r_0} global factor loadings for group \eqn{m}
#'   \item \eqn{F_m}: \eqn{T \times r_m} matrix of local factors for group \eqn{m}
#'   \item \eqn{\Gamma_m}: \eqn{N_m \times r_m} local factor loadings for group \eqn{m}
#'   \item \eqn{E_m}: \eqn{T \times N_m} idiosyncratic error matrix
#' }
#'
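#' Global factors follow an AR(1) process with coefficient 0.5. Local factors
#' follow the same AR(1) process when \code{type_F = "Independent"}; when
#' \code{type_F = "Correlated"} they are instead drawn independently over time
#' from a multivariate normal distribution with unit variances and pairwise
#' correlation 0.4 across all local factors.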
#' Factor loadings are drawn from standard normal distributions.
#'
#' @note
#' The default covariate generation (\code{type_X = "Uniform"} or \code{"Gaussian"})
#' assumes \code{M = 4} groups with a specific hierarchical structure:
#' groups 1-2 vs 3-4 are separated by the first covariate, groups 1 vs 2 by the
#' second covariate, and groups 3 vs 4 by the third covariate. Consequently
#' \code{p} must be at least 3.
#'
#' @seealso
#' \code{\link{FACT}} for building factor-augmented clustering trees,
#' \code{\link{COR}} for correlation-based clustering.
#'
#' @examples
#' data <- gendata(seed = 123, T = 200, N = c(100, 50, 50, 200), r0 = 1, r = c(2, 2, 2, 3), M = 4)
#' Y <- data$Y
#' X <- data$X
#' @export
#' @importFrom stats rnorm rt runif
#' @importFrom mvtnorm rmvnorm
#'
gendata <- function(seed = 1, T = 100, N = c(100, 100, 100, 100),
                    r0 = 2, r = c(2, 2, 2, 2),
                    M = 4, sigma = 1, p = 10, mu = 3,
                    type_F = "Independent",
                    type_X = "Uniform",
                    type_noise = "Gaussian") {
  # Seed the global RNG first so that every draw below is reproducible.
  # NOTE: the order of the random draws in this function is deliberate and
  # must not change, otherwise existing seeds stop reproducing the same data.
  set.seed(seed)

  if (type_X %in% c("Gaussian", "Uniform") && M != 4) {
    warning("type_X 'Gaussian' and 'Uniform' are designed for M=4 groups. Results may be unexpected.")
  }

  # Validate inputs up front so bad arguments fail before any expensive
  # simulation work (the SVDs below are O(T^3) and O(N_m^3)).
  if (!(type_F %in% c("Independent", "Correlated"))) {
    stop("wrong type of the local factors")
  }
  if (!(type_noise %in% c("Gaussian", "t3"))) {
    stop("Wrong input of the error type")
  }
  if (!(type_X %in% c("Gaussian", "Uniform"))) {
    stop("Wrong input of the type X")
  }
  if (length(N) != M) {
    stop("N must have length M (one series count per group)")
  }
  if (length(r) < M) {
    stop("r must provide a local factor count for each of the M groups")
  }
  if (p < 3) {
    stop("p must be at least 3: covariates 1-3 carry the group structure")
  }

  # Simulate a T x k matrix whose columns are AR(1) series with coefficient
  # 0.5 and standard normal innovations. The full matrix is drawn first and
  # rows 2..T are then overwritten; this keeps the historical RNG stream.
  # seq_len(T)[-1] is empty when T == 1, unlike 2:T which would count down.
  simulate_ar1 <- function(k) {
    f <- matrix(rnorm(T * k), T, k)
    for (i in seq_len(T)[-1]) {
      f[i, ] <- 0.5 * f[i - 1, ] + rnorm(k)
    }
    f
  }

  # Global factors shared by every group.
  G0 <- simulate_ar1(r0)

  # Local (group-specific) factors.
  if (type_F == "Independent") {
    F0 <- vector("list", M)
    for (m in seq_len(M)) {
      F0[[m]] <- simulate_ar1(r[m])
    }
  } else {
    # All local factors are drawn jointly, i.i.d. over time, with unit
    # variance and pairwise correlation 0.4, then split into per-group blocks.
    r_sum <- sum(r)
    if (r_sum > 0) {
      Omega <- matrix(0.4, r_sum, r_sum)
      diag(Omega) <- 1
      F_all <- rmvnorm(n = T, sigma = Omega)
    } else {
      F_all <- matrix(0, T, 0)
    }
    # Offset + seq_len() yields an empty block for r[m] == 0 instead of the
    # descending range a:b would produce.
    col_offset <- cumsum(r) - r
    F0 <- lapply(seq_len(M), function(m) {
      F_all[, col_offset[m] + seq_len(r[m]), drop = FALSE]
    })
  }

  # Idiosyncratic errors: i.i.d. noise rotated on both sides by random
  # orthogonal matrices (left singular vectors of Gaussian matrices).
  E0 <- vector("list", M)
  for (m in seq_len(M)) {
    Nm <- N[m]
    if (type_noise == "Gaussian") {
      Em <- matrix(rnorm(T * Nm, sd = sigma), T, Nm)
    } else {
      # Var(t_3) = 3, so dividing by sqrt(3) rescales to variance sigma^2.
      Em <- matrix(rt(T * Nm, df = 3), T, Nm) * sigma / sqrt(3)
    }
    Pm <- svd(matrix(rnorm(T * T), T, T))$u
    Qm <- svd(matrix(rnorm(Nm * Nm), Nm, Nm))$u
    E0[[m]] <- Pm %*% Em %*% Qm
  }

  # Observations per group: global part + local part + noise, with standard
  # normal loadings. Kept as a separate loop after the noise loop so the
  # loadings are drawn after all noise matrices, as before.
  Y <- vector("list", M)
  for (m in seq_len(M)) {
    Nm <- N[m]
    rm <- r[m]
    Lambdam <- matrix(rnorm(Nm * r0), Nm, r0)
    Gammam <- matrix(rnorm(Nm * rm), Nm, rm)
    Y[[m]] <- G0 %*% t(Lambdam) + F0[[m]] %*% t(Gammam) + E0[[m]]
  }
  Y <- do.call("cbind", Y)

  NN <- sum(N)
  group <- rep(seq_len(M), times = N)

  # Covariates: p background columns, then columns 1-3 are overwritten so
  # that they encode the hierarchy {1,2} vs {3,4}, 1 vs 2, and 3 vs 4.
  if (type_X == "Gaussian") {
    X <- matrix(rnorm(NN * p, sd = mu), NN, p)
    draw_low <- function(n) rnorm(n, -mu, 1)
    draw_high <- function(n) rnorm(n, mu, 1)
  } else {
    X <- matrix(runif(NN * p, -1, 1), NN, p)
    draw_low <- function(n) runif(n, -1, 0)
    draw_high <- function(n) runif(n, 0, 1)
  }
  splits <- list(
    list(groups = c(1, 2), column = 1, draw = draw_low),
    list(groups = c(3, 4), column = 1, draw = draw_high),
    list(groups = 1, column = 2, draw = draw_low),
    list(groups = 2, column = 2, draw = draw_high),
    list(groups = 3, column = 3, draw = draw_low),
    list(groups = 4, column = 3, draw = draw_high)
  )
  for (s in splits) {
    # Count rows from the membership vector rather than indexing N[1:4], so
    # groups that do not exist (M < 4) simply contribute zero rows.
    rows <- group %in% s$groups
    X[rows, s$column] <- s$draw(sum(rows))
  }
  colnames(X) <- paste0("X", seq_len(ncol(X)))

  list(Y = Y, X = X, G0 = G0, r0 = r0, r = r, group = group)
}



