#' @title Seurat-style SVG Detection Methods
#'
#' @description
#' Detect spatially variable genes using methods implemented in Seurat,
#' including Moran's I with inverse distance weights and Mark Variogram.
#'
#' @name CalSVG_Seurat
NULL


#' Detect SVGs using Seurat Moran's I Method
#'
#' @description
#' Identifies spatially variable genes using Moran's I statistic with
#' inverse distance squared weighting, as implemented in Seurat's
#' \code{FindSpatiallyVariableFeatures} function.
#'
#' @param expr_matrix Numeric matrix of gene expression values.
#'   \itemize{
#'     \item Rows: genes
#'     \item Columns: spatial locations (spots/cells)
#'     \item Values: scaled/normalized expression (Seurat typically uses scale.data)
#'   }
#'
#' @param spatial_coords Numeric matrix of spatial coordinates.
#'   \itemize{
#'     \item Rows: spatial locations (must match columns of expr_matrix)
#'     \item Columns: x, y coordinates
#'   }
#'
#' @param weight_scheme Character string specifying the distance-based weighting.
#'   \itemize{
#'     \item \code{"inverse_squared"} (default): w_ij = 1 / d_ij^2
#'       (Seurat default, emphasizes local neighbors)
#'     \item \code{"inverse"}: w_ij = 1 / d_ij
#'       (less emphasis on close neighbors)
#'     \item \code{"gaussian"}: w_ij = exp(-d_ij^2 / (2 * bandwidth^2))
#'       (controlled by bandwidth parameter)
#'   }
#'
#' @param bandwidth Numeric. Bandwidth for Gaussian weighting.
#'   Default is NULL (auto-computed as median pairwise distance).
#'   Only used when \code{weight_scheme = "gaussian"}.
#'
#' @param adjust_method Character string for p-value adjustment.
#'   Default is "BH" (Benjamini-Hochberg).
#'
#' @param n_threads Integer. Number of parallel threads. Default is 1.
#'
#' @param verbose Logical. Print progress messages. Default is TRUE.
#'
#' @return A data.frame with SVG detection results. Columns:
#'   \itemize{
#'     \item \code{gene}: Gene identifier
#'     \item \code{observed}: Observed Moran's I statistic
#'     \item \code{expected}: Expected Moran's I under null
#'     \item \code{sd}: Standard deviation under null
#'     \item \code{p.value}: Raw p-value
#'     \item \code{p.adj}: Adjusted p-value
#'     \item \code{rank}: Rank by p-value (ascending)
#'   }
#'
#' @details
#' \strong{Method Overview:}
#'
#' This function replicates Seurat's \code{FindSpatiallyVariableFeatures}
#' with \code{selection.method = "moransi"}. The key difference from other
#' Moran's I implementations is the weighting scheme:
#'
#' \deqn{w_{ij} = \frac{1}{d_{ij}^2}}
#'
#' where d_ij is the Euclidean distance between locations i and j.
#'
#' \strong{Interpretation:}
#' \itemize{
#'   \item Uses continuous distance-based weights (not binary network)
#'   \item Emphasizes local spatial relationships
#'   \item Higher weights for closer neighbors
#' }
#'
#' \strong{Comparison with MERINGUE:}
#' \itemize{
#'   \item MERINGUE: Binary adjacency (neighbors = 1, others = 0)
#'   \item Seurat: Continuous weights (1/distance^2)
#'   \item Seurat method is more sensitive to local patterns
#' }
#'
#' @examples
#' # Load example data
#' data(example_svg_data)
#' expr <- example_svg_data$logcounts[1:20, ]
#' coords <- example_svg_data$spatial_coords
#' 
#' \donttest{
#' # Basic usage
#' results <- CalSVG_Seurat(expr, coords, verbose = FALSE)
#' head(results)
#' }
#'
#' @references
#' Hao, Y. et al. (2021) Integrated analysis of multimodal single-cell data.
#' Cell.
#'
#' Stuart, T. et al. (2019) Comprehensive Integration of Single-Cell Data.
#' Cell.
#'
#' @seealso
#' \code{\link{CalSVG}}, \code{\link{CalSVG_MERINGUE}}
#'
#' @export
CalSVG_Seurat <- function(expr_matrix,
                          spatial_coords,
                          weight_scheme = c("inverse_squared", "inverse", "gaussian"),
                          bandwidth = NULL,
                          adjust_method = "BH",
                          n_threads = 1L,
                          verbose = TRUE) {

    # Match arguments
    weight_scheme <- match.arg(weight_scheme)

    # =========================================================================
    # Input Validation
    # =========================================================================

    if (!is.matrix(expr_matrix)) {
        expr_matrix <- as.matrix(expr_matrix)
    }

    if (!is.matrix(spatial_coords)) {
        spatial_coords <- as.matrix(spatial_coords)
    }

    # Ensure matching samples
    if (is.null(colnames(expr_matrix))) {
        colnames(expr_matrix) <- paste0("spot_", seq_len(ncol(expr_matrix)))
    }
    if (is.null(rownames(spatial_coords))) {
        rownames(spatial_coords) <- colnames(expr_matrix)
    }

    common_samples <- intersect(colnames(expr_matrix), rownames(spatial_coords))
    if (length(common_samples) == 0) {
        stop("No matching samples between expr_matrix and spatial_coords")
    }

    expr_matrix <- expr_matrix[, common_samples, drop = FALSE]
    spatial_coords <- spatial_coords[common_samples, , drop = FALSE]

    n_genes <- nrow(expr_matrix)
    n_spots <- ncol(expr_matrix)

    if (verbose) {
        message("=== CalSVG_Seurat (Moran's I) ===")
        message(sprintf("  Genes: %d", n_genes))
        message(sprintf("  Spots: %d", n_spots))
        message(sprintf("  Weight scheme: %s", weight_scheme))
    }

    # =========================================================================
    # Compute Distance-based Weights
    # =========================================================================

    if (verbose) message("Computing distance matrix...")

    # Compute pairwise Euclidean distances
    pos_dist <- as.matrix(dist(spatial_coords))

    # Compute weights based on scheme
    if (weight_scheme == "inverse_squared") {
        # Seurat default: 1/d^2
        weights <- 1 / pos_dist^2
    } else if (weight_scheme == "inverse") {
        weights <- 1 / pos_dist
    } else if (weight_scheme == "gaussian") {
        if (is.null(bandwidth)) {
            # Auto bandwidth: median of non-zero distances
            bandwidth <- median(pos_dist[lower.tri(pos_dist)])
        }
        weights <- exp(-pos_dist^2 / (2 * bandwidth^2))
        if (verbose) {
            message(sprintf("  Gaussian bandwidth: %.2f", bandwidth))
        }
    }

    # Set diagonal to 0 (no self-connection)
    diag(weights) <- 0

    # Handle infinite values from 1/0
    weights[!is.finite(weights)] <- 0

    if (verbose) {
        mean_weight <- mean(weights[weights > 0])
        message(sprintf("  Mean weight: %.4f", mean_weight))
    }

    # =========================================================================
    # Compute Moran's I for Each Gene
    # =========================================================================

    if (verbose) message("Computing Moran's I for each gene...")

    # Precompute weight matrix statistics for variance calculation
    W_sum <- sum(weights)
    n <- n_spots

    # S1 and S2 for variance formula
    W_symm <- weights + t(weights)
    S1 <- 0.5 * sum(W_symm^2)
    margin_sums <- rowSums(weights) + colSums(weights)
    S2 <- sum(margin_sums^2)

    # Expected value under null
    E_I <- -1 / (n - 1)

    # Function to compute Moran's I for one gene (Seurat-style)
    compute_moran_seurat <- function(gene_idx) {
        x <- expr_matrix[gene_idx, ]

        # Center the data
        z <- x - mean(x)
        m2 <- sum(z^2)
        m4 <- sum(z^4)

        if (m2 < 1e-16) {
            # Zero variance
            return(c(observed = NA, expected = E_I, sd = NA, p.value = NA))
        }

        # Compute spatial covariance
        cv <- sum(weights * outer(z, z))

        # Moran's I
        I_obs <- (n / W_sum) * (cv / m2)

        # Variance calculation (analytical formula)
        S3 <- (m4 / n) / ((m2 / n)^2)
        S4 <- (n^2 - 3*n + 3) * S1 - n * S2 + 3 * W_sum^2
        S5 <- (n^2 - n) * S1 - 2 * n * S2 + 6 * W_sum^2

        E_I2 <- (n * S4 - S3 * S5) / ((n-1) * (n-2) * (n-3) * W_sum^2)
        V_I <- E_I2 - E_I^2
        sd_I <- sqrt(max(V_I, 0))

        # P-value (two-sided by default in ape::Moran.I)
        z_score <- (I_obs - E_I) / sd_I
        p_val <- 2 * (1 - pnorm(abs(z_score)))

        return(c(observed = I_obs, expected = E_I, sd = sd_I, p.value = p_val))
    }

    # Apply to all genes
    if (n_threads > 1 && .Platform$OS.type != "windows") {
        results_list <- parallel::mclapply(
            seq_len(n_genes),
            compute_moran_seurat,
            mc.cores = n_threads
        )
    } else {
        if (verbose && n_genes > 100) {
            pb <- txtProgressBar(min = 0, max = n_genes, style = 3)
        }

        results_list <- lapply(seq_len(n_genes), function(i) {
            result <- compute_moran_seurat(i)
            if (verbose && n_genes > 100) setTxtProgressBar(pb, i)
            return(result)
        })

        if (verbose && n_genes > 100) close(pb)
    }

    # =========================================================================
    # Compile Results
    # =========================================================================

    results_matrix <- do.call(rbind, results_list)

    results <- data.frame(
        gene = rownames(expr_matrix),
        observed = results_matrix[, "observed"],
        expected = results_matrix[, "expected"],
        sd = results_matrix[, "sd"],
        p.value = results_matrix[, "p.value"],
        stringsAsFactors = FALSE
    )

    # Adjust p-values
    results$p.adj <- p.adjust(results$p.value, method = adjust_method)

    # Sort by p-value then by absolute Moran's I (Seurat default ordering)
    results <- results[order(results$p.value, -abs(results$observed)), ]
    results$rank <- seq_len(nrow(results))
    rownames(results) <- NULL

    # =========================================================================
    # Summary
    # =========================================================================

    if (verbose) {
        n_sig_raw <- sum(results$p.value < 0.05, na.rm = TRUE)
        n_sig_adj <- sum(results$p.adj < 0.05, na.rm = TRUE)
        message(sprintf("  Significant (p < 0.05): %d raw, %d adjusted",
                       n_sig_raw, n_sig_adj))
        message("=== Done ===")
    }

    return(results)
}


#' Detect SVGs using Mark Variogram Method
#'
#' @description
#' Identifies spatially variable genes using the mark variogram approach,
#' as implemented in Seurat's \code{FindSpatiallyVariableFeatures} function
#' with \code{selection.method = "markvariogram"}.
#'
#' @param expr_matrix Numeric matrix of gene expression values.
#' @param spatial_coords Numeric matrix of spatial coordinates.
#' @param r_metric Numeric. Distance at which to evaluate the variogram.
#'   Default is 5. Larger values capture broader spatial patterns.
#' @param normalize Logical. Whether to normalize the variogram. Default is TRUE.
#' @param n_threads Integer. Number of parallel threads. Default is 1.
#' @param verbose Logical. Print progress messages. Default is TRUE.
#'
#' @return A data.frame with SVG detection results. Columns:
#'   \itemize{
#'     \item \code{gene}: Gene identifier
#'     \item \code{r.metric.X}: Variogram value at distance r_metric
#'     \item \code{rank}: Rank by variogram value (ascending, lower = more spatially variable)
#'   }
#'
#' @details
#' \strong{Method Overview:}
#'
#' The mark variogram measures how the correlation between gene expression
#' values changes with distance. It is computed using the spatstat package's
#' \code{markvario} function.
#'
#' \strong{Interpretation:}
#' \itemize{
#'   \item Lower variogram values indicate stronger spatial autocorrelation
#'   \item Values near 1 indicate random spatial distribution
#'   \item Values < 1 indicate positive spatial autocorrelation (clustering)
#' }
#'
#' \strong{Note:} Requires the \code{spatstat} package suite to be installed:
#' \code{spatstat.geom} and \code{spatstat.explore}.
#'
#' @examples
#' \donttest{
#' # Load example data
#' data(example_svg_data)
#' expr <- example_svg_data$logcounts[1:5, ]
#' coords <- example_svg_data$spatial_coords
#' 
#' # Requires spatstat packages
#' if (requireNamespace("spatstat.geom", quietly = TRUE) &&
#'     requireNamespace("spatstat.explore", quietly = TRUE)) {
#'     results <- CalSVG_MarkVario(expr, coords, verbose = FALSE)
#'     head(results)
#' }
#' }
#'
#' @references
#' Baddeley, A. et al. (2015) Spatial Point Patterns: Methodology and
#' Applications with R. Chapman and Hall/CRC.
#'
#' @seealso
#' \code{\link{CalSVG_Seurat}}
#'
#' @export
CalSVG_MarkVario <- function(expr_matrix,
                              spatial_coords,
                              r_metric = 5,
                              normalize = TRUE,
                              n_threads = 1L,
                              verbose = TRUE) {

    # Check for spatstat
    if (!requireNamespace("spatstat.geom", quietly = TRUE) ||
        !requireNamespace("spatstat.explore", quietly = TRUE)) {
        stop(
            "Mark variogram method requires spatstat packages.\n",
            "Please install with:\n",
            "  install.packages(c('spatstat.geom', 'spatstat.explore'))"
        )
    }

    # =========================================================================
    # Input Validation
    # =========================================================================

    if (!is.matrix(expr_matrix)) {
        expr_matrix <- as.matrix(expr_matrix)
    }

    if (!is.matrix(spatial_coords)) {
        spatial_coords <- as.matrix(spatial_coords)
    }

    # Ensure matching samples
    if (is.null(colnames(expr_matrix))) {
        colnames(expr_matrix) <- paste0("spot_", seq_len(ncol(expr_matrix)))
    }
    if (is.null(rownames(spatial_coords))) {
        rownames(spatial_coords) <- colnames(expr_matrix)
    }

    common_samples <- intersect(colnames(expr_matrix), rownames(spatial_coords))
    expr_matrix <- expr_matrix[, common_samples, drop = FALSE]
    spatial_coords <- spatial_coords[common_samples, , drop = FALSE]

    n_genes <- nrow(expr_matrix)
    n_spots <- ncol(expr_matrix)

    if (verbose) {
        message("=== CalSVG_MarkVario ===")
        message(sprintf("  Genes: %d", n_genes))
        message(sprintf("  Spots: %d", n_spots))
        message(sprintf("  r.metric: %d", r_metric))
    }

    # =========================================================================
    # Create Point Pattern Object
    # =========================================================================

    if (verbose) message("Creating point pattern object...")

    x_coords <- spatial_coords[, 1]
    y_coords <- spatial_coords[, 2]

    # Create ppp object
    pp <- spatstat.geom::ppp(
        x = x_coords,
        y = y_coords,
        xrange = range(x_coords),
        yrange = range(y_coords)
    )

    # =========================================================================
    # Compute Mark Variogram
    # =========================================================================

    if (verbose) message("Computing mark variograms...")

    # Function to compute variogram for one gene
    compute_vario <- function(gene_idx) {
        # Set marks for this gene
        pp_temp <- pp
        spatstat.geom::marks(pp_temp) <- expr_matrix[gene_idx, ]

        tryCatch({
            mv <- spatstat.explore::markvario(X = pp_temp, normalise = normalize)
            return(mv)
        }, error = function(e) {
            return(NULL)
        })
    }

    # Compute for all genes
    if (n_threads > 1 && .Platform$OS.type != "windows") {
        mv_list <- parallel::mclapply(
            seq_len(n_genes),
            compute_vario,
            mc.cores = n_threads
        )
    } else {
        if (verbose && n_genes > 50) {
            pb <- txtProgressBar(min = 0, max = n_genes, style = 3)
        }

        mv_list <- lapply(seq_len(n_genes), function(i) {
            result <- compute_vario(i)
            if (verbose && n_genes > 50) setTxtProgressBar(pb, i)
            return(result)
        })

        if (verbose && n_genes > 50) close(pb)
    }

    names(mv_list) <- rownames(expr_matrix)

    # =========================================================================
    # Extract r.metric Values
    # =========================================================================

    if (verbose) message("Extracting variogram statistics...")

    # Function to get value at r_metric
    get_r_metric <- function(mv, r_metric) {
        if (is.null(mv)) return(NA)

        # Find closest r value
        r_vals <- mv$r
        idx <- which.min(abs(r_vals - r_metric))
        return(mv$trans[idx])
    }

    r_metric_vals <- sapply(mv_list, get_r_metric, r_metric = r_metric)

    # =========================================================================
    # Create Results
    # =========================================================================

    results <- data.frame(
        gene = rownames(expr_matrix),
        r.metric.value = r_metric_vals,
        stringsAsFactors = FALSE
    )

    # Sort by variogram value (ascending - lower = more spatially variable)
    results <- results[order(results$r.metric.value, na.last = TRUE), ]
    results$rank <- seq_len(nrow(results))
    rownames(results) <- NULL

    # =========================================================================
    # Summary
    # =========================================================================

    if (verbose) {
        n_below_1 <- sum(results$r.metric.value < 1, na.rm = TRUE)
        message(sprintf("  Genes with variogram < 1: %d", n_below_1))
        message("=== Done ===")
    }

    return(results)
}
