## ----gene_analysis, echo=TRUE, message=FALSE, warning=FALSE------------------- library(DPComb) # Load built-in data #data(case_control) data("case_control", package = "DPComb") # Group markers: gene1 contains marker1 to marker5; gene2 contains marker6 to marker15. markers_gene1 <- paste0("marker", 1:5) markers_gene2 <- paste0("marker", 6:15) #' Function: perform_comb_test #' #' This function conducts a combination test on a specified set of genetic markers in #' a case-control study. It computes the number of mutations in cases (xs) and #' performs two tests: #' 1. X-value based test using a hypergeometric model with parameters constructed #' from the data. #' 2. Fisher's exact test based combination, where p-values derived from Fisher's exact #' test are combined. #' #' @param markers A character vector of marker names. #' @param method (Optional) A string specifying the combination method. Default is #' "fisher_mean". Supported methods include "fisher_mean", "fisher_median", #' "pearson", "edgington", "stouffer", and "george". #' @param side (Optional) A string indicating the tail option for converting x values to #' p-values. Default is "two". Options are "two" (two-tailed), "left", or "right". #' Ensure consistency with your test specifications. #' #' @return A list containing two elements: #' xs_test: The result from the combination test using mutation counts (xs). #' ps_test: The result from the combination test using Fisher's p-values. #' perform_comb_test <- function(markers, Data, method = "fisher_mean", side = "two") { ##Method 1: Based on mutation counts and null distribution descriptions. # Extract disease status disease_status <- Data$disease_status # Calculate xs: number of mutations (1's) in cases for each marker xs <- sapply(markers, function(m) sum(Data[disease_status == 1, m])) # Total cases and controls total_cases <- sum(disease_status == 1) total_controls <- sum(disease_status == 0) # Construct x_distn_params for hyper distribution for each marker: # m = number of cases, n = number of controls, k = total mutations in the marker. x_distn_params <- lapply(markers, function(m) { k <- sum(Data[, m]) list(distn = "hyper", m = total_cases, n = total_controls, k = k) }) # Combination test using xs and x_distn_params: res_xs <- DPComb_tests(xs = xs, side = side, x_distn_params = x_distn_params, method = method) ##Method 2: Based on Fisher's exact test p-values and x_distn_params. # Compute p-values from Fisher's exact test for each marker. # define alternative as "two.sided", "greater", or "less" correspond to side # being "two", "right", or "left". alternative <- switch(side, "two" = "two.sided", "right" = "greater", "left" = "less") ps <- sapply(markers, function(m) { tbl <- table(Data$disease_status, Data[, m]) fisher.test(tbl, alternative = alternative)$p.value }) ps[ps > 1] <- 1 # Ensure p-values are within [0, 1]. #Results from Fisher's test may exceed 1 slightly. res_ps <- DPComb_tests(ps = ps, side = side, x_distn_params = x_distn_params, method = method) list(xs_test = res_xs, ps_test = res_ps) } # Perform tests for gene1 and gene2. # Use default method "fisher_mean" and side "two". result_gene1 <- perform_comb_test(markers_gene1, Data=case_control) result_gene2 <- perform_comb_test(markers_gene2, Data=case_control) # Print the results. cat("Gene 1 Combination Test Results (using xs):\n") print(result_gene1$xs_test) cat("\nGene 1 Combination Test Results (using Fisher's p-values):\n") print(result_gene1$ps_test) cat("\nGene 2 Combination Test Results (using xs):\n") print(result_gene2$xs_test) cat("\nGene 2 Combination Test Results (using Fisher's p-values):\n") print(result_gene2$ps_test) ## === Test all methods and sides === methods <- c("fisher_mean", "fisher_median", "pearson", "edgington", "stouffer", "george") sides <- c("two", "right", "left") # Initialize a list to store results results <- list() # Perform tests for each `method` and `side` option, and store the results for (method in methods) { for (side in sides) { result_gene1 <- perform_comb_test(markers_gene1, Data=case_control, method = method, side = side) result_gene2 <- perform_comb_test(markers_gene2, Data=case_control, method = method, side = side) # Store results in the list results[[paste(method, side, sep = "_")]] <- list( gene1_xs = result_gene1$xs_test, gene1_ps = result_gene1$ps_test, gene2_xs = result_gene2$xs_test, gene2_ps = result_gene2$ps_test ) } } # Convert results to data frames for better visualization results_gene1_df <- do.call(rbind, lapply(names(results), function(name) { res <- results[[name]] data.frame( Method_Side = name, XS_Sn = round(res$gene1_xs$Sn, 2), XS_pval = round(res$gene1_xs$pval, 4), PS_Sn = round(res$gene1_ps$Sn, 2), PS_pval = round(res$gene1_ps$pval, 4) ) })) results_gene2_df <- do.call(rbind, lapply(names(results), function(name) { res <- results[[name]] data.frame( Method_Side = name, XS_Sn = round(res$gene2_xs$Sn, 2), XS_pval = round(res$gene2_xs$pval, 4), PS_Sn = round(res$gene2_ps$Sn, 2), PS_pval = round(res$gene2_ps$pval, 4) ) })) # Print the results in table format knitr::kable(results_gene1_df, caption = "Combination Test Results for Gene 1") knitr::kable(results_gene2_df, caption = "Combination Test Results for Gene 2") ## ----gene1_analysis, echo=TRUE, message=FALSE, warning=FALSE------------------ library(DPComb) data("case_control", package = "DPComb") # Analyze gene1 using test_case_control_fisher test_case_control_fisher(Data = case_control, response = "disease_status", covariates = paste0("marker", 1:5), method = "fisher_mean", alternative = "two.sided")