## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----eval = FALSE------------------------------------------------------------- # # if (!require("remotes", quietly = TRUE)) # # install.packages("remotes") # # # # remotes::install_github("statdivlab/radEmu") ## ----setup, message = FALSE--------------------------------------------------- library(magrittr) library(dplyr) library(ggplot2) library(stringr) library(radEmu) ## ----message = FALSE---------------------------------------------------------- library(parallel) ## ----------------------------------------------------------------------------- # load in sample data data("wirbel_sample") # set group to be a factor with levels CTR for control and CRC for cancer wirbel_sample$Group <- factor(wirbel_sample$Group, levels = c("CTR","CRC")) # load in abundance data data("wirbel_otu") # save mOTU names mOTU_names <- colnames(wirbel_otu) # consider taxa in the following genera chosen_genera <- c("Eubacterium", "Faecalibacterium", "Fusobacterium", "Porphyromonas") # get taxonomy information from mOTU names mOTU_name_df <- data.frame(name = mOTU_names) %>% mutate(base_name = stringr::str_remove(mOTU_names, "unknown ") %>% stringr::str_remove("uncultured ")) %>% mutate(genus_name = stringr::word(base_name, 1)) # restrict to names in chosen genera restricted_mOTU_names <- mOTU_name_df %>% filter(genus_name %in% chosen_genera) %>% pull(name) # pull out observations from a chinese study within the meta-analysis ch_study_obs <- which(wirbel_sample$Country %in% c("CHI")) # make count matrix for chosen samples and genera small_Y <- wirbel_otu[ch_study_obs, restricted_mOTU_names] # check for samples with only zero counts sum(rowSums(small_Y) == 0) # no samples have a count sum of 0 # check for genera with only zero counts sum(colSums(small_Y) == 0) # one category has a count sum of 0 # remove the one genus with only zero counts category_to_rm <- which(colSums(small_Y) == 0) small_Y <- small_Y[, -category_to_rm] ## ----------------------------------------------------------------------------- ch_fit <- emuFit(formula = ~ Group, data = wirbel_sample[ch_study_obs, ], Y = small_Y, run_score_tests = FALSE) ## ----------------------------------------------------------------------------- mOTU_to_test <- which(str_detect(restricted_mOTU_names, "7116")) ch_fit$B %>% rownames covariate_to_test <- which("GroupCRC" == ch_fit$B %>% rownames) robust_score <- emuFit(formula = ~ Group, data = wirbel_sample[ch_study_obs, ], fitted_model = ch_fit, refit = FALSE, test_kj = data.frame(k = covariate_to_test, j = mOTU_to_test), Y = small_Y) robust_score$coef$pval[mOTU_to_test] ## ----eval = FALSE------------------------------------------------------------- # ncores <- parallel::detectCores() - 1 # ncores ## ----eval = FALSE------------------------------------------------------------- # emuTest <- function(category) { # score_res <- emuFit(formula = ~ Group, # data = wirbel_sample[ch_study_obs, ], # fitted_model = ch_fit, # refit = FALSE, # test_kj = data.frame(k = covariate_to_test, # j = category), # Y = small_Y) # return(score_res) # } ## ----eval = FALSE------------------------------------------------------------- # if (.Platform$OS.type != "windows" & !identical(Sys.getenv("GITHUB_ACTIONS"), "true")) { # # run if we are on a Mac or Linux machine # score_res <- mclapply(1:5, # emuTest, # mc.cores = ncores) # } else { # # don't run if we are on a Windows machine, or if testing with GitHub actions # score_res <- NULL # } ## ----eval = FALSE------------------------------------------------------------- # if (!is.null(score_res)) { # c(score_res[[1]]$coef$pval[1], ## robust score test p-value for the first taxon # score_res[[2]]$coef$pval[2]) ## robust score test p-value for the second taxon # } ## ----eval = FALSE------------------------------------------------------------- # if (!is.null(score_res)) { # full_score <- sapply(1:length(score_res), # function(x) score_res[[x]]$coef$score_stat[x]) # full_pval <- sapply(1:length(score_res), # function(x) score_res[[x]]$coef$pval[x]) # full_coef <- ch_fit$coef %>% # dplyr::select(-score_stat, -pval) %>% # filter(category_num %in% 1:5) %>% # mutate(score_stat = full_score, # pval = full_pval) # full_coef # }