## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----------------------------------------------------------------------------- # library(robustbase) # To fit a robust regression. # library(parallel) # To use mclapply() when reestimating the association matrix. # library(dplyr) # For the convenience of tabulating p-values, coefficients, and q-values. # library(fdrtool) # For false discovery rate control. # library(gtools) # To estimate an association matrix via SparCC. ## ----setup-------------------------------------------------------------------- library(SOHPIE) ## ----------------------------------------------------------------------------- set.seed(20050505) data(combinedamgut) # A complete data containing columns with taxa and clinical covariates. ## ----------------------------------------------------------------------------- # Note: Again, we will use a toy example with the first 30 out of 138 taxa. OTUtab = combinedamgut[ , 8:37] # Clinical/demographic covariates (phenotypic data): # Note: All of these covariates in phenodat below will be included in the regression # when you use SOHPIE_DNA function later. Please make sure # phenodat below include variables that will be analyzed only. phenodat = combinedamgut[, 1:7] # first column is ID, so not using it. ## ----------------------------------------------------------------------------- # Obtain indices of each grouping factor. # In this example, a variable indicating the status of living with a dog was chosen (i.e. bin_dog). # Accordingly, Groups A and B imply living without and with a dog, respectively. newindex_grpA = which(combinedamgut$bin_dog == 0) newindex_grpB = which(combinedamgut$bin_dog == 1) ## ----------------------------------------------------------------------------- SOHPIEres <- SOHPIE_DNA(OTUdat = OTUtab, clindat = phenodat, groupA = newindex_grpA, groupB = newindex_grpB, c = 0.5) ## ----------------------------------------------------------------------------- # qval() function will get you a table with q-values. qval(SOHPIEres) ## ----------------------------------------------------------------------------- # Create an object to keep the table with q-values. qvaltab <- qval(SOHPIEres) # Retrieve a vector of q-values for a single variable of interest. qval_specific_var(qvaltab = qvaltab, varname = "bin_dog") ## ----------------------------------------------------------------------------- # Please do NOT forget to provide the name of variable in DCtaxa_tab(groupvar = ) # and the level of significance (0.3 in this example). DCtaxa_tab <- DCtaxa_tab(qvaltab = qvaltab, groupvar = "bin_dog", alpha = 0.3) DCtaxa_tab ## ----------------------------------------------------------------------------- data(combineddietswap) OTUtab = combineddietswap[ , 5:ncol(combineddietswap)] phenodat = combineddietswap[ , 1:4] # first column is ID, so not using it. ## ----------------------------------------------------------------------------- # Obtain indices for each groups # (i.e. African-Americans from Pittsburgh (AAM) vs. Africans from rural South Africa (AFR)) # at baseline (time = 1) and at 29-days (time = 6) # Group A1 for AAM at baseline. newindex_A1 = which(combineddietswap$timepoint == 1 & combineddietswap$nationality == "AAM") # Group A6 for AAM at 29-days. newindex_A6 = which(combineddietswap$timepoint == 6 & combineddietswap$nationality == "AAM") # Group B1 for AFR at baseline. newindex_B1 = which(combineddietswap$timepoint == 1 & combineddietswap$nationality == "AFR") # Group A6 for AFR at 29-days. newindex_B6 = which(combineddietswap$timepoint == 6 & combineddietswap$nationality == "AFR") ## ----------------------------------------------------------------------------- est_asso_matA1 = asso_mat(OTUdat=OTUtab, group=newindex_A1) est_asso_matA6 = asso_mat(OTUdat=OTUtab, group=newindex_A6) est_asso_matB1 = asso_mat(OTUdat=OTUtab, group=newindex_B1) est_asso_matB6 = asso_mat(OTUdat=OTUtab, group=newindex_B6) ## For each group, we take the difference of estimated ## association matrices between time points (29-days minus baseline). asso_mat_diffA61 = est_asso_matA6$assomat - est_asso_matA1$assomat asso_mat_diffB61 = est_asso_matB6$assomat - est_asso_matB1$assomat ## Similarly, for each group, we take the difference of re-estimated ## association matrices between time points (29-days minus baseline). asso_mat_drop_diffA61 = mapply('-', est_asso_matA6$reest.assomat, est_asso_matA1$reest.assomat, SIMPLIFY=FALSE) asso_mat_drop_diffB61 = mapply('-', est_asso_matB6$reest.assomat, est_asso_matB1$reest.assomat, SIMPLIFY=FALSE) ## ----------------------------------------------------------------------------- ## For changes in network centrality between association matrices estimated from the whole data. thetahat_grpA = thetahats(asso_mat_diffA61) thetahat_grpB = thetahats(asso_mat_diffB61) ## For changes in network centrality between association matrices ## re-estimated from the leave-one-out sample. thetahat_drop_grpA = sapply(asso_mat_drop_diffA61, thetahats) thetahat_drop_grpB = sapply(asso_mat_drop_diffB61, thetahats) ## ----------------------------------------------------------------------------- # Sample sizes for each group. n_A <- length(newindex_A1) n_B <- length(newindex_B1) # Jackknife pseudo-values. thetatilde_grpA = thetatildefun(thetahat_grpA, thetahat_drop_grpA, n_A) thetatilde_grpB = thetatildefun(thetahat_grpB, thetahat_drop_grpB, n_B) thetatilde = rbind(thetatilde_grpA, thetatilde_grpB) ## ----------------------------------------------------------------------------- fitmod = pseudoreg(pseudoval=thetatilde, clindat=phenodat, c=0.5) ## ----------------------------------------------------------------------------- summary.result = pseudoreg.summary(pseudo.reg.res=fitmod, taxanames=colnames(OTUtab)) ## In this study, the main grouping variable was nationality. # We can use qval_specific_var to see the q-values only. # of nationality. Alternatively, we further use DCtaxa_tab to see # the DC taxa with their p-values. qval_specific_var(summary.result$q_values, varname = "nationalityAFR") DCtaxa_tab(summary.result$q_values, groupvar = "nationalityAFR", alpha=0.05)