## ----initialsetup, include=FALSE---------------------------------------------- knitr::opts_chunk$set(cache=FALSE) ## ----eval=FALSE--------------------------------------------------------------- # library(data.table) # setwd("/mnt/scratch/quantgen/TENSOR_EVD/pipeline") # # PHENO <- read.csv("source/PHENO.csv") # GENO <- fread("source/GENO.csv", data.table=FALSE) # ECOV <- read.csv("source/ECOV.csv", row.names=1) # # # Select North region # PHENO <- PHENO[PHENO$region %in% 'North',] # PHENO$year_loc <- factor(as.character(PHENO$year_loc)) # PHENO$genotype <- factor(as.character(PHENO$genotype)) # # save(PHENO, file="data/pheno.RData") # # # Calculate the GRM # ID <- GENO[,1] # GENO <- as.matrix(GENO[,-1]) # rownames(GENO) <- ID # X <- scale(GENO, center=TRUE, scale=FALSE) # KG <- tcrossprod(X) # KG <- KG[levels(PHENO$genotype),levels(PHENO$genotype)] # KG <- KG/mean(diag(KG)) # # save(KG, file="data/GRM.RData") # # # Calculate the ERM # ECOV <- ECOV[,-grep("HI30_",colnames(ECOV))] # # KE <- tcrossprod(scale(ECOV)) # KE <- KE[levels(PHENO$year_loc),levels(PHENO$year_loc)] # KE <- KE/mean(diag(KE)) # # save(KE, file="data/ERM.RData") ## ----eval=FALSE--------------------------------------------------------------- # JOBS <- expand.grid(nG = c(100,500,1000), # nE = c(10,30,50), # n = c(10000,20000,30000), # alpha = c(0.90,0.95,0.98), # replicate = 1:10) # dim(JOBS); head(JOBS) # #[1] 810 5 # # nG nE n alpha replicate # #1 100 10 10000 0.9 1 # #2 500 10 10000 0.9 1 # #3 1000 10 10000 0.9 1 # #4 100 30 10000 0.9 1 # #5 500 30 10000 0.9 1 # #6 1000 30 10000 0.9 1 # # save(JOBS, file="/mnt/scratch/quantgen/TENSOR_EVD/pipeline/parms/JOBS1.RData") ## ----eval=TRUE---------------------------------------------------------------- out <- read.csv(url("https://raw.githubusercontent.com/MarcooLopez/tensorEVD/main/inst/extdata/results_simulation.txt")) head(out[,1:7]) head(out[,8:12]) # results from the eigen function head(out[,13:17]) # results from the tensorEVD function ## ----eval=T------------------------------------------------------------------- source("https://raw.githubusercontent.com/MarcooLopez/tensorEVD/main/misc/functions.R") ## ----eval=T, fig.width = 7, fig.height = 5, fig.align='center'---------------- # Some data edits out$alpha <- factor(100*out$alpha) out$nG <- paste0("n[G]*' = '*",out$nG,"L") out$nE <- paste0("n[E]*' = '*",out$nE,"L") out$nG <- factor(out$nG, levels = unique(out$nG)) out$nE <- factor(out$nE, levels = unique(out$nE)) # Reshaping the data measure <- c("time","Frobenius","CMD","nPC","pPC") dat <- melt_data(out, id=c("nG","nE","n","alpha"), measure=paste0(measure,"_"), value.name=measure, variable.name="method") color1 <- c('90%'="navajowhite2", '95%'="chocolate1", '98%'="red4") color2 <- c(eigen="#E69F00", tensorEVD="#009E73", eigs="#56B4E9", trlan="#CC79A7", chol="#D55E00") # Figure 1: Computation time ratio (eigen/tensorEVD) dat0 <- out[out$alpha != "100",] dat0$alpha <- factor(paste0(dat0$alpha,"%")) dat0$ratio <- log10(dat0$time_eigen/dat0$time_tensorEVD) dat0$n <- dat0$n/1000 breaks0 <- seq(1,4,by=1) figure1 <- make_plot(dat0, type="line", x='n', y='ratio', group="alpha", group.label=NULL, facet="nG", facet2="nE", facet.type="grid", xlab="Sample size (x1000)", ylab="Computation time ratio (eigen/tensorEVD)", group.color=color1, nSD=0, errorbar.size=0, breaks.y=breaks0, labels.y=sprintf("%.f",10^breaks0), scales="fixed") #print(figure1) ## ----eval=T, fig.width = 7, fig.height = 5, fig.align='center'---------------- dat0 <- dat[dat$method %in% c("eigen","tensorEVD") & dat$alpha!="100",] dat0$method <- factor(as.character(dat0$method)) dat0$alpha <- factor(as.character(dat0$alpha)) # Figure 2: Approximation accuracy using Frobenious norm figure2 <- make_plot(dat0, x='alpha', y='Frobenius', group="method", by="n", facet="nG", facet2="nE", facet.type="grid", xlab=bquote(alpha~"x100% of variance of K"), ylab=expression("Frobenius norm ("~abs(abs(K-hat(K)))[F]~")"), by.label="Sample size", breaks.y=seq(0,500,by=100), group.color=color2, rect.by.height=-0.05, ylim=c(0,NA)) #print(figure2) ## ----eval=T, fig.width = 7, fig.height = 5, fig.align='center'---------------- figure3 <- make_plot(dat0, x='alpha', y='pPC', group="method", by="n", facet="nG", facet2="nE", facet.type="grid", xlab=bquote(alpha~"x100% of variance of K"), ylab="Number of eigenvectors/rank", by.label="Sample size", group.color=color2, rect.by.height=-0.05, hline=1, hline.color="red2", ylim=c(0,NA)) #print(figure3) ## ----eval=FALSE--------------------------------------------------------------- # JOBS <- expand.grid(alpha = c(0.90,0.95,0.98,1.00), # replicate = 1:5) # dim(JOBS); head(JOBS) # #[1] 20 2 # # alpha replicate # #1 0.90 1 # #2 0.95 1 # #3 0.98 1 # #4 1.00 1 # #5 0.90 2 # #6 0.95 2 # # save(JOBS, file="/mnt/scratch/quantgen/TENSOR_EVD/pipeline/parms/JOBS2.RData") ## ----eval=FALSE--------------------------------------------------------------- # JOBS <- expand.grid(trait = c("yield","anthesis","silking","ASI"), # method = c("eigen","tensorEVD"), # alpha = c(0.90,0.95,0.98,1.00), # replicate = 1:5) # JOBS <- JOBS[-which(JOBS$alpha==1.00 & JOBS$method=="tensorEVD"),] # dim(JOBS); head(JOBS) # #[1] 140 4 # # trait method alpha replicate # #1 yield eigen 0.9 1 # #2 anthesis eigen 0.9 1 # #3 silking eigen 0.9 1 # #4 ASI eigen 0.9 1 # #5 yield tensorEVD 0.9 1 # #6 anthesis tensorEVD 0.9 1 # # save(JOBS, file="/mnt/scratch/quantgen/TENSOR_EVD/pipeline/parms/JOBS3.RData") ## ----eval=FALSE--------------------------------------------------------------- # setwd("/mnt/scratch/quantgen/TENSOR_EVD/pipeline") # load("parms/JOBS3.RData") # prefix <- "output/genomic_prediction/ANOVA" # # out <- c() # for(k in 1:nrow(JOBS)) # { # trait <- as.vector(JOBS[k,"trait"]) # method <- as.vector(JOBS[k,"method"]) # alpha <- as.vector(JOBS[k,"alpha"]) # replicate <- as.vector(JOBS[k,"replicate"]) # # suffix <- paste0(trait,"/",method,"/alpha_",100*alpha,"/rep_",replicate,"/VC.RData") # filename <- paste0(prefix,"/",suffix) # if(file.exists(filename)){ # load(filename) # out <- rbind(out, VC) # }else{ # message("File not found: '",suffix,"'") # } # } ## ----eval=TRUE---------------------------------------------------------------- out <- read.csv(url("https://raw.githubusercontent.com/MarcooLopez/tensorEVD/main/inst/extdata/results_ANOVA.txt")) head(out) ## ----eval=T, fig.width = 6.5, fig.height = 4.5, fig.align='center'------------ out$alpha <- factor(paste0(100*out$alpha,"%"), levels=c("100%","98%","95%","90%")) out$source <- factor(out$source, levels=c("G","E","GE","Error")) trait <- c("yield", "anthesis", "silking", "ASI")[1] myfun <- function(x) sprintf('%.3f', x) # Figure 4: Phenotypic variance of yield dat <- out[out$trait==trait,] figure4 <- make_plot(dat, x='alpha', y='mean', SD="SD", group="method", facet="source", xlab=bquote(alpha~"x100% of variance of K"), ylab=paste0("Proportion of variance of ",trait), group.color=color2, scales="free_y", ylabels=myfun, text=myfun, ylim=c(0,NA)) #print(figure4) ## ----eval=FALSE--------------------------------------------------------------- # setwd("/mnt/scratch/quantgen/TENSOR_EVD/pipeline") # load("data/pheno.RData") # table(PHENO$CV_10fold) # # 1 2 3 4 5 6 7 8 9 10 # #6180 6277 6246 5785 6160 5858 5492 5660 5436 5975 ## ----eval=FALSE--------------------------------------------------------------- # JOBS <- expand.grid(trait = c("yield","anthesis","silking","ASI"), # method = c("eigen","tensorEVD"), # alpha = c(0.90,0.95,0.98), # fold = 1:10) # dim(JOBS); head(JOBS) # #[1] 240 4 # # trait method alpha fold # #1 yield eigen 0.9 1 # #2 anthesis eigen 0.9 1 # #3 silking eigen 0.9 1 # #4 ASI eigen 0.9 1 # #5 yield tensorEVD 0.9 1 # #6 anthesis tensorEVD 0.9 1 # # save(JOBS, file="/mnt/scratch/quantgen/TENSOR_EVD/pipeline/parms/JOBS4.RData") ## ----eval=FALSE--------------------------------------------------------------- # setwd("/mnt/scratch/quantgen/TENSOR_EVD/pipeline") # source("https://raw.githubusercontent.com/MarcooLopez/tensorEVD/main/misc/functions.R") # load("parms/JOBS4.RData") # prefix <- "output/genomic_prediction/10F_CV" # # dat <- c() # for(trait in levels(JOBS$trait)){ # for(method in levels(JOBS$method)){ # for(alpha in unique(JOBS$alpha)){ # out0 <- c() # for(fold in unique(JOBS$fold)){ # suffix <- paste0(trait,"/",method,"/alpha_",100*alpha,"/results_fold_",fold,".RData") # filename <- paste0(prefix,"/",suffix) # if(file.exists(filename)){ # load(filename) # out0 <- rbind(out0, out) # }else{ # message("File not found: '",suffix,"'") # } # } # tmp <- get_corr(out0, by="year_loc") # dat <- rbind(dat, data.frame(trait,method,alpha,tmp)) # } # } # } # # # Reshaping the data # dat$trait <- factor(dat$trait, levels=levels(JOBS$trait)) # out <- reshape2::dcast(dat, trait+alpha+year_loc+nRecords~method, value.var="correlation") # tmp <- reshape2::dcast(dat, trait+alpha+year_loc+nRecords~method, value.var="SE")[,levels(JOBS$method)] # colnames(tmp) <- paste0(colnames(tmp),".SE") # out <- data.frame(out, tmp) ## ----eval=TRUE---------------------------------------------------------------- out <- read.csv(url("https://raw.githubusercontent.com/MarcooLopez/tensorEVD/main/inst/extdata/results_10F_CV.txt")) head(out) ## ----eval=T, fig.width = 6.0, fig.height = 6.5, fig.align='center'------------ out$trait <- factor(out$trait, levels=unique(out$trait)) out$alpha <- factor(paste0(100*out$alpha,"%"), levels=c("98%","95%","90%")) # Figure 5: Within environment prediction correlation rg <- range(c(out$eigen,out$tensorEVD)) if(requireNamespace("ggplot2", quietly=TRUE)){ figure5 <- ggplot2::ggplot(out, ggplot2::aes(tensorEVD, eigen)) + ggplot2::geom_abline(color="gray70", linetype="dashed") + ggplot2::geom_point(fill="#56B4E9", shape=21, size=1.4) + ggplot2::facet_grid(trait ~ alpha) + ggplot2::theme_bw() + ggplot2::xlim(rg) + ggplot2::ylim(rg) } #print(figure5)