## ----setup, include=FALSE--------------------------------------------------------------------------------------------- oldopts <- options(width = 120) knitr::opts_chunk$set( collapse = TRUE, comment = " ", fig.width=7, fig.height=5 ) ## ----loadlib, eval=TRUE, message=FALSE, warning=FALSE----------------------------------------------------------------- library(osktnorm) ## ----load-excel-pkgs, message=FALSE, warning=FALSE-------------------------------------------------------------------- if (!requireNamespace("readxl", quietly = TRUE)) install.packages("readxl") if (!requireNamespace("writexl", quietly = TRUE)) install.packages("writexl") library(readxl) library(writexl) ## ----read-excel, message=FALSE, warning=FALSE------------------------------------------------------------------------- file_path <- system.file("extdata", "sativapheno.xlsx", package = "osktnorm") pheno <- read_excel(file_path) # Select some variables from the pheno phenoset <- as.data.frame(pheno[, c(4,5,6,7,9,10,12,13,14)]) phenoset <- phenoset[, sapply(phenoset, is.numeric)] head(phenoset) ## ----box-plots, message=FALSE, warning=FALSE-------------------------------------------------------------------------- oldpar <- par(no.readonly = TRUE) nvar <- ncol(phenoset) nrow_plot <- ceiling(sqrt(nvar)) ncol_plot <- ceiling(nvar / nrow_plot) par(mfrow = c(nrow_plot, ncol_plot), mar = c(4, 4, 3, 1)) cols <- rainbow(nvar) for(i in seq_len(nvar)) { x <- phenoset[[i]] x <- x[!is.na(x)] if(length(x) == 0) next h_calc <- hist(x, plot = FALSE) d <- density(x) max_y <- max(c(h_calc$density, d$y)) * 1.1 hist(x, breaks = 15, freq = FALSE, col = adjustcolor(cols[i], 0.5), border = "white", main = colnames(phenoset)[i], xlab = "Transformed Value", ylab = "Density", ylim = c(0, max_y)) lines(d, col = "black", lwd = 2) curve(dnorm(x, mean = mean(x), sd = sd(x)), add = TRUE, col = "red", lty = 2, lwd = 2) } par(oldpar) ## ----densplots, message=FALSE, warning=FALSE-------------------------------------------------------------------------- shapiro_results <- data.frame( Trait = colnames(phenoset), W = NA_real_, pval = NA_real_, IsNormal = NA_character_, stringsAsFactors = FALSE ) for(i in seq_len(ncol(phenoset))) { x <- phenoset[[i]] x <- x[!is.na(x)] if(length(x) >= 3) { test <- shapiro.test(x) shapiro_results$W[i] <- test$statistic shapiro_results$pval[i] <- test$p.value shapiro_results$IsNormal[i] <- ifelse(test$p.value > 0.05, "YES", "NO") } else { shapiro_results$W[i] <- NA shapiro_results$pval[i] <- NA shapiro_results$IsNormal[i] <- NA } } shapiro_results ## ----batch-normalization, message=FALSE, warning=FALSE---------------------------------------------------------------- # Normalization only norm_res1 <- osktnorm(phenoset, normtests = FALSE) phenonormal <- norm_res1$normalized head(phenonormal) ## ----boxplots, message=FALSE, warning=FALSE--------------------------------------------------------------------------- oldpar <- par(no.readonly = TRUE) par(mar = c(8,4,4,2)) boxplot( phenonormal, col = rainbow(ncol(phenonormal)), border = "black", outline = TRUE, las = 2, main = "OSKT Normalized Values", ylab = "Values", cex.axis = 0.8 ) par(oldpar) ## ----histograms, message=FALSE, warning=FALSE------------------------------------------------------------------------- oldpar <- par(no.readonly = TRUE) nvar <- ncol(phenonormal) nrow_plot <- ceiling(sqrt(nvar)) ncol_plot <- ceiling(nvar / nrow_plot) par(mfrow = c(nrow_plot, ncol_plot), mar = c(4,4,3,1)) cols <- rainbow(nvar) for(i in seq_len(nvar)) { x <- phenonormal[[i]] x <- x[!is.na(x)] if(length(x) == 0) next d <- density(x) h_calc <- hist(x, plot = FALSE) max_y <- max(d$y, h_calc$density) * 1.1 hist(x, freq = FALSE, col = adjustcolor(cols[i], 0.6), border = "white", main = colnames(phenonormal)[i], xlab = "", ylab = "Density", ylim = c(0, max_y)) lines(d, col = "black", lwd = 2) } par(oldpar) ## ----optparams, message=FALSE, warning=FALSE-------------------------------------------------------------------------- norm_res2 <- osktnorm(phenoset, normtests = "all") phenonormal <- norm_res2$normalized paramstable <- norm_res2$parameters paramstable ## ----testresults, message=FALSE, warning=FALSE------------------------------------------------------------------------ testtable <- norm_res2$normtests testtable ## ----write-excel, eval=FALSE, echo=TRUE, message=FALSE, warning=FALSE------------------------------------------------- # writexl::write_xlsx(phenonormal, path = "sativapheno_normalized.xlsx") # ## ----final-checks, message=FALSE, warning=FALSE------------------------------- options(oldopts)