## ----setup, include=FALSE--------------------------------------------------------------------------------------------- oldopts <- options(width = 120) knitr::opts_chunk$set( collapse = TRUE, comment = " ", fig.width=7, fig.height=5 ) ## ----loadlib, eval=TRUE, message=FALSE, warning=FALSE----------------------------------------------------------------- library(osktnorm) ## ----extractresults, eval=TRUE, message=FALSE, warning=FALSE---------------------------------------------------------- set.seed(12) x_orig <- rlnorm(300, mean=0, sd=0.5) # Generate right-skewed data # Apply OSKT normality res_oskt <- osktfast(x_orig) x_transformed <- res_oskt$transformed head(x_transformed, 5) g_star <- res_oskt$g h_star <- res_oskt$h A2 <- res_oskt$value cat("Optimized skewness: ", g_star, "\n") cat("Optimized kurtosis: ", h_star, "\n") cat("Anderson-Darling statistic at the optimum: ", A2, "\n") ## ----visualization, eval=TRUE, message=FALSE, warning=FALSE----------------------------------------------------------- oldpar <- par(no.readonly = TRUE) breaks <- pretty(range(c(x_orig, x_transformed)), n = 25) h_orig <- hist(x_orig, breaks = breaks, plot = FALSE) h_trans <- hist(x_transformed, breaks = breaks, plot = FALSE) d_orig <- density(x_orig); d_trans <- density(x_transformed) ymax <- max(c(h_orig$density, h_trans$density, d_orig$y, d_trans$y, dnorm(0))) hist(x_orig, breaks = breaks, freq = FALSE, ylim = c(0, ymax * 1.05), col = rgb(0.2, 0.4, 0.8, 0.4), border = "white", main = "Before and After OSKT Transformation", xlab = "Value") lines(d_orig, col = "blue", lwd = 2) hist(x_transformed, breaks = breaks, freq = FALSE, col = rgb(0.8, 0.3, 0.3, 0.4), border = "white", add = TRUE) lines(d_trans, col = "red", lwd = 2) curve(dnorm(x), add = TRUE, lwd = 2, lty = 2, col = "black") # Standard normal reference legend("topleft", legend = c("Original", "Transformed", "Original Density", "OSKT Density", "Standard Normal"), col = c(rgb(0.2,0.4,0.8,0.6), rgb(0.8,0.3,0.3,0.6), "blue", "red", "black"), lwd = c(10, 10, 2, 2, 2), lty = c(1, 1, 1, 1, 2), bty = "n") par(oldpar) ## ----backtransformation, eval=TRUE, message=FALSE, warning=FALSE------------------------------------------------------ X_mean <- mean(x_orig) X_sd <- sd(x_orig) res_back <- backosktfast( Z = x_transformed, X_mean = X_mean, X_sd = X_sd, g = g_star, h = h_star, method = "brent") x_recovered <- res_back$X_orig head(x_recovered, 5) ## ----backtransformation2, eval=TRUE, message=FALSE, warning=FALSE----------------------------------------------------- oldpar <- par(no.readonly = TRUE) breaks <- pretty(range(c(x_orig, x_transformed, x_recovered)), n = 30) hist(x_orig, breaks = breaks, freq = FALSE, col = rgb(0.2, 0.4, 0.9, 0.4), border = "white", main="OSKT Transformation & Back Transformation", xlab="Value") hist(x_transformed, breaks = breaks, freq = FALSE, col = rgb(0.8, 0.3, 0.3, 0.4), border = "white", add=TRUE) hist(x_recovered, breaks = breaks, freq = FALSE, col = rgb(0.2,0.8,0.2,0.4), border = "white", add=TRUE) legend("topleft", legend = c("Original","Transformed","Back-transformed"), fill = c(rgb(0.2,0.4,0.8,0.4), rgb(0.8,0.3,0.3,0.4), rgb(0.2,0.8,0.2,0.4))) (all.equal(x_orig, x_recovered, tolerance = 1e-6)) par(oldpar) ## ----backdiagnostics, eval=TRUE, message=FALSE, warning=FALSE--------------------------------------------------------- ok <- is.finite(x_orig) & is.finite(x_recovered) xo <- x_orig[ok] xr <- x_recovered[ok] err <- xr - xo MAE <- mean(abs(err)) RMSE <- sqrt(mean(err^2)) COR <- cor(xo, xr) back_stats <- data.frame(RMSE = RMSE, MAE = MAE, Correlation= COR, R2 = COR^2) round(t(back_stats), 8) ## ----normality-table, message=FALSE, warning=FALSE-------------------------------------------------------------------- set.seed(12) x_orig <- groupcompare::ghdist(n=300, A=0, B=1, g=-0.49, h=0) x_bc <- osktnorm::boxcox(x_orig, makepositive=TRUE)$transformed x_yj <- osktnorm::yeojohnson(x_orig)$transformed x_oskt <- osktfast(x_orig)$transformed get_stats <- function(x) { x <- x[is.finite(x)] c( Skew = mean((x - mean(x))^3) / sd(x)^3, Kurt = mean((x - mean(x))^4) / sd(x)^4 - 3, SW = shapiro.test(x)$p.value, CVM = cvmtest(x)$p.value, PPM = unname(pearsonp(x)$statistic) ) } pval_table <- rbind(ORG = get_stats(x_orig), BC = get_stats(x_bc), YJ = get_stats(x_yj), OSKT = get_stats(x_oskt)) as.data.frame(round(pval_table, 4)) ## ----final_checks, message=FALSE, warning=FALSE------------------------------- options(oldopts)