## ----------------------------------------------------------------------------- #| echo: false Sys.setenv(OMP_NUM_THREADS = 2) ## ----------------------------------------------------------------------------- #| message: false library(resemble) library(prospectr) # obtain a numeric vector of the wavelengths at which spectra is recorded wavs <- as.numeric(colnames(NIRsoil$spc)) # pre-process the spectra: # - use detrend # - use first order derivative diff_order <- 1 poly_order <- 1 window <- 7 # Preprocess spectra NIRsoil$spc_pr <- savitzkyGolay( detrend(NIRsoil$spc, wav = wavs), m = diff_order, p = poly_order, w = window ) train_x <- NIRsoil$spc_pr[NIRsoil$train == 1, ] train_y <- NIRsoil$Ciso[NIRsoil$train == 1] test_x <- NIRsoil$spc_pr[NIRsoil$train == 0, ] test_y <- NIRsoil$Ciso[NIRsoil$train == 0] ## ----------------------------------------------------------------------------- #| eval: true # Retain components that individually explain at least 1% of variance proj_var <- ortho_projection(train_x, ncomp = ncomp_by_var(0.01)) proj_var ## ----------------------------------------------------------------------------- #| eval: true # Retain enough components to explain at least 99% of variance proj_cumvar <- ortho_projection(train_x, ncomp = ncomp_by_cumvar(0.99)) proj_cumvar ## ----------------------------------------------------------------------------- #| eval: true # Optimal component selection using side information # using default max_ncomp which is 40 proj_opc <- ortho_projection(train_x, Yr = train_y, ncomp = ncomp_by_opc()) proj_opc ## ----------------------------------------------------------------------------- # this needs to be passed to the ncomp argument in ortho_projection() ncomp_by_opc(max_ncomp = 15) ## ----------------------------------------------------------------------------- #| eval: false # # Using ncomp_fixed() # proj_fixed <- ortho_projection(train_x, ncomp = ncomp_fixed(10)) # # # Equivalent: passing an integer directly # proj_fixed <- ortho_projection(train_x, ncomp = 10) ## ----------------------------------------------------------------------------- #| results: hide # PCA with default component selection (ncomp_by_var(0.01)) pca_train <- ortho_projection(train_x, method = "pca") pca_train ## ----------------------------------------------------------------------------- #| label: fig-pca-variance #| fig-cap: "Individual contribution to the explained variance for each component (left) and cumulative variance explained by the principal components (right)." #| fig-width: 7 #| fig-height: 3 plot(pca_train) ## ----------------------------------------------------------------------------- #| results: hide # PCA using NIPALS pca_nipals_train <- ortho_projection(train_x, method = "pca_nipals") ## ----------------------------------------------------------------------------- #| results: hide # PLS using SIMPLS with 10 components pls_train <- ortho_projection( train_x, Yr = train_y, method = "simpls", ncomp = 10 ) pls_train ## ----------------------------------------------------------------------------- #| results: hide # PCA with optimal component selection pca_opc <- ortho_projection( train_x, Yr = train_y, method = "pca", ncomp = ncomp_by_opc(40) ) pca_opc ## ----------------------------------------------------------------------------- #| label: fig-pca-opc #| fig-cap: "RMSD between observations and their nearest neighbors as a function of the number of principal components." #| fig-width: 5 #| fig-height: 4 plot(pca_opc, col = "#F59E0B") ## ----------------------------------------------------------------------------- #| results: hide # Fit PLS projection model pls_model <- ortho_projection( train_x, Yr = train_y, method = "simpls", ncomp = ncomp_by_opc(40), scale = TRUE ) # Project test data pls_test_scores <- predict(pls_model, newdata = test_x) ## ----------------------------------------------------------------------------- #| label: fig-pc-proj #| fig-width: 4.5 #| fig-height: 5 #| fig-cap: "Projection of the training observations in the first two PLS components (blue) and projection of the test observations onto the same latent space (red)." plot( pls_model$scores[, 1:2], col = rgb(0.231, 0.51, 0.965, 0.3), pch = 16, cex = 1.5, xlab = "PLS1", ylab = "PLS2" ) grid(lty = 1) points( pls_test_scores[, 1:2], col = rgb(0.961, 0.620, 0.043, 0.7), pch = 16, cex = 1.5 ) legend( "topright", legend = c("Training projection", "Predicted projection"), col = c( rgb(0.231, 0.51, 0.965, 0.3), rgb(0.961, 0.620, 0.043, 0.7) ), pch = 16, pt.cex = 1.5, box.col = "black" ) ## ----------------------------------------------------------------------------- #| results: hide # Project training and test data simultaneously pca_both <- ortho_projection( train_x, Xu = test_x, Yr = train_y, method = "pca", ncomp = ncomp_by_opc(40), scale = TRUE ) ## ----------------------------------------------------------------------------- #| results: hide #| eval: false # train_y2 <- NIRsoil$Nt[NIRsoil$train == 1] # train_y3 <- NIRsoil$CEC[NIRsoil$train == 1] # # # PLS with multivariate side information # pls_multi <- ortho_projection( # train_x, # Xu = test_x, # Yr = cbind(train_y, train_y2, train_y3), # method = "pca", # ncomp = ncomp_by_opc(40), # scale = TRUE # ) # # plot(pls_multi)