This vignette covers advanced workflows built on top of the core NRMstatsML modules, including:
stat_fn closuresThe package ships a plain-text version of nrm_example
under inst/extdata for users who want to demonstrate a full
import-to-analysis pipeline:
library(NRMstatsML)
csv_path <- system.file("extdata", "nrm_example.csv",
package = "NRMstatsML")
d <- read.csv(csv_path, stringsAsFactors = FALSE)
nrm_data_check(d, time_var = "year", verbose = TRUE)
#> === NRMstatsML: Data Check ===
#> Rows : 20
#> Columns : 10
#> Numeric : year, crop_yield, soil_OC, N, P, K, rainfall, runoff, biomass
#> Status : OKA common question in long-term NRM studies is: is the observed trend in yield real, or could it arise by chance? Bootstrap sampling answers this by resampling the dataset many times and computing Sen’s slope on each resample.
# Define a statistic function: Sen's slope on crop_yield
sens_stat <- function(df) {
nrm_sens_slope(df, time_var = "year", value_var = "crop_yield")$slope
}
# Bootstrap the slope — 500 replicates for speed
bs_slope <- nrm_bootstrap(d, stat_fn = sens_stat, n_iter = 500)
print(bs_slope)
#> -- Bootstrap --
#> Mean : -0.0005 SD : 0.0254 CI : [-0.0494, 0.0519]If the 95% CI for the slope excludes zero, the trend is significant. This approach is more robust than a single-pass Mann-Kendall test because it directly quantifies slope uncertainty.
When two inputs interact (e.g. N × rainfall), a full response surface
reveals the joint optimum. We build this by iterating
nrm_response_curve() across a grid and collecting predicted
yields.
# Fit a quadratic surface: yield ~ N + rainfall (manual grid approach)
mv_surface <- nrm_multivariate(d,
formula = crop_yield ~ N + I(N^2) + rainfall + I(rainfall^2) + N:rainfall,
scale = FALSE)
# Prediction grid
N_seq <- seq(60, 120, length.out = 30)
R_seq <- seq(610, 720, length.out = 30)
grid <- expand.grid(N = N_seq, rainfall = R_seq)
grid$predicted_yield <- stats::predict(mv_surface$model, newdata = grid)
# Simple contour summary (base graphics — no extra dependencies)
with(
reshape(grid, idvar = "N", timevar = "rainfall",
direction = "wide")[, 1:10], # abbreviated for display
message(sprintf("Predicted yield range: %.2f to %.2f t/ha",
min(grid$predicted_yield),
max(grid$predicted_yield)))
)# ggplot2 visualisation of the response surface
library(ggplot2)
ggplot(grid, aes(x = N, y = rainfall, fill = predicted_yield)) +
geom_tile() +
scale_fill_viridis_c(name = "Yield\n(t/ha)", option = "C") +
geom_contour(aes(z = predicted_yield), colour = "white",
alpha = 0.5, linewidth = 0.4) +
labs(
title = "Predicted Yield Response Surface",
subtitle = "Quadratic model: crop_yield ~ N × rainfall",
x = "Nitrogen applied (kg/ha)",
y = "Rainfall (mm)"
) +
theme_minimal(base_size = 13)Instead of a single ARIMA forecast, we propagate parametric
uncertainty by running nrm_forecast() on bootstrap
resamples of the historical record.
# Collect horizon-1 point forecasts from 200 bootstrap resamples
set.seed(42)
boot_forecasts <- replicate(200, {
idx <- sample(nrow(d), replace = TRUE)
boot_d <- d[sort(idx), ] # keep time order approximately
boot_d$year <- d$year # restore exact year sequence
fc <- nrm_forecast(boot_d, value_var = "crop_yield",
horizon = 1, method = "auto_arima")
as.numeric(fc$forecast$mean)
}, simplify = TRUE)
cat(sprintf(
"Bootstrap forecast (h=1):\n Mean = %.3f t/ha\n 95%% CI: [%.3f, %.3f]\n",
mean(boot_forecasts),
quantile(boot_forecasts, 0.025),
quantile(boot_forecasts, 0.975)
))
#> Bootstrap forecast (h=1):
#> Mean = 5.448 t/ha
#> 95% CI: [5.157, 5.548]stat_fn Closuresnrm_uncertainty() and nrm_bootstrap()
accept any function of the form f(data) → scalar. This
makes them highly extensible.
rsq_fn <- function(df) {
m <- lm(crop_yield ~ N + P + K, data = df)
summary(m)$r.squared
}
bs_rsq <- nrm_bootstrap(d, stat_fn = rsq_fn, n_iter = 500)
print(bs_rsq)
#> -- Bootstrap --
#> Mean : 0.9995 SD : 0.0002 CI : [0.9990, 0.9998]opt_N_fn <- function(df) {
rc <- nrm_response_curve(df, input_var = "N",
response_var = "crop_yield",
type = "quadratic")
opt <- nrm_optimize_input(rc, price_ratio = 0.6)
opt$economic_optimum
}
bs_optN <- nrm_bootstrap(d, stat_fn = opt_N_fn, n_iter = 300)
cat(sprintf(
"Bootstrapped economic optimum N:\n Mean = %.1f kg/ha\n 95%% CI: [%.1f, %.1f]\n",
bs_optN$mean, bs_optN$ci[1], bs_optN$ci[2]
))
#> Bootstrapped economic optimum N:
#> Mean = -98660.8 kg/ha
#> 95% CI: [-279854.7, -11328.2]tau_fn <- function(df) {
nrm_mann_kendall(df, time_var = "year", value_var = "soil_OC")$tau
}
mc_tau <- nrm_monte_carlo(d, stat_fn = tau_fn, n_iter = 300, noise_sd = 0.05)
print(mc_tau)
#> -- Monte Carlo --
#> Mean : 0.9972 SD : 0.0054 CI : [0.9789, 1.0000]Compare OLS, quadratic OLS, and PLS side-by-side using
nrm_benchmark() on a simple hold-out split.
set.seed(123)
n_d <- nrow(d)
idx <- sample(n_d, floor(0.75 * n_d))
train <- d[ idx, ]
test <- d[-idx, ]
# Fit three candidate models
m_ols <- lm(crop_yield ~ N + P + K + rainfall, data = train)
m_quad <- lm(crop_yield ~ N + I(N^2) + rainfall, data = train)
m_full <- lm(crop_yield ~ N + P + K + rainfall + soil_OC, data = train)
bm <- nrm_benchmark(
models = list(ols = m_ols, quadratic = m_quad, full_ols = m_full),
test_data = test,
response_var = "crop_yield"
)
print(bm)
#> === NRMstatsML: Model Benchmark ===
#>
#> model RMSE MAE Rsq
#> full_ols 0.0279 0.0200 0.9974
#> ols 0.0285 0.0191 0.9973
#> quadratic 0.0435 0.0380 0.9937Putting it all together — a single self-contained script suitable for a supplementary materials section:
library(NRMstatsML)
# 1. Import
d <- read.csv(system.file("extdata", "nrm_example.csv",
package = "NRMstatsML"))
# 2. Validate
nrm_data_check(d)
# 3. Trend
tr <- nrm_trend(d, time_var = "year", value_var = "crop_yield")
nrm_plot(tr)
# 4. Multivariate
mv <- nrm_multivariate(d, crop_yield ~ N + P + K + rainfall)
nrm_summary(mv)
# 5. Response curve and optimum
rc <- nrm_response_curve(d, "N", "crop_yield", type = "quadratic")
opt <- nrm_optimize_input(rc, price_ratio = 0.6)
nrm_plot(rc)
# 6. Forecast
fc <- nrm_forecast(d, "crop_yield", horizon = 5)
nrm_plot(fc)
# 7. Uncertainty
unc <- nrm_uncertainty(d,
stat_fn = function(x) mean(x$crop_yield),
method = "bootstrap", n_iter = 1000)
print(unc)sessionInfo()
#> R version 4.2.1 (2022-06-23 ucrt)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 26200)
#>
#> Matrix products: default
#>
#> locale:
#> [1] LC_COLLATE=C
#> [2] LC_CTYPE=English_United States.utf8
#> [3] LC_MONETARY=English_United States.utf8
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=English_United States.utf8
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ggplot2_4.0.2 NRMstatsML_0.1.4
#>
#> loaded via a namespace (and not attached):
#> [1] Rcpp_1.1.1 urca_1.3-4 pillar_1.11.1
#> [4] bslib_0.10.0 compiler_4.2.1 RColorBrewer_1.1-3
#> [7] jquerylib_0.1.4 tools_4.2.1 trend_1.1.6
#> [10] boot_1.3-32 digest_0.6.39 lattice_0.20-45
#> [13] nlme_3.1-168 viridisLite_0.4.3 tibble_3.3.1
#> [16] jsonlite_2.0.0 evaluate_1.0.5 lifecycle_1.0.5
#> [19] gtable_0.3.6 pkgconfig_2.0.3 rlang_1.1.7
#> [22] cli_3.6.5 rstudioapi_0.18.0 Kendall_2.2.2
#> [25] parallel_4.2.1 yaml_2.3.12 xfun_0.57
#> [28] fastmap_1.2.0 withr_3.0.2 extraDistr_1.10.0.2
#> [31] dplyr_1.2.0 knitr_1.51 generics_0.1.4
#> [34] sass_0.4.10 vctrs_0.7.2 forecast_9.0.2
#> [37] isoband_0.3.0 tidyselect_1.2.1 grid_4.2.1
#> [40] glue_1.7.0 R6_2.6.1 otel_0.2.0
#> [43] rmarkdown_2.31 farver_2.1.2 magrittr_2.0.3
#> [46] scales_1.4.0 htmltools_0.5.9 timeDate_4052.112
#> [49] colorspace_2.1-2 fracdiff_1.5-3 labeling_0.4.3
#> [52] S7_0.2.1 cachem_1.1.0 zoo_1.8-15