## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse   = TRUE,
  comment    = "#>",
  fig.width  = 7,
  fig.height = 4.5,
  message    = FALSE,
  warning    = FALSE
)

## ----csv-import---------------------------------------------------------------
library(NRMstatsML)

csv_path <- system.file("extdata", "nrm_example.csv",
                        package = "NRMstatsML")
d <- read.csv(csv_path, stringsAsFactors = FALSE)

nrm_data_check(d, time_var = "year", verbose = TRUE)

## ----trend-bootstrap----------------------------------------------------------
# Define a statistic function: Sen's slope on crop_yield
sens_stat <- function(df) {
  nrm_sens_slope(df, time_var = "year", value_var = "crop_yield")$slope
}

# Bootstrap the slope — 500 replicates for speed
bs_slope <- nrm_bootstrap(d, stat_fn = sens_stat, n_iter = 500)
print(bs_slope)

## ----response-surface---------------------------------------------------------
# Fit a quadratic surface: yield ~ N + rainfall (manual grid approach)
mv_surface <- nrm_multivariate(d,
  formula = crop_yield ~ N + I(N^2) + rainfall + I(rainfall^2) + N:rainfall,
  scale   = FALSE)

# Prediction grid
N_seq   <- seq(60, 120, length.out = 30)
R_seq   <- seq(610, 720, length.out = 30)
grid    <- expand.grid(N = N_seq, rainfall = R_seq)

grid$predicted_yield <- stats::predict(mv_surface$model, newdata = grid)

# Simple contour summary (base graphics — no extra dependencies)
with(
  reshape(grid, idvar = "N", timevar = "rainfall",
          direction = "wide")[, 1:10],   # abbreviated for display
  message(sprintf("Predicted yield range: %.2f to %.2f t/ha",
                  min(grid$predicted_yield),
                  max(grid$predicted_yield)))
)

## ----surface-plot, fig.height=5-----------------------------------------------
# ggplot2 visualisation of the response surface
library(ggplot2)

ggplot(grid, aes(x = N, y = rainfall, fill = predicted_yield)) +
  geom_tile() +
  scale_fill_viridis_c(name = "Yield\n(t/ha)", option = "C") +
  geom_contour(aes(z = predicted_yield), colour = "white",
               alpha = 0.5, linewidth = 0.4) +
  labs(
    title    = "Predicted Yield Response Surface",
    subtitle = "Quadratic model: crop_yield ~ N × rainfall",
    x        = "Nitrogen applied (kg/ha)",
    y        = "Rainfall (mm)"
  ) +
  theme_minimal(base_size = 13)

## ----boot-forecast------------------------------------------------------------
# Collect horizon-1 point forecasts from 200 bootstrap resamples
set.seed(42)
boot_forecasts <- replicate(200, {
  idx      <- sample(nrow(d), replace = TRUE)
  boot_d   <- d[sort(idx), ]          # keep time order approximately
  boot_d$year <- d$year               # restore exact year sequence
  fc <- nrm_forecast(boot_d, value_var = "crop_yield",
                     horizon = 1, method = "auto_arima")
  as.numeric(fc$forecast$mean)
}, simplify = TRUE)

cat(sprintf(
  "Bootstrap forecast (h=1):\n  Mean = %.3f t/ha\n  95%% CI: [%.3f, %.3f]\n",
  mean(boot_forecasts),
  quantile(boot_forecasts, 0.025),
  quantile(boot_forecasts, 0.975)
))

## ----custom-rsq---------------------------------------------------------------
rsq_fn <- function(df) {
  m  <- lm(crop_yield ~ N + P + K, data = df)
  summary(m)$r.squared
}

bs_rsq <- nrm_bootstrap(d, stat_fn = rsq_fn, n_iter = 500)
print(bs_rsq)

## ----custom-optN--------------------------------------------------------------
opt_N_fn <- function(df) {
  rc <- nrm_response_curve(df, input_var = "N",
                            response_var = "crop_yield",
                            type = "quadratic")
  opt <- nrm_optimize_input(rc, price_ratio = 0.6)
  opt$economic_optimum
}

bs_optN <- nrm_bootstrap(d, stat_fn = opt_N_fn, n_iter = 300)
cat(sprintf(
  "Bootstrapped economic optimum N:\n  Mean = %.1f kg/ha\n  95%% CI: [%.1f, %.1f]\n",
  bs_optN$mean, bs_optN$ci[1], bs_optN$ci[2]
))

## ----custom-tau---------------------------------------------------------------
tau_fn <- function(df) {
  nrm_mann_kendall(df, time_var = "year", value_var = "soil_OC")$tau
}

mc_tau <- nrm_monte_carlo(d, stat_fn = tau_fn, n_iter = 300, noise_sd = 0.05)
print(mc_tau)

## ----benchmark----------------------------------------------------------------
set.seed(123)
n_d   <- nrow(d)
idx   <- sample(n_d, floor(0.75 * n_d))
train <- d[ idx, ]
test  <- d[-idx, ]

# Fit three candidate models
m_ols  <- lm(crop_yield ~ N + P + K + rainfall,          data = train)
m_quad <- lm(crop_yield ~ N + I(N^2) + rainfall,         data = train)
m_full <- lm(crop_yield ~ N + P + K + rainfall + soil_OC, data = train)

bm <- nrm_benchmark(
  models       = list(ols = m_ols, quadratic = m_quad, full_ols = m_full),
  test_data    = test,
  response_var = "crop_yield"
)
print(bm)

## ----full-pipeline, eval = FALSE----------------------------------------------
# library(NRMstatsML)
# 
# # 1. Import
# d <- read.csv(system.file("extdata", "nrm_example.csv",
#                            package = "NRMstatsML"))
# 
# # 2. Validate
# nrm_data_check(d)
# 
# # 3. Trend
# tr  <- nrm_trend(d, time_var = "year", value_var = "crop_yield")
# nrm_plot(tr)
# 
# # 4. Multivariate
# mv  <- nrm_multivariate(d, crop_yield ~ N + P + K + rainfall)
# nrm_summary(mv)
# 
# # 5. Response curve and optimum
# rc  <- nrm_response_curve(d, "N", "crop_yield", type = "quadratic")
# opt <- nrm_optimize_input(rc, price_ratio = 0.6)
# nrm_plot(rc)
# 
# # 6. Forecast
# fc  <- nrm_forecast(d, "crop_yield", horizon = 5)
# nrm_plot(fc)
# 
# # 7. Uncertainty
# unc <- nrm_uncertainty(d,
#         stat_fn = function(x) mean(x$crop_yield),
#         method  = "bootstrap", n_iter = 1000)
# print(unc)

## ----session------------------------------------------------------------------
sessionInfo()