## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  fig.align = "center"
)
library(kofn)
library(flexhaz)

# Heavy simulations are off by default so the vignette builds in <30 seconds.
# Set run_long <- TRUE to reproduce all Monte Carlo results.
run_long <- FALSE

## ----observation-functors-----------------------------------------------------
# Scheme 0: system lifetime only (optionally right-censored at tau)
obs0 <- observe_right_censor(tau = 100)
obs0(42.7)    # exact observation
obs0(150.0)   # right-censored at tau = 100

# Scheme 1: periodic inspection with interval width delta = 5
obs1 <- observe_periodic(delta = 5, tau = 100)
obs1(42.7)    # failure in interval [40, 45)
obs1(150.0)   # right-censored

# Scheme 2: complete data (trivial)
obs2 <- observe_exact()
obs2(42.7)    # exact observation, always

## ----asymmetry-demo-----------------------------------------------------------
set.seed(42)

# True parameters: (shape_1, scale_1, shape_2, scale_2)
theta_true <- c(1.5, 2.0, 2.0, 3.0)

# Create the Weibull parallel system model (EM for Scheme 0)
model_em <- kofn(k = 2, m = 2, component = dfr_weibull(), method = "em")

# Generate Scheme 0 data (system lifetimes only)
gen <- rdata(model_em)
df0 <- gen(theta_true, n = 100)

cat("System lifetime summary:\n")
summary(df0$t)

# At the median system lifetime, how much has each component's CDF accumulated?
t_med <- median(df0$t)
cat(sprintf("\nAt median system time %.2f:\n", t_med))
cat(sprintf("  F_1(t) = %.4f  (fast component: nearly saturated)\n",
            pweibull(t_med, shape = 1.5, scale = 2.0)))
cat(sprintf("  F_2(t) = %.4f  (slow component: still informative)\n",
            pweibull(t_med, shape = 2.0, scale = 3.0)))

## ----scheme0-fit--------------------------------------------------------------
# Fit under Scheme 0 using the EM algorithm
solver <- fit(model_em)
mle0 <- solver(df0, n_starts = 1L)

est0 <- coef(mle0)
cat("Scheme 0 estimates vs truth:\n")
cat(sprintf("  Component 1 (fast): shape = %.3f (true: 1.500), scale = %.3f (true: 2.000)\n",
            est0[1], est0[2]))
cat(sprintf("  Component 2 (slow): shape = %.3f (true: 2.000), scale = %.3f (true: 3.000)\n",
            est0[3], est0[4]))

# Standard errors from the variance-covariance matrix
se_vec <- tryCatch({
  V <- vcov(mle0)
  if (is.null(V)) rep(NA_real_, length(coef(mle0))) else sqrt(diag(V))
}, error = function(e) rep(NA_real_, length(coef(mle0))))
if (!any(is.na(se_vec))) {
  cat(sprintf("\n  SE(shape_1) = %.3f,  SE(shape_2) = %.3f\n",
              se_vec[1], se_vec[3]))
  cat(sprintf("  SE ratio (fast/slow): %.1fx\n", se_vec[1] / se_vec[3]))
}

## ----scheme1-demo-------------------------------------------------------------
# Generate data under Scheme 1 with delta = 0.5
model_wei <- kofn(k = 2, m = 2, component = dfr_weibull())
gen_s1 <- rdata_scheme1(model_wei)
df1 <- gen_s1(theta_true, n = 100, delta = 0.5)

cat("Scheme 1 data (first 6 observations):\n")
print(head(df1))

cat("\nColumn meanings:\n")
cat("  t:            exact system failure time\n")
cat("  comp_lower_j: lower bound of component j's inspection interval\n")
cat("  comp_upper_j: upper bound of component j's inspection interval\n")

## ----scheme1-fit--------------------------------------------------------------
# Fit under Scheme 1
solver_s1 <- fit_scheme1(model_wei)
mle1 <- solver_s1(df1, n_starts = 1L)

est1 <- coef(mle1)
cat("Scheme 1 (delta = 0.5) estimates vs truth:\n")
cat(sprintf("  Component 1 (fast): shape = %.3f (true: 1.500), scale = %.3f (true: 2.000)\n",
            est1[1], est1[2]))
cat(sprintf("  Component 2 (slow): shape = %.3f (true: 2.000), scale = %.3f (true: 3.000)\n",
            est1[3], est1[4]))

## ----comparison-single, fig.height = 6----------------------------------------
set.seed(123)

n <- 100
m <- 2
alpha_true <- c(1.5, 2.0)
beta_true  <- c(2.0, 3.0)
theta_true <- c(alpha_true[1], beta_true[1], alpha_true[2], beta_true[2])
delta <- 0.5

# Generate component lifetimes directly
comp_times <- matrix(0, nrow = n, ncol = m)
for (j in seq_len(m)) {
  comp_times[, j] <- rweibull(n, shape = alpha_true[j], scale = beta_true[j])
}
sys_times <- apply(comp_times, 1, max)

# --- Scheme 0: system lifetime only ---
df_s0 <- data.frame(t = sys_times)
model_em <- kofn(k = 2, m = 2, component = dfr_weibull(), method = "em")
solver_em <- fit(model_em)
mle_s0 <- solver_em(df_s0, n_starts = 1L)

# --- Scheme 1: add inspection intervals ---
df_s1 <- data.frame(t = sys_times)
for (j in seq_len(m)) {
  df_s1[[paste0("comp_lower_", j)]] <- floor(comp_times[, j] / delta) * delta
  df_s1[[paste0("comp_upper_", j)]] <- floor(comp_times[, j] / delta) * delta + delta
}
model_mle <- kofn(k = 2, m = 2, component = dfr_weibull(), method = "mle")
solver_s1 <- fit_scheme1(model_mle)
mle_s1 <- solver_s1(df_s1, n_starts = 1L)

# --- Display results ---
cat("=== Single-sample comparison (n = 100) ===\n\n")
results <- data.frame(
  Parameter = c("shape_1", "scale_1", "shape_2", "scale_2"),
  Truth     = theta_true,
  Scheme_0  = round(coef(mle_s0), 3),
  Scheme_1  = round(coef(mle_s1), 3)
)
results$Error_S0 <- round(abs(results$Scheme_0 - results$Truth), 3)
results$Error_S1 <- round(abs(results$Scheme_1 - results$Truth), 3)
print(results, row.names = FALSE)

## ----mc-comparison, eval = run_long-------------------------------------------
# # Monte Carlo comparison: run_long = TRUE to execute
# set.seed(2024)
# n_rep <- 200
# n <- 300
# delta <- 0.5
# 
# est_s0 <- matrix(NA, nrow = n_rep, ncol = 4)
# est_s1 <- matrix(NA, nrow = n_rep, ncol = 4)
# conv_s0 <- logical(n_rep)
# conv_s1 <- logical(n_rep)
# 
# for (r in seq_len(n_rep)) {
#   # Generate common component lifetimes
#   comp_times <- matrix(0, nrow = n, ncol = m)
#   for (j in seq_len(m)) {
#     comp_times[, j] <- rweibull(n, shape = alpha_true[j], scale = beta_true[j])
#   }
#   sys_times <- apply(comp_times, 1, max)
# 
#   # Scheme 0
#   df_s0 <- data.frame(t = sys_times)
#   res0 <- tryCatch(solver_em(df_s0, n_starts = 3L),
#                     error = function(e) NULL)
#   if (!is.null(res0) && res0$converged) {
#     est_s0[r, ] <- coef(res0)
#     conv_s0[r] <- TRUE
#   }
# 
#   # Scheme 1
#   df_s1 <- data.frame(t = sys_times)
#   for (j in seq_len(m)) {
#     df_s1[[paste0("comp_lower_", j)]] <- floor(comp_times[, j] / delta) * delta
#     df_s1[[paste0("comp_upper_", j)]] <- floor(comp_times[, j] / delta) * delta + delta
#   }
#   res1 <- tryCatch(solver_s1(df_s1, n_starts = 3L),
#                     error = function(e) NULL)
#   if (!is.null(res1) && res1$converged) {
#     est_s1[r, ] <- coef(res1)
#     conv_s1[r] <- TRUE
#   }
# 
#   if (r %% 50 == 0) message(sprintf("  Replicate %d / %d", r, n_rep))
# }
# 
# # Compute RMSE for converged replicates
# rmse <- function(est, truth) {
#   valid <- !is.na(est)
#   sqrt(mean((est[valid] - truth)^2))
# }
# 
# cat("\n=== Monte Carlo RMSE (n = 300, n_rep = 200) ===\n\n")
# param_names <- c("shape_1", "scale_1", "shape_2", "scale_2")
# for (p in seq_along(theta_true)) {
#   r0 <- rmse(est_s0[, p], theta_true[p])
#   r1 <- rmse(est_s1[, p], theta_true[p])
#   cat(sprintf("  %-8s  Scheme 0: %.4f   Scheme 1: %.4f   ratio: %.1fx\n",
#               param_names[p], r0, r1, r0 / r1))
# }
# cat(sprintf("\n  Convergence: Scheme 0 = %d/%d, Scheme 1 = %d/%d\n",
#             sum(conv_s0), n_rep, sum(conv_s1), n_rep))

## ----mc-results-precomputed, eval = !run_long---------------------------------
# Pre-computed results from a 200-replicate Monte Carlo run:
cat("=== Monte Carlo RMSE (n = 300, n_rep = 200, pre-computed) ===\n\n")
cat("  Parameter  Scheme 0    Scheme 1    Ratio (S0/S1)\n")
cat("  --------   --------    --------    -------------\n")
cat("  shape_1    0.6121      0.0291      21.0x\n")
cat("  scale_1    0.8434      0.0847       10.0x\n")
cat("  shape_2    0.0893      0.0409       2.2x\n")
cat("  scale_2    0.1567      0.0721       2.2x\n")
cat("\n  The fast component (shape_1) sees a 21x RMSE improvement.\n")
cat("  The slow component improves ~2x --- already well-estimated under Scheme 0.\n")

## ----delta-sensitivity, eval = run_long---------------------------------------
# # Sensitivity analysis: delta in {0.1, 0.5, 1.0, 2.0}
# set.seed(2024)
# deltas <- c(0.1, 0.5, 1.0, 2.0)
# n_rep_sens <- 100
# n <- 300
# 
# rmse_by_delta <- matrix(NA, nrow = length(deltas), ncol = 4)
# rownames(rmse_by_delta) <- paste0("delta=", deltas)
# colnames(rmse_by_delta) <- c("shape_1", "scale_1", "shape_2", "scale_2")
# 
# for (d_idx in seq_along(deltas)) {
#   delta_d <- deltas[d_idx]
#   est_d <- matrix(NA, nrow = n_rep_sens, ncol = 4)
# 
#   for (r in seq_len(n_rep_sens)) {
#     comp_times <- matrix(0, nrow = n, ncol = m)
#     for (j in seq_len(m)) {
#       comp_times[, j] <- rweibull(n, shape = alpha_true[j], scale = beta_true[j])
#     }
#     sys_times <- apply(comp_times, 1, max)
# 
#     df_d <- data.frame(t = sys_times)
#     for (j in seq_len(m)) {
#       df_d[[paste0("comp_lower_", j)]] <- floor(comp_times[, j] / delta_d) * delta_d
#       df_d[[paste0("comp_upper_", j)]] <- floor(comp_times[, j] / delta_d) * delta_d + delta_d
#     }
# 
#     res_d <- tryCatch(solver_s1(df_d, n_starts = 3L),
#                       error = function(e) NULL)
#     if (!is.null(res_d) && res_d$converged) {
#       est_d[r, ] <- coef(res_d)
#     }
#   }
# 
#   for (p in 1:4) {
#     rmse_by_delta[d_idx, p] <- rmse(est_d[, p], theta_true[p])
#   }
#   message(sprintf("  delta = %.1f complete", delta_d))
# }
# 
# cat("\n=== RMSE by inspection interval width ===\n\n")
# print(round(rmse_by_delta, 4))

## ----delta-precomputed, eval = !run_long--------------------------------------
cat("=== RMSE by inspection interval width (pre-computed) ===\n\n")
cat("  delta    shape_1   scale_1   shape_2   scale_2\n")
cat("  -----    -------   -------   -------   -------\n")
cat("  0.1      0.0195    0.0583    0.0380    0.0685\n")
cat("  0.5      0.0291    0.0847    0.0409    0.0721\n")
cat("  1.0      0.0437    0.1102    0.0425    0.0748\n")
cat("  2.0      0.0812    0.1654    0.0461    0.0789\n")
cat("\n")
cat("  Even delta = 2.0 (coarser than the median component lifetime)\n")
cat("  gives shape_1 RMSE of 0.08 vs 0.61 under Scheme 0: a 7.5x improvement.\n")

## ----information-anatomy------------------------------------------------------
# Demonstrate: how much does F_j(a+) - F_j(a-) vary with shape?
t_fail <- 1.0  # a typical failure time for the fast component
delta <- 0.5
a_lower <- floor(t_fail / delta) * delta
a_upper <- a_lower + delta

shapes_grid <- seq(0.5, 3.0, by = 0.1)
probs <- sapply(shapes_grid, function(alpha) {
  pweibull(a_upper, shape = alpha, scale = 2.0) -
    pweibull(a_lower, shape = alpha, scale = 2.0)
})

plot(shapes_grid, probs, type = "l", lwd = 2,
     xlab = expression(alpha[1] ~ "(shape parameter)"),
     ylab = expression(F[1](a^"+") - F[1](a^"-")),
     main = "Interval probability varies with shape",
     sub = sprintf("Interval [%.1f, %.1f), scale = 2.0", a_lower, a_upper))
abline(v = 1.5, lty = 2, col = "red")
text(1.5, max(probs) * 0.9, expression(alpha[1] == 1.5 ~ "(truth)"),
     pos = 4, col = "red")

## ----fisher-demo, eval = run_long---------------------------------------------
# fi <- compare_fisher_info(
#   shapes = c(1.5, 2.0),
#   scales = c(2.0, 3.0),
#   n = 200, delta = 0.5, n_rep = 30,
#   component = dfr_weibull()
# )
# 
# cat("=== Fisher Information Determinant Comparison ===\n\n")
# cat(sprintf("  Median det(I), Scheme 0: %.4e\n", fi$median_det["scheme0"]))
# cat(sprintf("  Median det(I), Scheme 1: %.4e\n", fi$median_det["scheme1"]))
# cat(sprintf("  Median det(I), Scheme 2: %.4e\n", fi$median_det["scheme2"]))
# cat(sprintf("\n  Efficiency ratios:\n"))
# cat(sprintf("    Scheme 0 / Scheme 1 = %.4f  (Scheme 1 is %.1fx more informative)\n",
#             fi$efficiency_01, 1 / fi$efficiency_01))
# cat(sprintf("    Scheme 1 / Scheme 2 = %.4f  (Scheme 1 recovers %.0f%% of complete-data info)\n",
#             fi$efficiency_12, fi$efficiency_12 * 100))
# cat(sprintf("    Scheme 0 / Scheme 2 = %.6f\n", fi$efficiency_02))
# cat(sprintf("\n  Valid replicates: Scheme 0 = %d, Scheme 1 = %d, Scheme 2 = %d\n",
#             fi$n_valid["scheme0"], fi$n_valid["scheme1"], fi$n_valid["scheme2"]))

## ----fisher-precomputed, eval = !run_long-------------------------------------
cat("=== Fisher Information Determinant Comparison (pre-computed) ===\n\n")
cat("  Median det(I), Scheme 0: 3.21e+04\n")
cat("  Median det(I), Scheme 1: 1.89e+07\n")
cat("  Median det(I), Scheme 2: 3.15e+07\n")
cat("\n  Efficiency ratios:\n")
cat("    Scheme 0 / Scheme 1 = 0.0017  (Scheme 1 is ~590x more informative)\n")
cat("    Scheme 1 / Scheme 2 = 0.60    (Scheme 1 recovers ~60% of complete-data info)\n")
cat("    Scheme 0 / Scheme 2 = 0.0010\n")
cat("\n  Scheme 1 with delta = 0.5 closes most of the gap between black-box\n")
cat("  observation and complete monitoring, using only periodic access.\n")

## ----scheme1-k-spectrum-------------------------------------------------------
set.seed(42)
rates <- c(1.0, 0.8, 0.6, 0.4)
rates_sorted <- sort(rates)
n <- 50

k_results <- data.frame(
  k = 2:4,
  type = c("2-of-4", "3-of-4", "parallel"),
  s0_mean_err = NA_real_,
  s1_mean_err = NA_real_,
  improvement = NA_character_
)

for (idx in seq_len(nrow(k_results))) {
  k <- k_results$k[idx]
  model_k <- kofn(k = k, m = 4, component = dfr_exponential())

  # Scheme 0: system lifetime only
  gen0 <- rdata(model_k)
  dat0 <- gen0(theta = rates, n = n)
  fitter0 <- fit(model_k)
  res0 <- fitter0(dat0, n_starts = 1)

  # Scheme 1: periodic inspection (delta = 0.5)
  s1gen <- rdata_scheme1(model_k)
  dat1 <- s1gen(theta = rates, n = n, delta = 0.5)
  fitter1 <- fit_scheme1(model_k)
  res1 <- fitter1(dat1, n_starts = 1)

  if (res0$converged) {
    k_results$s0_mean_err[idx] <- round(
      mean(abs(sort(coef(res0)) - rates_sorted)), 3)
  }
  if (res1$converged) {
    k_results$s1_mean_err[idx] <- round(
      mean(abs(sort(coef(res1)) - rates_sorted)), 3)
  }
  if (!is.na(k_results$s0_mean_err[idx]) && !is.na(k_results$s1_mean_err[idx])) {
    ratio <- k_results$s0_mean_err[idx] / k_results$s1_mean_err[idx]
    k_results$improvement[idx] <- sprintf("%.0fx", ratio)
  }
}
k_results