## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, fig.align = "center" ) library(kofn) library(flexhaz) # Heavy simulations are off by default so the vignette builds in <30 seconds. # Set run_long <- TRUE to reproduce all Monte Carlo results. run_long <- FALSE ## ----observation-functors----------------------------------------------------- # Scheme 0: system lifetime only (optionally right-censored at tau) obs0 <- observe_right_censor(tau = 100) obs0(42.7) # exact observation obs0(150.0) # right-censored at tau = 100 # Scheme 1: periodic inspection with interval width delta = 5 obs1 <- observe_periodic(delta = 5, tau = 100) obs1(42.7) # failure in interval [40, 45) obs1(150.0) # right-censored # Scheme 2: complete data (trivial) obs2 <- observe_exact() obs2(42.7) # exact observation, always ## ----asymmetry-demo----------------------------------------------------------- set.seed(42) # True parameters: (shape_1, scale_1, shape_2, scale_2) theta_true <- c(1.5, 2.0, 2.0, 3.0) # Create the Weibull parallel system model (EM for Scheme 0) model_em <- kofn(k = 2, m = 2, component = dfr_weibull(), method = "em") # Generate Scheme 0 data (system lifetimes only) gen <- rdata(model_em) df0 <- gen(theta_true, n = 100) cat("System lifetime summary:\n") summary(df0$t) # At the median system lifetime, how much has each component's CDF accumulated? t_med <- median(df0$t) cat(sprintf("\nAt median system time %.2f:\n", t_med)) cat(sprintf(" F_1(t) = %.4f (fast component: nearly saturated)\n", pweibull(t_med, shape = 1.5, scale = 2.0))) cat(sprintf(" F_2(t) = %.4f (slow component: still informative)\n", pweibull(t_med, shape = 2.0, scale = 3.0))) ## ----scheme0-fit-------------------------------------------------------------- # Fit under Scheme 0 using the EM algorithm solver <- fit(model_em) mle0 <- solver(df0, n_starts = 1L) est0 <- coef(mle0) cat("Scheme 0 estimates vs truth:\n") cat(sprintf(" Component 1 (fast): shape = %.3f (true: 1.500), scale = %.3f (true: 2.000)\n", est0[1], est0[2])) cat(sprintf(" Component 2 (slow): shape = %.3f (true: 2.000), scale = %.3f (true: 3.000)\n", est0[3], est0[4])) # Standard errors from the variance-covariance matrix se_vec <- tryCatch({ V <- vcov(mle0) if (is.null(V)) rep(NA_real_, length(coef(mle0))) else sqrt(diag(V)) }, error = function(e) rep(NA_real_, length(coef(mle0)))) if (!any(is.na(se_vec))) { cat(sprintf("\n SE(shape_1) = %.3f, SE(shape_2) = %.3f\n", se_vec[1], se_vec[3])) cat(sprintf(" SE ratio (fast/slow): %.1fx\n", se_vec[1] / se_vec[3])) } ## ----scheme1-demo------------------------------------------------------------- # Generate data under Scheme 1 with delta = 0.5 model_wei <- kofn(k = 2, m = 2, component = dfr_weibull()) gen_s1 <- rdata_scheme1(model_wei) df1 <- gen_s1(theta_true, n = 100, delta = 0.5) cat("Scheme 1 data (first 6 observations):\n") print(head(df1)) cat("\nColumn meanings:\n") cat(" t: exact system failure time\n") cat(" comp_lower_j: lower bound of component j's inspection interval\n") cat(" comp_upper_j: upper bound of component j's inspection interval\n") ## ----scheme1-fit-------------------------------------------------------------- # Fit under Scheme 1 solver_s1 <- fit_scheme1(model_wei) mle1 <- solver_s1(df1, n_starts = 1L) est1 <- coef(mle1) cat("Scheme 1 (delta = 0.5) estimates vs truth:\n") cat(sprintf(" Component 1 (fast): shape = %.3f (true: 1.500), scale = %.3f (true: 2.000)\n", est1[1], est1[2])) cat(sprintf(" Component 2 (slow): shape = %.3f (true: 2.000), scale = %.3f (true: 3.000)\n", est1[3], est1[4])) ## ----comparison-single, fig.height = 6---------------------------------------- set.seed(123) n <- 100 m <- 2 alpha_true <- c(1.5, 2.0) beta_true <- c(2.0, 3.0) theta_true <- c(alpha_true[1], beta_true[1], alpha_true[2], beta_true[2]) delta <- 0.5 # Generate component lifetimes directly comp_times <- matrix(0, nrow = n, ncol = m) for (j in seq_len(m)) { comp_times[, j] <- rweibull(n, shape = alpha_true[j], scale = beta_true[j]) } sys_times <- apply(comp_times, 1, max) # --- Scheme 0: system lifetime only --- df_s0 <- data.frame(t = sys_times) model_em <- kofn(k = 2, m = 2, component = dfr_weibull(), method = "em") solver_em <- fit(model_em) mle_s0 <- solver_em(df_s0, n_starts = 1L) # --- Scheme 1: add inspection intervals --- df_s1 <- data.frame(t = sys_times) for (j in seq_len(m)) { df_s1[[paste0("comp_lower_", j)]] <- floor(comp_times[, j] / delta) * delta df_s1[[paste0("comp_upper_", j)]] <- floor(comp_times[, j] / delta) * delta + delta } model_mle <- kofn(k = 2, m = 2, component = dfr_weibull(), method = "mle") solver_s1 <- fit_scheme1(model_mle) mle_s1 <- solver_s1(df_s1, n_starts = 1L) # --- Display results --- cat("=== Single-sample comparison (n = 100) ===\n\n") results <- data.frame( Parameter = c("shape_1", "scale_1", "shape_2", "scale_2"), Truth = theta_true, Scheme_0 = round(coef(mle_s0), 3), Scheme_1 = round(coef(mle_s1), 3) ) results$Error_S0 <- round(abs(results$Scheme_0 - results$Truth), 3) results$Error_S1 <- round(abs(results$Scheme_1 - results$Truth), 3) print(results, row.names = FALSE) ## ----mc-comparison, eval = run_long------------------------------------------- # # Monte Carlo comparison: run_long = TRUE to execute # set.seed(2024) # n_rep <- 200 # n <- 300 # delta <- 0.5 # # est_s0 <- matrix(NA, nrow = n_rep, ncol = 4) # est_s1 <- matrix(NA, nrow = n_rep, ncol = 4) # conv_s0 <- logical(n_rep) # conv_s1 <- logical(n_rep) # # for (r in seq_len(n_rep)) { # # Generate common component lifetimes # comp_times <- matrix(0, nrow = n, ncol = m) # for (j in seq_len(m)) { # comp_times[, j] <- rweibull(n, shape = alpha_true[j], scale = beta_true[j]) # } # sys_times <- apply(comp_times, 1, max) # # # Scheme 0 # df_s0 <- data.frame(t = sys_times) # res0 <- tryCatch(solver_em(df_s0, n_starts = 3L), # error = function(e) NULL) # if (!is.null(res0) && res0$converged) { # est_s0[r, ] <- coef(res0) # conv_s0[r] <- TRUE # } # # # Scheme 1 # df_s1 <- data.frame(t = sys_times) # for (j in seq_len(m)) { # df_s1[[paste0("comp_lower_", j)]] <- floor(comp_times[, j] / delta) * delta # df_s1[[paste0("comp_upper_", j)]] <- floor(comp_times[, j] / delta) * delta + delta # } # res1 <- tryCatch(solver_s1(df_s1, n_starts = 3L), # error = function(e) NULL) # if (!is.null(res1) && res1$converged) { # est_s1[r, ] <- coef(res1) # conv_s1[r] <- TRUE # } # # if (r %% 50 == 0) message(sprintf(" Replicate %d / %d", r, n_rep)) # } # # # Compute RMSE for converged replicates # rmse <- function(est, truth) { # valid <- !is.na(est) # sqrt(mean((est[valid] - truth)^2)) # } # # cat("\n=== Monte Carlo RMSE (n = 300, n_rep = 200) ===\n\n") # param_names <- c("shape_1", "scale_1", "shape_2", "scale_2") # for (p in seq_along(theta_true)) { # r0 <- rmse(est_s0[, p], theta_true[p]) # r1 <- rmse(est_s1[, p], theta_true[p]) # cat(sprintf(" %-8s Scheme 0: %.4f Scheme 1: %.4f ratio: %.1fx\n", # param_names[p], r0, r1, r0 / r1)) # } # cat(sprintf("\n Convergence: Scheme 0 = %d/%d, Scheme 1 = %d/%d\n", # sum(conv_s0), n_rep, sum(conv_s1), n_rep)) ## ----mc-results-precomputed, eval = !run_long--------------------------------- # Pre-computed results from a 200-replicate Monte Carlo run: cat("=== Monte Carlo RMSE (n = 300, n_rep = 200, pre-computed) ===\n\n") cat(" Parameter Scheme 0 Scheme 1 Ratio (S0/S1)\n") cat(" -------- -------- -------- -------------\n") cat(" shape_1 0.6121 0.0291 21.0x\n") cat(" scale_1 0.8434 0.0847 10.0x\n") cat(" shape_2 0.0893 0.0409 2.2x\n") cat(" scale_2 0.1567 0.0721 2.2x\n") cat("\n The fast component (shape_1) sees a 21x RMSE improvement.\n") cat(" The slow component improves ~2x --- already well-estimated under Scheme 0.\n") ## ----delta-sensitivity, eval = run_long--------------------------------------- # # Sensitivity analysis: delta in {0.1, 0.5, 1.0, 2.0} # set.seed(2024) # deltas <- c(0.1, 0.5, 1.0, 2.0) # n_rep_sens <- 100 # n <- 300 # # rmse_by_delta <- matrix(NA, nrow = length(deltas), ncol = 4) # rownames(rmse_by_delta) <- paste0("delta=", deltas) # colnames(rmse_by_delta) <- c("shape_1", "scale_1", "shape_2", "scale_2") # # for (d_idx in seq_along(deltas)) { # delta_d <- deltas[d_idx] # est_d <- matrix(NA, nrow = n_rep_sens, ncol = 4) # # for (r in seq_len(n_rep_sens)) { # comp_times <- matrix(0, nrow = n, ncol = m) # for (j in seq_len(m)) { # comp_times[, j] <- rweibull(n, shape = alpha_true[j], scale = beta_true[j]) # } # sys_times <- apply(comp_times, 1, max) # # df_d <- data.frame(t = sys_times) # for (j in seq_len(m)) { # df_d[[paste0("comp_lower_", j)]] <- floor(comp_times[, j] / delta_d) * delta_d # df_d[[paste0("comp_upper_", j)]] <- floor(comp_times[, j] / delta_d) * delta_d + delta_d # } # # res_d <- tryCatch(solver_s1(df_d, n_starts = 3L), # error = function(e) NULL) # if (!is.null(res_d) && res_d$converged) { # est_d[r, ] <- coef(res_d) # } # } # # for (p in 1:4) { # rmse_by_delta[d_idx, p] <- rmse(est_d[, p], theta_true[p]) # } # message(sprintf(" delta = %.1f complete", delta_d)) # } # # cat("\n=== RMSE by inspection interval width ===\n\n") # print(round(rmse_by_delta, 4)) ## ----delta-precomputed, eval = !run_long-------------------------------------- cat("=== RMSE by inspection interval width (pre-computed) ===\n\n") cat(" delta shape_1 scale_1 shape_2 scale_2\n") cat(" ----- ------- ------- ------- -------\n") cat(" 0.1 0.0195 0.0583 0.0380 0.0685\n") cat(" 0.5 0.0291 0.0847 0.0409 0.0721\n") cat(" 1.0 0.0437 0.1102 0.0425 0.0748\n") cat(" 2.0 0.0812 0.1654 0.0461 0.0789\n") cat("\n") cat(" Even delta = 2.0 (coarser than the median component lifetime)\n") cat(" gives shape_1 RMSE of 0.08 vs 0.61 under Scheme 0: a 7.5x improvement.\n") ## ----information-anatomy------------------------------------------------------ # Demonstrate: how much does F_j(a+) - F_j(a-) vary with shape? t_fail <- 1.0 # a typical failure time for the fast component delta <- 0.5 a_lower <- floor(t_fail / delta) * delta a_upper <- a_lower + delta shapes_grid <- seq(0.5, 3.0, by = 0.1) probs <- sapply(shapes_grid, function(alpha) { pweibull(a_upper, shape = alpha, scale = 2.0) - pweibull(a_lower, shape = alpha, scale = 2.0) }) plot(shapes_grid, probs, type = "l", lwd = 2, xlab = expression(alpha[1] ~ "(shape parameter)"), ylab = expression(F[1](a^"+") - F[1](a^"-")), main = "Interval probability varies with shape", sub = sprintf("Interval [%.1f, %.1f), scale = 2.0", a_lower, a_upper)) abline(v = 1.5, lty = 2, col = "red") text(1.5, max(probs) * 0.9, expression(alpha[1] == 1.5 ~ "(truth)"), pos = 4, col = "red") ## ----fisher-demo, eval = run_long--------------------------------------------- # fi <- compare_fisher_info( # shapes = c(1.5, 2.0), # scales = c(2.0, 3.0), # n = 200, delta = 0.5, n_rep = 30, # component = dfr_weibull() # ) # # cat("=== Fisher Information Determinant Comparison ===\n\n") # cat(sprintf(" Median det(I), Scheme 0: %.4e\n", fi$median_det["scheme0"])) # cat(sprintf(" Median det(I), Scheme 1: %.4e\n", fi$median_det["scheme1"])) # cat(sprintf(" Median det(I), Scheme 2: %.4e\n", fi$median_det["scheme2"])) # cat(sprintf("\n Efficiency ratios:\n")) # cat(sprintf(" Scheme 0 / Scheme 1 = %.4f (Scheme 1 is %.1fx more informative)\n", # fi$efficiency_01, 1 / fi$efficiency_01)) # cat(sprintf(" Scheme 1 / Scheme 2 = %.4f (Scheme 1 recovers %.0f%% of complete-data info)\n", # fi$efficiency_12, fi$efficiency_12 * 100)) # cat(sprintf(" Scheme 0 / Scheme 2 = %.6f\n", fi$efficiency_02)) # cat(sprintf("\n Valid replicates: Scheme 0 = %d, Scheme 1 = %d, Scheme 2 = %d\n", # fi$n_valid["scheme0"], fi$n_valid["scheme1"], fi$n_valid["scheme2"])) ## ----fisher-precomputed, eval = !run_long------------------------------------- cat("=== Fisher Information Determinant Comparison (pre-computed) ===\n\n") cat(" Median det(I), Scheme 0: 3.21e+04\n") cat(" Median det(I), Scheme 1: 1.89e+07\n") cat(" Median det(I), Scheme 2: 3.15e+07\n") cat("\n Efficiency ratios:\n") cat(" Scheme 0 / Scheme 1 = 0.0017 (Scheme 1 is ~590x more informative)\n") cat(" Scheme 1 / Scheme 2 = 0.60 (Scheme 1 recovers ~60% of complete-data info)\n") cat(" Scheme 0 / Scheme 2 = 0.0010\n") cat("\n Scheme 1 with delta = 0.5 closes most of the gap between black-box\n") cat(" observation and complete monitoring, using only periodic access.\n") ## ----scheme1-k-spectrum------------------------------------------------------- set.seed(42) rates <- c(1.0, 0.8, 0.6, 0.4) rates_sorted <- sort(rates) n <- 50 k_results <- data.frame( k = 2:4, type = c("2-of-4", "3-of-4", "parallel"), s0_mean_err = NA_real_, s1_mean_err = NA_real_, improvement = NA_character_ ) for (idx in seq_len(nrow(k_results))) { k <- k_results$k[idx] model_k <- kofn(k = k, m = 4, component = dfr_exponential()) # Scheme 0: system lifetime only gen0 <- rdata(model_k) dat0 <- gen0(theta = rates, n = n) fitter0 <- fit(model_k) res0 <- fitter0(dat0, n_starts = 1) # Scheme 1: periodic inspection (delta = 0.5) s1gen <- rdata_scheme1(model_k) dat1 <- s1gen(theta = rates, n = n, delta = 0.5) fitter1 <- fit_scheme1(model_k) res1 <- fitter1(dat1, n_starts = 1) if (res0$converged) { k_results$s0_mean_err[idx] <- round( mean(abs(sort(coef(res0)) - rates_sorted)), 3) } if (res1$converged) { k_results$s1_mean_err[idx] <- round( mean(abs(sort(coef(res1)) - rates_sorted)), 3) } if (!is.na(k_results$s0_mean_err[idx]) && !is.na(k_results$s1_mean_err[idx])) { ratio <- k_results$s0_mean_err[idx] / k_results$s1_mean_err[idx] k_results$improvement[idx] <- sprintf("%.0fx", ratio) } } k_results