## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, echo = FALSE, message = FALSE, warning=FALSE---------------------- # List all suggested packages needed for this vignette required_pkgs <- c( "imuGAP", "data.table", "bayesplot", "ggplot2", "tidyr", "dplyr" ) # Check if they are available pkgs_available <- all(sapply(required_pkgs, requireNamespace, quietly = TRUE)) # If not available, hide the rest of the document or knit dynamically if (!pkgs_available) { knitr::opts_chunk$set(eval = FALSE) message("Some suggested packages are missing. Vignette code will not be executed.") } else { # Assign the result so the chunk does not auto-print the logical vector # sapply() returns (the setup block should render no output). chk <- suppressPackageStartupMessages( sapply(required_pkgs, require, character.only = TRUE) ) } ## ----locations---------------------------------------------------------------- data("locations_sim", package = "imuGAP") head(locations_sim) # Canonicalize and validate canonical_locations <- canonicalize_locations(locations_sim) head(canonical_locations) ## ----observations------------------------------------------------------------- data("observations_sim", package = "imuGAP") head(observations_sim[, .(obs_id, loc_id, positive, sample_n, censored)]) # Canonicalize and validate canonical_observations <- canonicalize_observations(observations_sim) head(canonical_observations) ## ----populations-------------------------------------------------------------- data("populations_sim", package = "imuGAP") head(populations_sim) # Canonicalize and validate canonical_populations <- canonicalize_populations( populations_sim, observations_sim, locations_sim ) head(canonical_populations) ## ----validation-failure-obs--------------------------------------------------- # Create a copy with an invalid observation (positive > sample_n) invalid_obs <- copy(observations_sim[, .(obs_id, loc_id, positive, sample_n, censored)]) invalid_obs[1, positive := sample_n + 10] # This will fail validation and throw an error: tryCatch( canonicalize_observations(invalid_obs), error = function(e) message("Caught expected error: ", e$message) ) ## ----validation-failure-locs-------------------------------------------------- # Create a copy with a duplicate location ID invalid_locs <- rbind( locations_sim, data.frame(loc_id = "Scruggs", parent_id = "State") ) # This will fail validation: tryCatch( canonicalize_locations(invalid_locs), error = function(e) message("Caught expected error: ", e$message) ) ## ----sampling-code, eval = FALSE---------------------------------------------- # fit_sim <- sampling( # observations_sim, populations_sim, locations_sim, # stan_opts = stan_options( # iter = 2000, chains = 4, refresh = 0, seed = 1L # ) # ) ## ----load-fit----------------------------------------------------------------- data("fit_sim", package = "imuGAP") ## ----extract-params----------------------------------------------------------- beta_draws <- extract_imugap(fit_sim, pars = "beta_bs") str(beta_draws) ## ----trace-plot--------------------------------------------------------------- bayesplot::mcmc_trace(fit_sim$stanfit, pars = c( "beta_bs[1]", "beta_bs[2]", "beta_bs[3]", "beta_bs[4]", "beta_bs[5]" ) ) + theme_bw() + theme(legend.position = c(.9, .1), legend.justification = c(1, 0)) ## ----target------------------------------------------------------------------- target_sim <- create_target( fit = fit_sim, location = unique(locations_sim$loc_id), age = 1:18, cohort = max(populations_sim$cohort) - 18, dose = c(1, 2), mode = "snapshot" ) head(target_sim) ## ----predict-code, eval = FALSE----------------------------------------------- # predict_sim <- predict(object = fit_sim, target = target_sim, posterior_size = 100) ## ----load-predictions--------------------------------------------------------- data("predict_sim", package = "imuGAP") ## ----summarize-predictions---------------------------------------------------- # Calculate the posterior mean coverage probability for each location and dose at age 5 summary_predict <- summary(predict_sim) head(summary_predict) ## ----state-viz---------------------------------------------------------------- summary_predict |> subset(loc_id == "State" & dose == 2 & age > 4) |> ggplot() + aes(x = age) + geom_line(aes(y = q50)) + geom_ribbon(aes(ymin = q2_5, ymax = q97_5), alpha = 0.5) + theme_bw() + scale_x_continuous(breaks = 5:18, minor_breaks = NULL) + labs(x = "Age", y = "Coverage", title = "State-level two dose coverage") ## ----county-viz--------------------------------------------------------------- summary_predict |> subset(loc_id %in% c("Scruggs", "Simone", "Watson") & dose == 2 & age > 4) |> ggplot() + aes(x = age) + geom_line(aes(y = q50, color = loc_id)) + geom_ribbon(aes(ymin = q2_5, ymax = q97_5, fill = loc_id), alpha = 0.2) + theme_bw() + theme( legend.position = "inside", legend.position.inside = c(.2, 0.05), legend.justification.inside = c(0, 0) ) + scale_x_continuous(breaks = 5:18, minor_breaks = NULL) + scale_color_discrete(NULL, aesthetics = c("color", "fill")) + labs( x = "Age", y = "Coverage", title = "County-level two dose coverage" ) ## ----grade-viz---------------------------------------------------------------- scruggs_schools <- locations_sim[parent_id == "Scruggs", loc_id] summary_predict |> subset( loc_id %in% scruggs_schools & dose == 2 & age > 4 ) |> ggplot() + geom_boxplot(aes(x = factor(age), y = q50)) + theme_bw() + labs( x = "Age", y = "Coverage", title = "Distribution of School-Level Coverage For Scruggs County" ) ## ----------------------------------------------------------------------------- schools <- c( "Towhee Children's Academy", # ~380 per grade "Flycatcher Elementary", # ~110 per grade "Sparrow School" # ~60 per grade ) # Subset to targets of interest (all retained posterior draws) predict_sub <- predict_sim |> subset(loc_id %in% schools & dose == 2 & age > 4) # Get the pre-computed background coverage matching the subsetted target latent_ref <- copy(predict_sub$target) latent_ref$coverage <- latent_params_sim$coverage[predict_sub$target$obs_id] # Convert predictions to a long-format data.frame draws_df <- as.data.frame(predict_sub) # Now plot it all ggplot() + aes(age, coverage, color = loc_id) + geom_point( data = draws_df, alpha = 1 / 256, shape = 16, position = position_jitterdodge( dodge.width = 0.5, jitter.width = 0.15 ) ) + geom_point( data = latent_ref, mapping = aes(shape = "True value"), fill = NA ) + theme_bw() + scale_shape_manual( name = "", values = c("True value" = 24) ) + scale_color_discrete(NULL, aesthetics = c("color", "fill")) + scale_x_continuous(breaks = 5:18, minor_breaks = NULL) + theme(legend.position = "bottom") + labs(color = "School", x = "Age", y = "Coverage")