spatialAtomizeR implements atom-based Bayesian
regression methods (ABRM) for spatial data with misaligned grids. This
vignette demonstrates the basic workflow for analyzing misaligned
spatial data using both simulated and real-world examples.
This example demonstrates the complete workflow using simulated data.
Generate spatial data with misaligned grids. The function creates two non-overlapping grids (X-grid and Y-grid) and their intersection (atoms).
sim_data <- simulate_misaligned_data(
seed = 42,
# Distribution specifications for covariates
dist_covariates_x = c('normal', 'poisson', 'binomial'),
dist_covariates_y = c('normal', 'poisson', 'binomial'),
dist_y = 'poisson', # Outcome distribution
# Intercepts for data generation (REQUIRED)
x_intercepts = c(4, -1, -1), # One per X covariate
y_intercepts = c(4, -1, -1), # One per Y covariate
beta0_y = -1, # Outcome model intercept
# Spatial correlation parameters
x_correlation = 0.5, # Correlation between X covariates
y_correlation = 0.5, # Correlation between Y covariates
# True effect sizes for outcome model
beta_x = c(-0.03, 0.1, -0.2), # Effects of X covariates
beta_y = c(0.03, -0.1, 0.2) # Effects of Y covariates
)The simulated data object contains: - gridx: X-grid
spatial features with covariates - gridy: Y-grid spatial
features with covariates and outcome - atoms: Atom-level
spatial features (intersection of grids) - true_params:
True parameter values used in simulation
The simulate_misaligned_data function returns an object
of class misaligned_data with useful S3 methods:
# Check the class
class(sim_data)
#> [1] "misaligned_data"
# Print method shows basic information
print(sim_data)
#> Simulated Misaligned Spatial Data
#> ==================================
#> Y-grid cells: 25
#> X-grid cells: 30
#> Atoms: 35
#> Number of X covariates: 3
#> Number of Y covariates: 3
#> True parameters available
# Summary method shows more details
summary(sim_data)
#> Simulated Misaligned Spatial Data
#> ==================================
#> Y-grid cells: 25
#> X-grid cells: 30
#> Atoms: 35
#> Number of X covariates: 3
#> Number of Y covariates: 3
#> True parameters available
#> True beta_x: -0.03, 0.1, -0.2
#> True beta_y: 0.03, -0.1, 0.2
# Default: overlay both grids to visualise misalignment
plot(sim_data)Retrieve the pre-compiled NIMBLE model specification:
Fit the ABRM model. You must specify which covariates follow which distributions by providing their indices:
abrm_results <- run_abrm(
gridx = sim_data$gridx,
gridy = sim_data$gridy,
atoms = sim_data$atoms,
model_code = model_code,
true_params = sim_data$true_params, # Optional: for validation
# Map distribution indices to positions in dist_covariates_x/y
norm_idx_x = 1, # 'normal' is 1st in dist_covariates_x
pois_idx_x = 2, # 'poisson' is 2nd
binom_idx_x = 3, # 'binomial' is 3rd
norm_idx_y = 1, # Same for Y covariates
pois_idx_y = 2,
binom_idx_y = 3,
# Outcome distribution: 1=normal, 2=poisson, 3=binomial
dist_y = 2,
# MCMC parameters
niter = 50000, # Total iterations per chain
nburnin = 30000, # Burn-in iterations
nchains = 2, # Number of chains
seed = 123,
compute_waic = TRUE
)The run_abrm function returns an object of class
abrm with S3 methods:
# Check the class
class(abrm_results)
#> [1] "abrm"
# Print method shows summary statistics
print(abrm_results)
#>
#> ABRM Model Results
#> ==================
#> Parameters estimated: 7
#> Mean absolute bias: 0.2402
#> Coverage rate: 100 %
#>
#> Use summary() for the full parameter table.
# Summary method shows detailed parameter estimates
summary(abrm_results)
#> ABRM Model Summary
#> ==================
#> Parameters estimated: 7
#> Mean absolute bias: 0.2402
#> Coverage rate: 100 %
#>
#> variable estimated_beta std_error ci_lower ci_upper true_beta bias
#> intercept -0.54060 1.3053 -2.70688 1.8998 -1.00 0.45940
#> covariate_x_1 0.06764 0.1474 -0.21282 0.3599 -0.03 0.09764
#> covariate_x_2 0.24491 0.4059 -0.58058 1.0313 0.10 0.14491
#> covariate_x_3 -0.68932 0.8488 -2.29887 1.0312 -0.20 -0.48932
#> covariate_y_1 0.20855 0.1427 -0.07177 0.4874 0.03 0.17855
#> covariate_y_2 0.04571 0.6241 -1.17400 1.2215 -0.10 0.14571
#> covariate_y_3 0.03415 0.7763 -1.39218 1.5496 0.20 -0.16585
#> relative_bias within_ci
#> -45.94 TRUE
#> -325.47 TRUE
#> 144.91 TRUE
#> 244.66 TRUE
#> 595.17 TRUE
#> -145.71 TRUE
#> -82.93 TRUE
# Plot method shows MCMC diagnostics (if available)
plot(abrm_results)# Get variance-covariance matrices
vcov(abrm_results)
#>
#> Variance-Covariance Matrices for ABRM Model
#> ============================================
#>
#> Intercept Variance:
#> beta_0_y: 1.703773
#> X-Grid Coefficients Variance-Covariance Matrix:
#> Dimensions: 3 x 3
#> covariate_x_1 covariate_x_2 covariate_x_3
#> covariate_x_1 0.021714 -0.014287 0.036561
#> covariate_x_2 -0.014287 0.164777 -0.069159
#> covariate_x_3 0.036561 -0.069159 0.720543
#>
#> Y-Grid Coefficients Variance-Covariance Matrix:
#> Dimensions: 3 x 3
#> covariate_y_1 covariate_y_2 covariate_y_3
#> covariate_y_1 0.020363 -0.011530 -0.020613
#> covariate_y_2 -0.011530 0.389514 -0.084022
#> covariate_y_3 -0.020613 -0.084022 0.602699
#>
#> Use vcov_object$vcov_beta_x, vcov_object$vcov_beta_y, etc. to access matrices
# Test coef()
coef(abrm_results)
#> beta_0_y covariate_x_1 covariate_x_2 covariate_x_3 covariate_y_1
#> -0.54060357 0.06764204 0.24491310 -0.68931577 0.20855084
#> covariate_y_2 covariate_y_3
#> 0.04571204 0.03414577
# Test confint()
confint(abrm_results)
#> CI.Lower CI.Upper
#> intercept -2.70688305 1.8998461
#> covariate_x_1 -0.21281539 0.3598648
#> covariate_x_2 -0.58057793 1.0312572
#> covariate_x_3 -2.29886717 1.0311990
#> covariate_y_1 -0.07176605 0.4874385
#> covariate_y_2 -1.17400383 1.2215393
#> covariate_y_3 -1.39217558 1.5496064
# Test parm subsetting
confint(abrm_results, parm = "covariate_x_1")
#> CI.Lower CI.Upper
#> covariate_x_1 -0.2128154 0.3598648
# Test fitted()
fitted(abrm_results)
#> y_grid_id observed fitted residual
#> 1 3 31 30.2001 0.7999
#> 2 4 18 18.8310 -0.8310
#> 3 5 34 33.5033 0.4967
#> 4 8 11 12.7122 -1.7122
#> 5 9 34 32.6033 1.3967
#> 6 10 42 38.8583 3.1417
#> 7 13 22 22.9119 -0.9119
#> 8 14 16 17.2410 -1.2410
#> 9 15 10 11.9866 -1.9866
#> 10 18 56 53.6539 2.3461
#> 11 19 36 33.7592 2.2408
#> 12 20 17 16.9202 0.0798
#> 13 21 42 39.5999 2.4001
#> 14 24 18 17.4957 0.5043
#> 15 25 35 35.0852 -0.0852
#> 16 1 19 46.1751 -27.1751
#> 17 2 39 37.5169 1.4831
#> 18 6 43 62.4962 -19.4962
#> 19 7 68 52.0277 15.9723
#> 20 11 38 41.1438 -3.1438
#> 21 12 63 47.8334 15.1666
#> 22 16 40 74.6093 -34.6093
#> 23 17 56 60.5217 -4.5217
#> 24 22 67 95.8024 -28.8024
#> 25 23 50 55.6886 -5.6886
# waic_abrm Objects
w <- waic(abrm_results)
print(w) # shows WAIC, lppd, pWAIC, n_params
#> WAIC for ABRM
#> =============
#> WAIC : 1071.471
#> lppd : -470.611 (log pointwise predictive density)
#> pWAIC : 65.125 (effective number of parameters)
#>
#> Note: lower WAIC indicates better predictive fit.
#> Compare only models fitted on identical data.
w$waic # the raw scalar if you need it for comparison
#> [1] 1071.471This example demonstrates using real spatial data from Utah, matching the analysis in the manuscript.
set.seed(500)
# Load Utah counties (Y-grid)
cnty <- counties(state = 'UT')
#> Retrieving data for the year 2024
#> | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
gridy <- cnty %>%
rename(ID = GEOID) %>%
mutate(ID = as.numeric(ID)) # ID must be numeric
# Load Utah school districts (X-grid)
scd <- school_districts(state = 'UT')
#> Retrieving data for the year 2024
#> | | | 0% | |= | 1% | |= | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 53% | |====================================== | 54% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |=============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
gridx <- scd %>%
rename(ID = GEOID) %>%
mutate(ID = as.numeric(ID))# Bundle grids into a misaligned_data object
utah_mis <- structure(
list(gridy = gridy, gridx = gridx),
class = "misaligned_data"
)
# Default overlay plot
plot(utah_mis, color_x = "orange", color_y = "blue", title = "Spatial Misalignment in Utah")
# You can also inspect each grid separately
plot(utah_mis, which = "gridy", title = "Utah Counties")ABRM requires population counts at the atom level. Here we use LandScan gridded population data:
# NOTE: You must download LandScan data separately
# Available at: https://landscan.ornl.gov/
# This example assumes the file is in your working directory
pop_raster <- raster("landscan-global-2022.tif")
# Extract population for each atom
pop_atoms <- raster::extract(
pop_raster,
st_transform(atoms, crs(pop_raster)),
fun = sum,
na.rm = TRUE
)
atoms$population <- pop_atomsFor demonstration, we generate synthetic outcome and covariate data at the atom level:
# Create atom-level spatial adjacency matrix
W_a <- nb2mat(
poly2nb(as(atoms, "Spatial"), queen = TRUE),
style = "B",
zero.policy = TRUE
)
# Generate spatial random effects
atoms <- atoms %>%
mutate(
re_x = gen_correlated_spat(W = W_a, n_vars = 1, correlation = 1, rho = 0.8),
re_z = gen_correlated_spat(W = W_a, n_vars = 1, correlation = 1, rho = 0.8),
re_y = gen_correlated_spat(W = W_a, n_vars = 1, correlation = 1, rho = 0.8)
)
# Generate atom-level covariates and outcome
atoms <- atoms %>%
mutate(
# X-grid covariate (Poisson)
covariate_x_1 = rpois(
n = nrow(atoms),
lambda = population * exp(-1 + re_x)
),
# Y-grid covariate (Normal)
covariate_y_1 = rnorm(
n = nrow(atoms),
mean = population * (3 + re_z),
sd = 1 * population
)
) %>%
mutate(
# Outcome (Poisson)
y = rpois(
n = nrow(atoms),
lambda = population * exp(
-5 +
1 * (covariate_x_1 / population) -
0.3 * (covariate_y_1 / population) +
re_y
)
)
)In reality, we only observe aggregated data at the grid level, not at atoms:
# Aggregate X covariates to X-grid
gridx[["covariate_x_1"]] <- sapply(gridx$ID, function(j) {
atom_indices <- which(atoms$ID_x == j)
sum(atoms[["covariate_x_1"]][atom_indices])
})
# Aggregate Y covariates to Y-grid
gridy[["covariate_y_1"]] <- sapply(gridy$ID, function(j) {
atom_indices <- which(atoms$ID_y == j)
sum(atoms[["covariate_y_1"]][atom_indices])
})
# Aggregate outcome to Y-grid
gridy$y <- sapply(gridy$ID, function(j) {
atom_indices <- which(atoms$ID_y == j)
sum(atoms$y[atom_indices])
})model_code <- get_abrm_model()
mcmc_results <- run_abrm(
gridx = gridx,
gridy = gridy,
atoms = atoms,
model_code = model_code,
# Specify covariate distributions
pois_idx_x = 1, # X covariate is Poisson
norm_idx_y = 1, # Y covariate is Normal
dist_y = 2, # Outcome is Poisson
# MCMC settings (increase for real analysis)
niter = 300000,
nburnin = 200000,
nchains = 2,
seed = 123,
compute_waic = TRUE
)When you specify covariates, the indices correspond to their positions:
dist_covariates_x = c('normal', 'poisson', 'binomial')
# ^1 ^2 ^3
# So you would specify:
norm_idx_x = 1 # First position
pois_idx_x = 2 # Second position
binom_idx_x = 3 # Third position| Code | Distribution | String | Use Case |
|---|---|---|---|
| 1 | Normal | ‘normal’ | Continuous data |
| 2 | Poisson | ‘poisson’ | Count/rate data |
| 3 | Binomial | ‘binomial’ | Proportion/binary data |
?simulate_misaligned_data,
?run_abrm?spatialAtomizeR