From raw data to model with tidyILD

This vignette walks through the full tidyILD pipeline: prepare data, inspect structure, apply within-between decomposition and lags, fit a mixed-effects model, and run diagnostics and plots.

Simulate and prepare

library(tidyILD)
# Simulate simple ILD
d <- ild_simulate(n_id = 10, n_obs_per = 12, irregular = TRUE, seed = 42)
# Prepare: encode time structure and add .ild_* columns
x <- ild_prepare(d, id = "id", time = "time", gap_threshold = 7200)

Inspect

ild_summary(x)
#> $summary
#> # A tibble: 1 × 7
#>    n_id n_obs time_min time_max prop_gap median_dt_sec iqr_dt_sec
#>   <int> <int>    <dbl>    <dbl>    <dbl>         <dbl>      <dbl>
#> 1    10   120        0   40136.        0         3612.       640.
#> 
#> $n_units
#> [1] 10
#> 
#> $n_obs
#> [1] 120
#> 
#> $time_range
#> [1]     0 40136
#> 
#> $spacing
#> $spacing$median_dt
#> [1] 3612.244
#> 
#> $spacing$iqr_dt
#> [1] 640.2958
#> 
#> $spacing$n_intervals
#> [1] 110
#> 
#> $spacing$pct_gap
#> [1] 0
#> 
#> $spacing$by_id
#> # A tibble: 10 × 5
#>       id median_dt iqr_dt n_intervals pct_gap
#>    <int>     <dbl>  <dbl>       <int>   <dbl>
#>  1     1     3627.   549.          11       0
#>  2     2     3702.   854.          11       0
#>  3     3     3617.   740.          11       0
#>  4     4     3321.   702.          11       0
#>  5     5     3616.   588.          11       0
#>  6     6     3438.   481.          11       0
#>  7     7     3767.   816.          11       0
#>  8     8     3591.   499.          11       0
#>  9     9     3609.   194.          11       0
#> 10    10     3700.   521.          11       0
#> 
#> 
#> $n_gaps
#> [1] 0
#> 
#> $pct_gap
#> [1] 0
ild_spacing_class(x)
#> [1] "regular-ish"

Within-person centering and lags

x <- ild_center(x, y)
x <- ild_lag(x, y, mode = "gap_aware", max_gap = 7200)

Fit a model

Without residual autocorrelation (lmer):

fit0 <- ild_lme(y ~ 1 + (1 | id), data = x, ar1 = FALSE, warn_no_ar1 = FALSE)

With AR1 residual correlation (nlme):

fit1 <- ild_lme(y ~ 1, data = x, ar1 = TRUE, correlation_class = "CAR1")

Diagnostics and plots

diag <- ild_diagnostics(fit1, data = x)
names(diag)  # meta, data, stats
#> [1] "meta"  "data"  "stats"
names(plot_ild_diagnostics(diag))  # plot names for requested types
#> [1] "residual_acf"        "residuals_vs_fitted" "residuals_vs_time"  
#> [4] "qq"
# Pooled residual ACF (tibble)
head(diag$stats$acf$pooled)
#> # A tibble: 6 × 2
#>     lag      acf
#>   <dbl>    <dbl>
#> 1     0  1      
#> 2     1  0.121  
#> 3     2 -0.00832
#> 4     3 -0.113  
#> 5     4 -0.0886 
#> 6     5 -0.147
# By-id ACF when by_id = TRUE: one tibble per person
names(diag$stats$acf$by_id)
#>  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10"
head(diag$stats$acf$by_id[[1]])
#> # A tibble: 6 × 2
#>     lag     acf
#>   <dbl>   <dbl>
#> 1     0  1     
#> 2     1  0.361 
#> 3     2 -0.0851
#> 4     3  0.149 
#> 5     4  0.196 
#> 6     5  0.0752

ild_plot(x, type = "trajectory", var = "y", max_ids = 5)

Trajectory plot

ild_plot(fit1, type = "fitted")

Fitted vs observed

Reproducibility

Use a fixed seed when simulating or fitting models so results can be recreated. The pipeline is deterministic for a given seed and data. When saving results (e.g. after [ild_lme()] or [ild_diagnostics()]), you can attach a reproducibility manifest and save a single bundle with [ild_manifest()] and [ild_bundle()]:

# Optional: build a manifest with scenario and seed, then bundle the fit for saving
manifest <- ild_manifest(seed = 42, scenario = ild_summary(x), include_session = FALSE)
bundle <- ild_bundle(fit1, manifest = manifest, label = "model_ar1")
# saveRDS(bundle, "run.rds")  # one file with result + manifest + label
names(bundle)
#> [1] "result"   "manifest" "label"