--- title: "GammaFrailty: Gamma Frailty Regression Models with Multiple Baseline Distributions" author: "Shikhar Tyagi" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: true toc_depth: 3 number_sections: true vignette: > %\VignetteIndexEntry{GammaFrailty: Gamma Frailty Regression Models} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, warning = FALSE, message = FALSE ) ``` # Introduction The **GammaFrailty** package implements univariate gamma frailty regression models for survival data under six different baseline distributions: | Model | Parameter(s) | Reference | |-------|-------------|-----------| | Arvind | alpha | Pandey et al. (2024) | | Lindley | lambda | Lindley (1958) | | Linear Failure Rate (LFR) | a, b | Bain (1974) | | Power Xgamma (PXG) | alpha, beta | Tyagi et al. (2022) | | Modified Topp-Leone (MTL) | alpha | Singh et al. (2025) | | Power Failure Rate (PFR) | a, k | Mugdadi (2005) | ## The Gamma Frailty Framework Let $T_j$ be the survival time for the $j$-th subject and $Z$ be an unobservable frailty variable drawn from a Gamma distribution with mean 1 and variance $\theta$. Conditional on $Z = z$, the hazard is $$h(t \mid z) = z \, h_0(t) \, e^{\mathbf{X}_j \boldsymbol{\beta}}.$$ After marginalising over $Z$, the unconditional survival function becomes $$S(t_j) = \left[1 + \theta \, \eta_j \, H_0(t_j)\right]^{-1/\theta},$$ where $\eta_j = e^{\mathbf{X}_j \boldsymbol{\beta}}$ and $H_0(t)$ is the baseline cumulative hazard. When no covariates are present, $\eta_j = 1$. --- # Installation ```r # Install from local source install.packages( "path/to/GammaFrailty", repos = NULL, type = "source" ) ``` --- # Random Number Generation Each baseline distribution has its own `r_*` function, and the full frailty model generator is `r_gamma_frailty()`. ```{r rng} library(GammaFrailty) set.seed(42) # Arvind (alpha = 0.5) x_arvind <- r_arvind(200, alpha = 0.5) # Lindley (lambda = 1.5) x_lindley <- r_lindley(200, lambda = 1.5) # LFR (a = 0.5, b = 0.2) x_lfr <- r_lfr(200, a = 0.5, b = 0.2) # Power Xgamma (alpha = 1, beta = 0.8) x_pxg <- r_pxg(100, alpha = 1.0, beta = 0.8) # Modified Topp-Leone (alpha = 2) x_mtl <- r_mtl(100, alpha = 2.0) # Power Failure Rate (a = 0.5, k = 1) x_pfr <- r_pfr(200, a = 0.5, k = 1.0) summary(x_arvind) ``` ### Plot baseline distributions ```{r baseline_plot} plot_baseline("arvind", par = 0.5, t_range = c(0.01, 3)) ``` --- # Simulating Frailty Survival Data `r_gamma_frailty()` supports eight censoring mechanisms. ```{r simulate} set.seed(1) # Right-censored data with one covariate dat_right <- r_gamma_frailty( n = 150, baseline = "arvind", par = 0.5, x = matrix(rnorm(150), ncol = 1), beta = 0.5, theta = 0.3, cen_type = "right", cen_rate = 0.25 ) colnames(dat_right)[4] <- "X1" cat("Censoring rate:", mean(dat_right$status == 0), "\n") head(dat_right) ``` ```{r simulate_interval} set.seed(2) # Interval-censored, no covariates dat_int <- r_gamma_frailty( n = 100, baseline = "lfr", par = c(0.5, 0.2), theta = 0.4, cen_type = "interval", int_width = 0.4 ) table(dat_int$status) ``` --- # Fitting Models ## No-covariate model (complete data) ```{r fit_no_cov} set.seed(3) dat_complete <- r_gamma_frailty(120, "arvind", par = 0.5, theta = 0.3, cen_type = "none") fit0 <- fit_gamma_frailty(dat_complete$time, dat_complete$status, baseline = "arvind") summary(fit0) ``` ## Model with covariates (right-censored) ```{r fit_with_cov} fit1 <- fit_gamma_frailty(dat_right$time, dat_right$status, x = dat_right[, "X1", drop = FALSE], baseline = "arvind") summary(fit1) ``` ## Formula interface ```{r formula_interface} library(survival) fit_formula <- gamma_frailty( Surv(time, status) ~ X1, data = dat_right, baseline = "arvind" ) summary(fit_formula) ``` --- # Inference and Model Metrics ```{r inference} # Coefficients with CI coef(fit1) confint(fit1, level = 0.95) # Log-likelihood and information criteria logLik(fit1) AIC(fit1) cat("BIC:", fit1$BIC, "\n") cat("Frailty variance (theta):", fit1$theta, " SE:", fit1$theta_se, "\n") ``` ### VIF and Tolerance ```{r vif} # Generate data with 2 covariates to illustrate VIF set.seed(4) dat2 <- r_gamma_frailty(150, "arvind", par = 0.5, x = matrix(rnorm(300), ncol = 2), beta = c(0.4, -0.3), theta = 0.3, cen_type = "right") fit2 <- fit_gamma_frailty(dat2$time, dat2$status, x = dat2[, 4:5], baseline = "arvind") cat("VIF:\n"); print(fit2$VIF) cat("Tolerance:\n"); print(fit2$Tolerance) ``` --- # Residuals and Diagnostics ```{r residuals} res <- residuals_frailty(fit1) cat("MSE:", round(res$MSE, 4), " RMSE:", round(res$RMSE, 4), " MAE:", round(res$MAE, 4), "\n") cat("R-squared:", round(res$R_square, 4), " Adj R-sq:", round(res$Adj_R_square, 4), "\n") cat("KS test p-value:", round(res$KS_pvalue, 4), "\n") ``` ```{r diag_table} dt <- diagnostics_table(fit1) head(dt[, c("time","status","deviance","leverage","cooks_dist","DFFITS")]) ``` --- # Diagnostic Plots ## Core residual plots ```{r plot_core, fig.height=9} oldpar <- par(mfrow = c(2, 2)) plot_residuals_fitted(fit1) plot_qq_residuals(fit1) plot_scale_location(fit1) plot_residuals_leverage(fit1) par(oldpar) ``` ## Survival and influence plots ```{r plot_surv_inf, fig.height=5} plot_survival_km(fit1) ``` ```{r plot_forest} plot_coef_forest(fit1) ``` ```{r plot_leverage_hist} plot_leverage(fit1) ``` ```{r plot_dfbetas} plot_dfbetas(fit1) ``` --- # Prediction ```{r predict_surv} # Survival at specific time points survival_at(fit1, times = c(0.5, 1.0, 2.0)) |> head(6) ``` ```{r predict_median} # Median survival times med <- predict_frailty(fit1, type = "median") summary(med) ``` ```{r predict_risk} # Risk scores (eta = exp(X * beta)) risk <- predict_frailty(fit1, type = "risk") summary(risk) ``` ```{r predict_marginal} # Population-averaged (marginal) survival t_grid <- seq(0.1, 3, by = 0.2) ms <- predict_frailty(fit1, newtime = t_grid, type = "marginal") plot(t_grid, ms, type = "l", lwd = 2, col = "steelblue", xlab = "Time", ylab = "Marginal S(t)", main = "Marginal Survival") ``` ```{r predict_expected} # Expected survival drop in [0.5, 1.5] es <- predict_frailty(fit1, type = "expected", window = c(0.5, 1.5)) summary(es) ``` ```{r forecast} # Forecast beyond training range fc <- forecast_frailty(fit1, horizon = 6, n_grid = 100) plot(fc$time, fc$survival[1, ], type = "l", lwd = 2, col = "firebrick", xlab = "Time", ylab = "S(t)", main = "Forecast: Subject 1") ``` --- # Model Comparison ```{r compare} set.seed(5) dat_cmp <- r_gamma_frailty(120, "arvind", par = 0.5, theta = 0.3, cen_type = "right", cen_rate = 0.2) fits <- list( Arvind = fit_gamma_frailty(dat_cmp$time, dat_cmp$status, baseline = "arvind"), Lindley = fit_gamma_frailty(dat_cmp$time, dat_cmp$status, baseline = "lindley"), LFR = fit_gamma_frailty(dat_cmp$time, dat_cmp$status, baseline = "lfr"), PFR = fit_gamma_frailty(dat_cmp$time, dat_cmp$status, baseline = "pfr") ) compare_models(fits) ``` --- # Cross-Validation ```{r cv, eval=FALSE} cv_res <- cv_frailty(dat_cmp$time, dat_cmp$status, baseline = "arvind", k = 5) cat("Mean OOS log-lik:", round(cv_res$mean_oos_loglik, 3), "\n") cat("Mean OOS RMSE: ", round(cv_res$mean_oos_rmse, 3), "\n") ``` --- # Censored Data - All Types ## Left censoring ```{r left_cen} set.seed(6) dat_left <- r_gamma_frailty(100, "pfr", par = c(0.5, 1), theta = 0.35, cen_type = "left", left_threshold = 0.3) table(dat_left$status) fit_left <- fit_gamma_frailty(dat_left$time, dat_left$status, baseline = "pfr") summary(fit_left) ``` ## Interval censoring ```{r int_cen} fit_int <- fit_gamma_frailty(dat_int$time, dat_int$status, time2 = dat_int$time2, baseline = "lfr") summary(fit_int) ``` ## Type-I censoring (fixed time) ```{r type1_cen} set.seed(8) dat_t1 <- r_gamma_frailty(100, "arvind", par = 0.5, theta = 0.3, cen_type = "type1", cen_time = 2.0) table(dat_t1$status) # 0 = censored at time 2.0 fit_t1 <- fit_gamma_frailty(dat_t1$time, dat_t1$status, baseline = "arvind") summary(fit_t1) ``` ## Type-II censoring (fixed failure count) ```{r type2_cen} set.seed(9) dat_t2 <- r_gamma_frailty(100, "pfr", par = c(0.5, 1), theta = 0.3, cen_type = "type2", r_failures = 70L) table(dat_t2$status) # exactly 70 status=1 events fit_t2 <- fit_gamma_frailty(dat_t2$time, dat_t2$status, baseline = "pfr") summary(fit_t2) ``` ## Progressive Type-I censoring (withdrawals at fixed times) ```{r prog_type1_cen} set.seed(10) dat_pt1 <- r_gamma_frailty(120, "lindley", par = 1.5, theta = 0.4, cen_type = "progressive_type1", prog_times = c(0.5, 1.0, 1.5), prog_scheme = c(5L, 5L, 5L)) table(dat_pt1$status) # 0 = withdrawn at tau_j, 1 = exact failure fit_pt1 <- fit_gamma_frailty(dat_pt1$time, dat_pt1$status, baseline = "lindley") summary(fit_pt1) ``` --- # Simulation Study ```{r simulation, eval=FALSE} set.seed(99) sim_res <- simulation_study( n_sim = 200, n_vec = c(50, 100, 200), baseline = "arvind", par = 0.5, beta = 0.4, theta = 0.3, cen_type = "right", cen_rate = 0.2, verbose = TRUE ) ``` --- # References 1. Lindley, D. V. (1958). Fiducial distributions and Bayes' theorem. Journal of the Royal Statistical Society. Series B (Methodological), 102-107. 2. Mugdadi, A. R. (2005). The least squares type estimation of the parameters in the power hazard function. Applied mathematics and computation, 169(2), 737-748. 3. Bain, L. J. (1974). Analysis for the linear failure-rate life-testing distribution. Technometrics, 16(4), 551-559. 4. Singh, B., Tyagi, S., Singh, R. P., & Tyagi, A. (2025). Modified Topp-Leone distribution: properties, classical and Bayesian estimation with application to COVID-19 and reliability data. Thailand Statistician, 23(1), 72-96. 5. Pandey, A., Singh, R. P., Tyagi, S., & Tyagi, A. (2024). Modelling climate, COVID-19, and reliability data: A new continuous lifetime model under different methods of estimation. Stat. Appl., 22(2). 6. Tyagi, S., Kumar, S., Pandey, A., Saha, M., & Bagariya, H. Power xgamma distribution: Properties and its applications to cancer data. Int J Stat Reliab Eng. 2022; 9(1): 51-60.