\documentclass[8pt]{extarticle} %\VignetteIndexEntry{This document presents a series of vignettes for the models available in SemiCompRisks package.} %\VignetteEngine{R.rsp::tex} \usepackage{hyperref} \usepackage[T1]{fontenc} \usepackage{amsmath} \usepackage{bbm} \usepackage{listings}% http://ctan.org/pkg/listings \lstset{ basicstyle=\ttfamily, mathescape } \usepackage[top=0.5in, bottom=.25in, left=.35in, right=.35in]{geometry} \usepackage{amstext} \usepackage{amssymb} \DeclareMathOperator{\Res}{Res} \usepackage{booktabs,dcolumn} %? \usepackage{multicol} \usepackage{xcolor} \usepackage{longtable} \usepackage[libertine]{newtxmath} \usepackage{fancyhdr} \pagestyle{empty} \fancyhf{} \newlength{\remaining} \newcommand{\titleline}[1]{% \setlength{\remaining}{\textwidth-\widthof{\textsc{#1}}} \noindent\underline{\textsc{#1}\hspace*{\remaining}}\par} \begin{document} \begin{enumerate} \item {\large\texttt{BayesSurv\_HReg} : independent, univariate time-to-event data fit to a Cox PH model with Weibull baseline hazard} \item {\large\texttt{BayesSurv\_HReg} : independent, univariate time-to-event data fit to a Cox PH model with PEM baseline hazard} \item {\large\texttt{BayesSurv\_AFT} : independent, univariate time-to-event data fit to an AFT model with LN baseline survival distribution} \item {\large\texttt{BayesSurv\_AFT} : independent, univariate time-to-event data fit to an AFT model with DPM baseline survival distribution} \item {\large\texttt{BayesSurv\_HReg} : cluster-correlated, univariate time-to-event data fit to a Cox PH model with Weibull baseline hazard} \item {\large\texttt{BayesSurv\_HReg} : cluster-correlated, univariate time-to-event data fit to a Cox PH model with PEM baseline hazard} \item {\large\texttt{BayesID\_HReg} : independent semi-competing risks data using an illness-death model with Weibull baseline hazards} \item {\large\texttt{BayesID\_HReg} : independent semi-competing risks data using an illness-death model with PEM baseline hazards} \item {\large\texttt{BayesID\_AFT} : independent semi-competing risks data using an AFT illness-death model with LN baseline survival distribution} \item {\large\texttt{BayesID\_AFT} : independent semi-competing risks data using an AFT illness-death model with DPM baseline survival distribution} \item {\large\texttt{BayesID\_HReg} : cluster-correlated semi-competing risks data using an illness-death model with Weibull baseline hazards} \item {\large\texttt{BayesID\_HReg} : cluster-correlated semi-competing risks data using an illness-death model with PEM baseline hazards} \end{enumerate} \newpage %%%%%%%%%%%%%%%%%% %%%%%%% 1 %%%%%%%%%%%%%%%%%% \par\hbox{{\large1. \texttt{BayesSurv\_HReg} }applied to independent, univariate time-to-event data fit to a Cox PH model with Weibull baseline hazard}\hrule \vspace{.5cm} \noindent{\large \textbf{Definition of the survival model}}\\ \noindent Let $t_i$ denote the time-to-event of interest for individuals $i=1,\dots,n$, subject to right censoring at time $c_i$. Let $(y_i,\delta_i,x_i)$ denote independent observations, where $y_i=\min\left(t_i,c_i \right)$, $\delta_i=\mathbbm{1}(t_i\le c_i)$, and $x_i$ is a vector of covariates for individual $i$. The following Cox proportional hazards model is assumed \begin{equation*} h(t_i|x_i) = h_0(t_i)\exp\left(x_i^\top \beta \right),~t_i>0, \end{equation*} where the baseline hazard $h_0$ is defined parametrically by a Weibull hazard, $h_0(t) = \alpha \kappa t^{\alpha - 1}$. %The Weibull parameters, $\alpha$ and $\kappa$, which are both positive real numbers control the shape and scale of the hazard function, respectively. The form of the hazard function shows that $\alpha=1$ corresponds to a constant risk of failure over time, $\alpha<1$ corresponds to a decrease in risk over time, and $\alpha>1$ corresponds to an increase in risk over time. The scaling effect of the parameter $\kappa$ can be seen by considering the expression for the median of the corresponding Weibull random variable which is given by $\left(\ln{2}/\kappa\right)^\alpha$. In the Bayesian framework, priors must be specified for the regression parameter, $\beta$, and the shape and scale parameters of baseline hazard function, $\alpha$ and $\kappa$, respectively. The following specifications are made \begin{align*} \pi(\beta) & \propto 1,\\ \pi(\alpha) & \sim Gamma(a,b),\\ \pi(\kappa) & \sim Gamma(c,d).\\ \end{align*} \noindent{\large\textbf{Hyperparameters}}\\ The hyperparameters $a$ and $b$ must be specified for the prior distribution of $\alpha$ which is a Gamma distribution with mean $ab$ and variance $ab^2$. Similarly, the hyperparameters $c$ and $d$ must be specified for the Gamma prior of $\kappa$.\\ \noindent{\large \textbf{Arguments to specify}}\\ \noindent\begin{tabular}{p{.12in} p{1.20in} p{5.85in}} \toprule \multicolumn{3}{l}{\textbf{Model-related}}\\ & \texttt{Formula} & a \texttt{Formula} object that corresponds to the hazard $h(t_i|x_i)$: $y+\delta \sim x$.\\ &\texttt{data} & an $(n\times q)$-dimensional data.frame; the $q$-columns correspond to $q$ covariate vectors named in the formula in \texttt{Formula}.\\[.12cm] \multicolumn{3}{l}{\textbf{Hyperparameters}}\\ &\texttt{WB.ab} & a 2-vector of positive hyperparameters $a$ and $b$ of the prior distribution for the shape parameter $\alpha$ of the Weibull baseline hazard. Example: \texttt{WB.ab <- c(0.5, 0.01)}. \\ &\texttt{WB.cd} & a 2-vector of positive hyperparameters $c$ and $d$ of the prior distribution for the scale parameter $\kappa$ of the Weibull baseline hazard. Example: \texttt{WB.cd <- c(0.5, 0.05)}. \\[.12cm] \multicolumn{3}{l}{\textbf{MCMC Settings}}\\ &\texttt{numReps} & total number of scans\\ &\texttt{thin} & extent of thinning, e.g. if \texttt{thin=10} retain every 10$^{th}$ sample. \\ &\texttt{burninPerc} & the proportion of burn-in (samples to be discarded before analyzing the data). \\ &\verb|mhProp_alpha_var| & the shape parameter $\alpha$ is updated using a Metropolis-Hastings random walk step generating proposals from a Gamma distribution with variance \verb|mhProp_alpha_var|. \\[.12cm] \multicolumn{3}{l}{\textbf{Starting Values}}\\ & \texttt{startValues}& use \texttt{initiate.startValues\_HReg(Formula, data, model, nChain, beta = NULL, WB.alpha = NULL, WB.kappa = NULL)} which initiates starting values for $\beta$, $\alpha$ and $\kappa$ in the Metropolis-Hastings algorithm if left unspecified. Users may set non-null starting values for any of these parameters. \\[.12cm] \multicolumn{3}{l}{\textbf{Storage}}\\ & \texttt{path} & name of the directory where results are stored. Can leave unspecified.\\ \bottomrule \end{tabular} % \rule{0pt}{0.5ex} & &\\ \vspace{.5cm} \noindent{\large \textbf{Implementation}} \begin{verbatim} data(survData) form <- Formula(time + event ~ cov1 + cov2) ## WB.ab <- c(0.5, 0.01) # prior parameters for alpha WB.cd <- c(0.5, 0.05) # prior parameters for kappa hyperParams <- list(WB=list(WB.ab=WB.ab, WB.cd=WB.cd)) ## numReps <- 2000 burninPerc <- 0.5 thin <- 10 mhProp_alpha_var <- 0.01 mcmc <- list(run=list(numReps=numReps, thin=thin, burninPerc=burninPerc), tuning=list(mhProp_alpha_var=mhProp_alpha_var)) ## myModel <- "Weibull" myPath <- "Output/01-Results-WB/" startValues <- initiate.startValues_HReg(form, survData, model=myModel, nChain=2) ## fit_WB <- BayesSurv_HReg(form, survData, id=NULL, model=myModel, hyperParams, startValues, mcmc, path=myPath) summary(fit_WB) pred_WB <- predict(fit_WB, tseq=seq(from=0, to=30, by=5)) plot(pred_WB, plot.est="Haz") plot(pred_WB, plot.est="Surv") \end{verbatim} \newpage %%%%%%%%%%%%%%%%%% %%%%%%% 2 %%%%%%%%%%%%%%%%%% \par\hbox{{\large2. \texttt{BayesSurv\_HReg} }applied to independent, univariate time-to-event data fit to a Cox PH model with PEM baseline hazard}\hrule \vspace{.5cm} \noindent{\large \textbf{Definition of the survival model}}\\ \noindent Let $t_i$ denote the time-to-event of interest for individuals $i=1,\dots,n$, subject to right censoring at time $c_i$. Let $(y_i,\delta_i,x_i)$ denote independent observations, where $y_i=\min\left(t_i,c_i \right)$, $\delta_i=\mathbbm{1}(t_i\le c_i)$, and $x_i$ is a vector of covariates for individual $i$. The following Cox proportional hazards model is assumed \begin{equation*} h(t_i|x_i) = h_0(t_i)\exp\left(x_i^\top \beta \right),~t_i>0. \end{equation*} The baseline hazard $h_0$ is defined non-parametrically by a mixture of piecewise exponential functions as follows $$\lambda_0(t) = \log{h_0(t)} = \displaystyle\sum_{k=1}^{K+1} \lambda_k \mathbbm{1}\left\{t\in (s_{k-1},s_k]\right\},$$ where $\lambda_k$ is constant and the time interval between 0 and the largest observed failure time, denoted $s_k$, is partitioned into $K+1$ disjoint intervals: $00. \end{equation*} We take $\epsilon_i$ to follow the Normal($\mu$, $\sigma^2$) distribution for $\epsilon_i$ for the parametric AFT model. In the Bayesian framework, priors must be specified for $\beta$, $\mu$, and $\sigma^2$. The following specifications are made \begin{align*} \pi(\beta, \mu) &\propto 1, \\ \sigma^2 &\sim Inverse-Gamma(a_{\sigma}, b_{\sigma}). \end{align*} \noindent{\large\textbf{Hyperparameters}}\\ The hyperparameters, $a_{\sigma}$ and $b_{\sigma}$, must be specified for the prior distribution of $\sigma^2$.\\ \noindent{\large \textbf{Arguments to specify}}\\ \noindent\begin{longtable}{p{.12in} p{1.20in} p{5.85in}} \toprule \multicolumn{3}{l}{\textbf{Model-related}}\\ & \texttt{Formula} & a \texttt{Formula} object that corresponds to $\log(t_i)$: $L | y_{L}+y_{U} \sim x$. \\ &\texttt{data} & an $(n\times q)$-dimensional data.frame; the $q$-columns correspond to $q$ covariate vectors named in the formula in \texttt{Formula}.\\[.12cm] \multicolumn{3}{l}{\textbf{Hyperparameters}}\\ &\texttt{LN.ab} & a 2-vector of positive hyperparameters $a$ and $b$ of the prior distribution for $\sigma^2$. Example: \texttt{LN.ab <- c(0.7,0.7)}. \\[.12cm] \multicolumn{3}{l}{\textbf{MCMC Settings}}\\ &\texttt{numReps} & total number of scans\\ &\texttt{thin} & extent of thinning, e.g. if \texttt{thin=10} retain every 10$^{th}$ sample. \\ &\texttt{burninPerc} & the proportion of burn-in (samples to be discarded before analyzing the data). \\ &\texttt{beta.prop.var} & the parameter $\beta$ is updated using a Metropolis-Hastings random walk step generating proposals from a Normal distribution with variance \verb|beta.prop.var|.\\ &\texttt{mu.prop.var} & the parameter $\mu$ is updated using a Metropolis-Hastings random walk step generating proposals from a Normal distribution with variance \verb|mu.prop.var|.\\ &\texttt{zeta.prop.var} & the parameter $\zeta=1/\sigma^2$ is updated using a Metropolis-Hastings random walk step generating proposals from a log-Normal distribution with variance \verb|zeta.prop.var|.\\[.12cm] \multicolumn{3}{l}{\textbf{Starting Values}}\\ & \texttt{startValues}& use \texttt{initiate.startValues\_AFT(Formula, data, model, nChain, beta = NULL, y = NULL, LN.mu = NULL, LN.sigSq = NULL)} which initiates all necessary starting values in the Metropolis-Hastings algorithm. Users may set non-null starting values for \texttt{beta}, \texttt{y}, \texttt{LN.mu}, \texttt{LN.sigSq}. \\[.12cm] \multicolumn{3}{l}{\textbf{Storage}}\\ & \texttt{path} & name of the directory where results are stored. Can leave unspecified.\\ \bottomrule \end{longtable} % \rule{0pt}{0.5ex} & &\\ \vspace{.5cm} \noindent{\large \textbf{Implementation}} \begin{verbatim} data(survData) survData$yL <- survData$yU <- survData[,1] survData$yU[which(survData[,2] == 0)] <- Inf survData$LT <- rep(0, dim(survData)[1]) form <- Formula(LT | yL + yU ~ cov1 + cov2) ## LN.ab <- c(0.3, 0.3) hyperParams <- list(LN=list(LN.ab=LN.ab)) ## numReps <- 1000 thin <- 10 burninPerc <- 0.5 beta.prop.var <- 0.01 mu.prop.var <- 0.1 zeta.prop.var <- 0.1 mcmcParams <- list(run=list(numReps=numReps, thin=thin, burninPerc=burninPerc), tuning=list(beta.prop.var=beta.prop.var, mu.prop.var=mu.prop.var, zeta.prop.var=zeta.prop.var)) ## myModel <- "LN" myPath <- "Output/01-Results-LN/" startValues <- initiate.startValues_AFT(form, survData, model=myModel, nChain=2) ## fit_LN <- BayesSurv_AFT(form, survData, model=myModel, hyperParams, startValues, mcmcParams, path=myPath) summary(fit_LN) pred_LN <- predict(fit_LN, time = seq(0, 35, 1), tseq=seq(from=0, to=30, by=5)) plot(pred_LN, plot.est="Haz") plot(pred_LN, plot.est="Surv") \end{verbatim} \newpage %%%%%%%%%%%%%%%%%% %%%%%%% 4 %%%%%%%%%%%%%%%%%% \par\hbox{{\large 4. \texttt{BayesSurv\_AFT} }applied to independent, univariate time-to-event data fit to an AFT model with DPM baseline survival distribution}\hrule \vspace{.5cm} \noindent{\large \textbf{Definition of the survival model}}\\ \noindent Let $t_i$ denote the time-to-event of interest for individuals $i=1,\dots,n$. Considering interval censoring, the time-to-event for the $i^{\textrm{th}}$ subject satisfies $c_{ij}\leq t_i 0, \end{equation*} where $\epsilon_i$ is assumed to be taken as draws from the DPM of normal distributions: \begin{align*} \epsilon_{i} | r_{i} &\sim \textrm{Normal}(\mu_{r_{i}}, \sigma_{r_{i}}^2),\\ (\mu_{r}, \sigma_{r}^2) &\sim G_{0}, \textrm{ for } r=1,\ldots,M, \\ r_{i}|p &\sim Discrete(r_{i} | p_{1},\ldots,p_{M}), \\ p &\sim Dirichlet(\tau/M, \ldots, \tau/M). \end{align*} In the Bayesian framework, priors must be specified for the unknown parameters. We take the $G_{0}$ as a normal distribution centered at $\mu_{0}$ with a variance $\sigma_{0}^2$ for $\mu_{r}$ and an inverse-Gamma($a_{\sigma}$, $b_{\sigma}$) for $\sigma_{r}^2$. For $\beta$, we adopt non-informative flat priors on the real line. Finally, we specify a Gamma($a_{\tau}$, $b_{\tau}$) hyperprior for the precision parameter $\tau$.\\ \noindent{\large\textbf{Hyperparameters}}\\ The hyperparameter ($\mu_{0}$, $\sigma_{0}^2$, $a_{\sigma}$, $b_{\sigma}$) must be specified for the centering distribution $G_0$, as well as $a_{\tau}$ and $b_{\tau}$, the rate and shape of the Gamma distributed hyperprior for $\tau$.\\ \noindent{\large \textbf{Arguments to specify}}\\ \noindent\begin{longtable}{p{.12in} p{1.20in} p{5.85in}} \toprule \multicolumn{3}{l}{\textbf{Model-related}}\\ & \texttt{Formula} & a \texttt{Formula} object that corresponds to $\log(t_i)$: $L | y_{L}+y_{U} \sim x$. \\ &\texttt{data} & an $(n\times q)$-dimensional data.frame; the $q$-columns correspond to $q$ covariate vectors named in the formula in \texttt{Formula}.\\[.12cm] \multicolumn{3}{l}{\textbf{Hyperparameters}}\\ &\texttt{DPM.mu} & a hyperparameter $\mu_{0}$ of the centering distribution $G_0$.\\ &\texttt{DPM.sigSq} & a positive-valued hyperparameter $\sigma_{0}^2$ of the centering distribution $G_0$.\\ &\texttt{DPM.ab} & a 2-vector of positive hyperparameters $a_{\sigma}$ and $b_{\sigma}$ of the centering distribution $G_0$.\\ &\texttt{Tau.ab} & a 2-vector of positive hyperparameters $a_{\tau}$ and $b_{\tau}$ of the hyperprior distribution for $\tau$. Example: \texttt{Tau.ab <- c(1.5, 0.0125)}. \\[.12cm] \multicolumn{3}{l}{\textbf{MCMC Settings}}\\ &\texttt{numReps} & total number of scans\\ &\texttt{thin} & extent of thinning, e.g. if \texttt{thin=10} retain every 10$^{th}$ sample. \\ &\texttt{burninPerc} & the proportion of burn-in (samples to be discarded before analyzing the data). \\ &\texttt{beta.prop.var} & the parameter $\beta$ is updated using a Metropolis-Hastings random walk step generating proposals from a Normal distribution with variance \verb|beta.prop.var|.\\ &\texttt{mu.prop.var} & the parameter $\mu_r$ is updated using a Metropolis-Hastings random walk step generating proposals from a Normal distribution with variance \verb|mu.prop.var|.\\ &\texttt{zeta.prop.var} & the parameter $\zeta_r=1/\sigma_r^2$ is updated using a Metropolis-Hastings random walk step generating proposals from a log-Normal distribution with variance \verb|zeta.prop.var|.\\[.12cm] \multicolumn{3}{l}{\textbf{Starting Values}}\\ & \texttt{startValues}& use \texttt{initiate.startValues\_AFT(Formula, data, model, nChain, beta = NULL, y = NULL, DPM.class = NULL, DPM.mu = NULL, DPM.zeta=NULL, DPM.tau=NULL)} which initiates all necessary starting values in the Metropolis-Hastings algorithm. Users may set non-null starting values for \texttt{beta}, \texttt{y}, \texttt{DPM.class}, \texttt{DPM.mu}, \texttt{DPM.zeta}, \texttt{DPM.tau}. \\[.12cm] \multicolumn{3}{l}{\textbf{Storage}}\\ & \texttt{path} & name of the directory where results are stored. Can leave unspecified.\\ \bottomrule \end{longtable} % \rule{0pt}{0.5ex} & &\\ \vspace{.5cm} \newpage \noindent{\large \textbf{Implementation}} \begin{verbatim} data(survData) survData$yL <- survData$yU <- survData[,1] survData$yU[which(survData[,2] == 0)] <- Inf survData$LT <- rep(0, dim(survData)[1]) form <- Formula(LT | yL + yU ~ cov1 + cov2) ## DPM.mu <- log(12) DPM.sigSq <- 100 DPM.ab <- c(2, 1) Tau.ab <- c(1.5, 0.0125) hyperParams <- list(DPM=list(DPM.mu=DPM.mu, DPM.sigSq=DPM.sigSq, DPM.ab=DPM.ab, Tau.ab=Tau.ab)) ## numReps <- 1000 thin <- 10 burninPerc <- 0.5 beta.prop.var <- 0.01 mu.prop.var <- 0.1 zeta.prop.var <- 0.1 mcmcParams <- list(run=list(numReps=numReps, thin=thin, burninPerc=burninPerc), tuning=list(beta.prop.var=beta.prop.var, mu.prop.var=mu.prop.var, zeta.prop.var=zeta.prop.var)) ## myModel <- "DPM" myPath <- "Output/02-Results-DPM/" startValues <- initiate.startValues_AFT(form, survData, model=myModel, nChain=2) ## fit_DPM <- BayesSurv_AFT(form, survData, model=myModel, hyperParams, startValues, mcmcParams, path=myPath) summary(fit_DPM) pred_DPM <- predict(fit_DPM, time = seq(0, 35, 1), tseq=seq(from=0, to=30, by=5)) plot(pred_DPM, plot.est="Haz") plot(pred_DPM, plot.est="Surv") \end{verbatim} \newpage %%%%%%%%%%%%%%%%%% %%%%%%% 5 %%%%%%%%%%%%%%%%%% \par\hbox{{\large 5. \texttt{BayesSurv\_HReg} }applied to cluster-correlated, univariate time-to-event data fit to a Cox PH model with Weibull baseline hazard}\hrule \vspace{.5cm} \noindent{\large \textbf{Definition of the survival model}}\\ \noindent Let $t_{ji}$ denote the time-to-event of interest for individuals $i=1,\dots,n_j$ in cluster $j=1,\dots J$, subject to right censoring at time $c_{ji}$. Let $(y_{ji},\delta_{ji},x_{ji})$ denote independent observations, where $y_{ji}=\min\left(t_{ji},c_{ji} \right)$, $\delta_{ji}=\mathbbm{1}(t_{ji}\le c_{ji})$, and $x_{ji}$ is a vector of covariates for individual $i$. The following Cox proportional hazards model is assumed \begin{equation*} h(t_{ji}|x_{ji}) = h_0(t_{ji})\exp\left(x_{ji}^\top \beta + V_j \right),~t_{ji}>0, \end{equation*} where the $V_j$'s are cluster-specific random effects and the baseline hazard $h_0$ is defined parametrically by a Weibull hazard, $h_0(t) = \alpha \kappa t^{\alpha - 1}$. In the Bayesian framework, priors must be specified for the regression parameter, $\beta$, the cluster-specific random effects, $V_j$, and the shape and scale parameters of baseline hazard function, $\alpha$ and $\kappa$, respectively. The prior distributions for $\beta$, $\alpha$ and $\kappa$ are given below. \begin{align*} \pi(\beta) & \propto 1,\\ \pi(\alpha) & \sim Gamma(a,b),\\ \pi(\kappa) & \sim Gamma(c,d). \end{align*} We provide two possible prior specifications for the cluster-specific random effects below. $$\begin{array}{rcl c rcl} %\multicolumn{3}{c}{\text{Normal prior}} &~~~~~~~~~~~~~~~~~~~~ & \multicolumn{3}{c}{\text{Dirichlet process mixture (DPM) prior}}\\ V_j&\sim&Normal(0,\sigma^2), &~~~~~~~~~~~~~~~~~~~~ & V_j|m_j&\sim & Normal(\mu_m,\sigma^2_m),\\ \zeta = \displaystyle\frac{1}{\sigma^2}&\sim&Gamma(a_N,b_N). & & (\mu_m,\sigma_m^2)&\sim & G_0,~~\text{for}~~m=1,\dots M,\\ & & & & m_j|p &\sim & Discrete(m_j|p_1,\dots,p_M), \\ & & & & p &\sim & Dirichlet({\tau}/{M},\dots, {\tau}/{M}), \\ & & & & \tau &\sim & Gamma(a_\tau,b_\tau). \end{array}$$ In the first column, the individual specific-random effects are assumed to be $\overset{iid}{\sim} N(0,\sigma^2)$. In the second column, the cluster-specific random effects are drawn from a mixture of $M$ normal distributions each with mean and variance $(\mu_m,\sigma_m^2)$ which are distributed as a multivariate Normal/Inverse-Gamma (NIG), denoted by $G_0$; we refer to this as the Dirichlet process mixture (DPM) prior. The probability density of $G_0$ is defined by the product $$f_{\text{NIG}}(\mu,\sigma^2|\mu_0,\zeta_0,a_0,b_0) = f_{\text{Normal}}(\mu|\mu_0,1/\zeta_0^2) \times f_{\text{Gamma}}(\zeta=1/\sigma^2|a_0,b_0).$$ We assume $\mu_0=0$ and $\zeta_0=1$. \\ \noindent{\large\textbf{Hyperparameters}}\\ \noindent\begin{tabular}{lcl} $a$, $b$ &: & shape and rate of Gamma prior for $\alpha$\\ $c$, $d$ &: & shape and rate of Gamma prior for $\kappa$\\ $a_N$, $b_N$ & : & mean and variance of normal prior for $V_{j}$\\ $a_0$, $b_0$ &: & shape and rate of Gamma component of the prior distribution, $G_0$, of $(\mu_m,\sigma_m^2)$ (DPM prior)\\ $a_\tau$, $b_\tau$ & : & shape and rate of Gamma hyperprior for $\tau$ (DPM prior)\\[0.5cm] \end{tabular} \noindent{\large \textbf{Arguments to specify}} \noindent\begin{longtable}{p{.15in} p{1.20in} p{5.85in}} \toprule \multicolumn{3}{l}{\textbf{Model-related}}\\ & \texttt{Formula} & a \texttt{Formula} object that corresponds to the hazard $h(t_i|x_i)$: $y+\delta \sim x$.\\ &\texttt{data} & an $(n\times q)$-dimensional data.frame; the $q$-columns correspond to $q$ covariate vectors named in the formula in \texttt{Formula}.\\ & \texttt{model}& a character vector that specifies the type of components in the model. Use \texttt{model <- c("Weibull", "Normal")} for Normal prior for $V_j$ and use \texttt{model <- c("Weibull", "DPM")} for DPM prior.\\ &\texttt{id}& an $n$-vector of cluster information where cluster membership corresponds to one of the positive integers $1,\dots, J$. \\[.12cm] \multicolumn{3}{l}{\textbf{Hyperparameters}}\\ &\texttt{WB.ab} & a 2-vector of positive hyperparameters $a$ and $b$ of the prior distribution for the shape parameter $\alpha$ of the Weibull baseline hazard. Example: \texttt{WB.ab <- c(0.5, 0.01)}. \\ &\texttt{WB.cd} & a 2-vector of positive hyperparameters $c$ and $d$ of the prior distribution for the scale parameter $\kappa$ of the Weibull baseline hazard. Example: \texttt{WB.cd <- c(0.5, 0.05)}. \\[.12cm] \multicolumn{2}{l}{\hspace{0.35cm}\textbf{Normal prior for $V_j$} }& \\ &\texttt{Normal.ab} & a 2-vector of positive hyperparameters $a_N$ and $b_N$ of the prior for $1/\sigma^2$, the precision of the normally distributed cluster-specific random effects. Example: \texttt{Normal.ab <- c(0.5, 0.01)}. \\ \multicolumn{2}{l}{\hspace{0.35cm}\textbf{DPM prior for $V_j$} }& \\ &\texttt{DPM.ab} & a 2-vector of positive hyperparameters $a_0$ and $b_0$ of the prior for $(\mu_m,\sigma_m^2)$, the parameters of the normally distributed cluster-specific random effects. Example: \texttt{DPM.ab <- c(0.5, 0.01)}. \\ &\texttt{aTau} & a positive-valued hyperparameter corresponding to the shape parameter, $a_\tau$, of the Gamma prior of $\tau$.\\ &\texttt{bTau} & a positive-valued hyperparameter corresponding to the rate parameter, $b_\tau$, of the Gamma prior of $\tau$.\\[.12cm] \multicolumn{3}{l}{\textbf{MCMC Settings}}\\ &\texttt{numReps} & total number of scans\\ &\texttt{thin} & extent of thinning, e.g. if \texttt{thin=10} retain every 10$^{th}$ sample. \\ &\texttt{burninPerc} & the proportion of burn-in (samples to be discarded before analyzing the data). \\ &\verb|mhProp_alpha_var| & the shape parameter $\alpha$ is updated using a Metropolis-Hastings random walk algorithm which generates proposals from a Gamma distribution with variance \verb|mhProp_alpha_var|. \\ &\verb|mhProp_V_var| & the cluster-specific random effects, $V_{ji}$, are updated using a Metropolis-Hastings random walk algorithm which generates proposals from a Normal distribution with variance \verb|mhProp_V_var| \\[.12cm] \multicolumn{3}{l}{\textbf{Starting Values}}\\* & \texttt{startValues}& use \texttt{initiate.startValues\_HReg(Formula, data, model, id, nChain, beta = NULL, WB.alpha = NULL, WB.kappa = NULL, V.j = NULL, Normal.zeta = NULL, DPM.class = NULL, DPM.tau = NULL)} which initiates starting values for $\beta$, $\alpha$, $\kappa$, $V_j$, $\zeta$ (in the DPM model for $V_j$) and $\tau$ in the Metropolis-Hastings-Green algorithm if left unspecified; \texttt{DPM.class} sets the starting value for class membership in the DPM model. Users may set non-null starting values for any of these parameters. \\[.12cm] \multicolumn{3}{l}{\textbf{Storage}}\\ & \texttt{path} & name of the directory where results are stored. Can leave unspecified.\\ & \texttt{storeV} & a \texttt{TRUE/FALSE} logical constant indicating storage of $V_{j}$ values. \\ \bottomrule \end{longtable} % \rule{0pt}{0.5ex} & &\\ \vspace{.5cm} \noindent{\large \textbf{Implementation}} \begin{verbatim} data(survData) id=survData$cluster form <- Formula(time + event ~ cov1 + cov2) ## WB.ab <- c(0.5, 0.01) # prior parameters for alpha WB.cd <- c(0.5, 0.05) # prior parameters for kappa Normal.ab <- c(0.5, 0.01) # for Normal random effects DPM.ab <- c(0.5, 0.01) # For DPM aTau <- 1.5 bTau <- 0.0125 hyperParams.WB.Normal <- list(WB=list(WB.ab=WB.ab, WB.cd=WB.cd), Normal=list(Normal.ab=Normal.ab)) hyperParams.WB.DPM <- list(WB=list(WB.ab=WB.ab, WB.cd=WB.cd), DPM=list(DPM.ab=DPM.ab, aTau=aTau, bTau=bTau)) ## numReps <- 2000 burninPerc <- 0.5 thin <- 10 mhProp_alpha_var <- 0.01 mhProp_V_var <- 0.05 storeV <- TRUE mcmc.WB <- list(run=list(numReps=numReps, thin=thin, burninPerc=burninPerc), storage=list(storeV=storeV), tuning=list(mhProp_alpha_var=mhProp_alpha_var, mhProp_V_var=mhProp_V_var)) ## myModel.WB.Normal <- c("Weibull","Normal") myPath.WB.Normal <- "Output/03-Results-WB_Normal/" startValues.WB.Normal <- initiate.startValues_HReg(form, survData, id, model=myModel.WB.Normal, nChain=2) ## fit_WB_N <- BayesSurv_HReg(form, survData, id, model=myModel.WB.Normal, hyperParams.WB.Normal, startValues.WB.Normal, mcmc.WB, path=myPath.WB.Normal) summary(fit_WB_N) pred_WB_N <- predict(fit_WB_N, tseq=seq(from=0, to=30, by=5)) plot(pred_WB_N, plot.est="Haz") plot(pred_WB_N, plot.est="Surv") ## myModel.WB.DPM <- c("Weibull","DPM") myPath.WB.DPM <- "Output/04-Results-WB_DPM/" startValues.WB.DPM <- initiate.startValues_HReg(form, survData, id, model=myModel.WB.DPM, nChain=2) ## fit_WB_DPM <- BayesSurv_HReg(form, survData, id, model=myModel.WB.DPM, hyperParams.WB.DPM, startValues.WB.DPM, mcmc.WB, path=myPath.WB.DPM) summary(fit_WB_DPM) pred_WB_DPM <- predict(fit_WB_DPM, tseq=seq(from=0, to=30, by=5)) plot(pred_WB_DPM, plot.est="Haz") plot(pred_WB_DPM, plot.est="Surv") \end{verbatim} \newpage %%%%%%%%%%%%%%%%%% %%%%%%% 6 %%%%%%%%%%%%%%%%%% \par\hbox{{\large 6. \texttt{BayesSurv} }applied to cluster-correlated, univariate time-to-event data fit to a Cox PH model with PEM baseline hazard}\hrule \vspace{.25cm} \noindent{\large \textbf{Definition of the survival model}}\\ \noindent Let $t_{ji}$ denote the time-to-event of interest for individuals $i=1,\dots,n_j$ in cluster $j=1,\dots J$, subject to right censoring at time $c_{ji}$. Let $(y_{ji},\delta_{ji},x_{ji})$ denote independent observations, where $y_{ji}=\min\left(t_{ji},c_{ji} \right)$, $\delta_{ji}=\mathbbm{1}(t_{ji}\le c_{ji})$, and $x_{ji}$ is a vector of covariates for individual $i$. The following Cox proportional hazards model is assumed \begin{equation*} h(t_{ji}|x_{ji}) = h_0(t_{ji})\exp\left(x_{ji}^\top \beta + V_j \right),~t_{ji}>0, \end{equation*} The baseline hazard $h_0$ is defined non-parametrically by a mixture of piecewise exponential functions as follows $$\lambda_0(t) = \log{h_0(t)} = \displaystyle\sum_{k=1}^{K+1} \lambda_k \mathbbm{1}\left\{t\in (s_{k-1},s_k]\right\},$$ where $\lambda_k$ is constant and the time interval between 0 and the largest observed failure time, denoted $s_k$, is partitioned into $K+1$ disjoint intervals: $00,\label{BayesID_HReg1}\\ h_2(t_{i2}|\gamma_{ji}, x_{i2}) & = \gamma_{ji} h_{02}(t_{i2})\exp\left(x_{i2}^\top\beta_2 \right),~t_{i2}>0,\label{BayesID_HReg2}\\ h_3(t_{i2}|t_{i1}, \gamma_{ji}, x_{i3}) & = \gamma_{ji} h_{03}(t_{i2})\exp\left(x_{i3}^\top\beta_3 \right),~t_{i2}>0,\label{BayesID_HRegMarkov} \end{eqnarray} where $\gamma_{ji}$ is a subject-specific frailty with vectors of covariates $x_{i1}$, $x_{i2}$ and $x_{i3}$ which are subsets of $x_i$. The baseline hazard functions are defined parametrically by Weibull hazards of the form $h_{0g}(t)=\alpha_g \kappa_g t^{\alpha_g-1}$, for $g\in\{1,2,3 \}$. The baseline hazard function $h_{03}$ is assumed to be Markov with respect to $t_{i1}$; we will refer to the set of conditional hazard functions in (\ref{BayesID_HReg1})-(\ref{BayesID_HRegMarkov}) as the Markov model. Alternatively, we consider modeling $h_3$ as follows: \begin{eqnarray} h_3(t_{i2}|t_{i1}, \gamma_{ji}, x_{i3}) & = \gamma_{ji} h_{03}(t_{i2}-t_{i1})\exp\left(x_{i3}^\top\beta_3 \right),~00,\label{BayesID_HRegP1}\\ h_2(t_{i2}|\gamma_{ji}, x_{i2}) & = \gamma_{ji} h_{02}(t_{i2})\exp\left(x_{i2}^\top\beta_2 \right),~t_{i2}>0,\label{BayesID_HRegP2}\\ h_3(t_{i2}|t_{i1}, \gamma_{ji}, x_{i3}) & = \gamma_{ji} h_{03}(t_{i2})\exp\left(x_{i3}^\top\beta_3 \right),~t_{i2}>0,\label{BayesID_HRegPMarkov} \end{eqnarray} where $\gamma_{ji}$ is a subject-specific frailty with vectors of covariates $x_{i1}$, $x_{i2}$ and $x_{i3}$ which are subsets of $x_i$. The baseline hazard $h_0$ is defined non-parametrically by a mixture of piecewise exponential functions as follows $$\lambda_0(t) = \log{h_0(t)} = \displaystyle\sum_{k=1}^{K+1} \lambda_k \mathbbm{1}\left\{t\in (s_{k-1},s_k]\right\},$$ where $\lambda_k$ is constant and the time interval between 0 and the largest observed failure time, denoted $s_k$, is partitioned into $K+1$ disjoint intervals: $0 0, \nonumber\\ \log(t_{i2}) &=& x_{i2}^\top\beta_2 + \gamma_i + \epsilon_{i2}, ~~ t_{i2} > 0, \nonumber\\ \log(t_{i2} - t_{i1}) &=& x_{i3}^\top\beta_3 + \gamma_i + \epsilon_{i3}, ~~ t_{i2} > t_{i1}, \nonumber \end{eqnarray} where $\gamma_i$ is a study participant-specific random effect, $x_{ig}$ is a vector of transition-specific covariates, $\beta_g$ is a corresponding vector of transition-specific regression parameters, and $\epsilon_{ig}$ is a transition-specific random variable whose distribution determines that of the corresponding transition time, $g \in \{1,2,3\}$. In the presence of interval censoring, the times-to-event for the $i^{\textrm{th}}$ subject satisfy $c_{ij}\leq t_{i1} t_2\right\}$ to be stored\\ & \texttt{path} & name of the directory where results are stored. Can leave unspecified.\\ \bottomrule \end{longtable} % \rule{0pt}{0.5ex} & &\\ \vspace{.5cm} \newpage \noindent{\large \textbf{Implementation}} \begin{verbatim} data(scrData) scrData$y1L <- scrData$y1U <- scrData[,1] scrData$y1U[which(scrData[,2] == 0)] <- Inf scrData$y2L <- scrData$y2U <- scrData[,3] scrData$y2U[which(scrData[,4] == 0)] <- Inf scrData$LT <- rep(0, dim(scrData)[1]) form <- Formula(LT | y1L + y1U | y2L + y2U ~ x1 + x2 + x3 | x1 + x2 | x1 + x2) ## theta.ab <- c(0.5, 0.05) LN.ab1 <- c(0.3, 0.3) LN.ab2 <- c(0.3, 0.3) LN.ab3 <- c(0.3, 0.3) hyperParams <- list(theta=theta.ab, LN=list(LN.ab1=LN.ab1, LN.ab2=LN.ab2, LN.ab3=LN.ab3)) ## numReps <- 300 thin <- 3 burninPerc <- 0.5 nGam_save <- 10 nY1_save <- 10 nY2_save <- 10 nY1.NA_save <- 10 betag.prop.var <- c(0.01,0.01,0.01) mug.prop.var <- c(0.1,0.1,0.1) zetag.prop.var <- c(0.1,0.1,0.1) gamma.prop.var <- 0.01 mcmcParams <- list(run=list(numReps=numReps, thin=thin, burninPerc=burninPerc), storage=list(nGam_save=nGam_save, nY1_save=nY1_save, nY2_save=nY2_save, nY1.NA_save=nY1.NA_save), tuning=list(betag.prop.var=betag.prop.var, mug.prop.var=mug.prop.var,zetag.prop.var=zetag.prop.var, gamma.prop.var=gamma.prop.var)) ## myModel <- "LN" myPath <- "Output/01-Results-LN/" startValues <- initiate.startValues_AFT(form, scrData, model=myModel, nChain=2) ## fit_LN <- BayesID_AFT(form, scrData, model=myModel, hyperParams, startValues, mcmcParams, path=myPath) summary(fit_LN) pred_LN <- predict(fit_LN, time = seq(0, 35, 1), tseq=seq(from=0, to=30, by=5)) plot(pred_LN, plot.est="Haz") plot(pred_LN, plot.est="Surv") \end{verbatim} \newpage %%%%%%%%%%%%%%%%%% %%%%%%% 10 %%%%%%%%%%%%%%%%%% \par\hbox{{\large 10. \texttt{BayesID\_AFT} }to analyze independent semi-competing risks data using an illness-death model with DPM baseline survival distribution} \hrule \vspace{.25cm} \noindent{\large \textbf{Definition of the survival model}}\\ \noindent Let $t_{i1}$, $t_{i2}$ denote time to non-terminal and terminal event from subject $i=1,...,n$. The independent semi-competing risks data are assumed to arise from an illness-death model system with transitions that are modeled through the following three hazard functions: \begin{eqnarray} \log(t_{i1}) &=& x_{i1}^\top\beta_1 + \gamma_i + \epsilon_{i1}, ~~ t_{i1} > 0, \nonumber\\ \log(t_{i2}) &=& x_{i2}^\top\beta_2 + \gamma_i + \epsilon_{i2}, ~~ t_{i2} > 0, \nonumber\\ \log(t_{i2} - t_{i1}) &=& x_{i3}^\top\beta_3 + \gamma_i + \epsilon_{i3}, ~~ t_{i2} > t_{i1}, \nonumber \end{eqnarray} where $\gamma_i$ is a study participant-specific random effect, $x_{ig}$ is a vector of transition-specific covariates, $\beta_g$ is a corresponding vector of transition-specific regression parameters, and $\epsilon_{ig}$ is a transition-specific random variable whose distribution determines that of the corresponding transition time, $g \in \{1,2,3\}$. In the presence of interval censoring, the times-to-event for the $i^{\textrm{th}}$ subject satisfy $c_{ij}\leq t_{i1} t_2\right\}$ to be stored\\ & \texttt{path} & name of the directory where results are stored. Can leave unspecified.\\ \bottomrule \end{longtable} % \rule{0pt}{0.5ex} & &\\ \vspace{.5cm} \noindent{\large \textbf{Implementation}} \begin{verbatim} data(scrData) scrData$y1L <- scrData$y1U <- scrData[,1] scrData$y1U[which(scrData[,2] == 0)] <- Inf scrData$y2L <- scrData$y2U <- scrData[,3] scrData$y2U[which(scrData[,4] == 0)] <- Inf scrData$LT <- rep(0, dim(scrData)[1]) form <- Formula(LT | y1L + y1U | y2L + y2U ~ x1 + x2 + x3 | x1 + x2 | x1 + x2) ## theta.ab <- c(0.5, 0.05) ## DPM.mu1 <- log(12) DPM.mu2 <- log(12) DPM.mu3 <- log(12) DPM.sigSq1 <- 100 DPM.sigSq2 <- 100 DPM.sigSq3 <- 100 DPM.ab1 <- c(2, 1) DPM.ab2 <- c(2, 1) DPM.ab3 <- c(2, 1) Tau.ab1 <- c(1.5, 0.0125) Tau.ab2 <- c(1.5, 0.0125) Tau.ab3 <- c(1.5, 0.0125) hyperParams <- list(theta=theta.ab, DPM=list(DPM.mu1=DPM.mu1, DPM.mu2=DPM.mu2, DPM.mu3=DPM.mu3, DPM.sigSq1=DPM.sigSq1, DPM.sigSq2=DPM.sigSq2, DPM.sigSq3=DPM.sigSq3, DPM.ab1=DPM.ab1, DPM.ab2=DPM.ab2, DPM.ab3=DPM.ab3, Tau.ab1=Tau.ab1, Tau.ab2=Tau.ab2, Tau.ab3=Tau.ab3)) ## numReps <- 300 thin <- 3 burninPerc <- 0.5 nGam_save <- 10 nY1_save <- 10 nY2_save <- 10 nY1.NA_save <- 10 betag.prop.var <- c(0.01,0.01,0.01) mug.prop.var <- c(0.1,0.1,0.1) zetag.prop.var <- c(0.1,0.1,0.1) gamma.prop.var <- 0.01 mcmcParams <- list(run=list(numReps=numReps, thin=thin, burninPerc=burninPerc), storage=list(nGam_save=nGam_save, nY1_save=nY1_save, nY2_save=nY2_save, nY1.NA_save=nY1.NA_save), tuning=list(betag.prop.var=betag.prop.var, mug.prop.var=mug.prop.var, zetag.prop.var=zetag.prop.var, gamma.prop.var=gamma.prop.var)) ## myModel <- "DPM" myPath <- "Output/02-Results-DPM/" startValues <- initiate.startValues_AFT(form, scrData, model=myModel, nChain=2) ## fit_DPM <- BayesID_AFT(form, scrData, model=myModel, hyperParams, startValues, mcmcParams, path=myPath) summary(fit_DPM); pred_DPM <- predict(fit_DPM, time = seq(0, 35, 1), tseq=seq(from=0, to=30, by=5)) plot(pred_DPM, plot.est="Haz") plot(pred_DPM, plot.est="Surv") \end{verbatim} \newpage %%%%%%%%%%%%%%%%%% %%%%%%% 11 %%%%%%%%%%%%%%%%%% \par\hbox{{\large 11. \texttt{BayesID\_HReg} }to analyze cluster-correlated semi-competing risks data using an illness-death model with Weibull baseline hazards}\hrule \vspace{.25cm} \noindent{\large \textbf{Definition of the survival model}}\\ \noindent Let $t_{ji1}$ and $t_{ji2}$ denote the time to nonterminal event and terminal event from subject $i=1,\dots,n_j$ in cluster $j=1,\dots, J$, subject to right censoring at time $c_{ji}$. Let $(y_{ji1},y_{ji2},\delta_{ji1},\delta_{ji2},x_{ji})$ denote independent observations, where $y_{ji1}=\min\left(t_{ji1},t_{ji2},c_{ji} \right)$, $\delta_{ji1}=\mathbbm{1}\left\{t_{ji1}\le \min\left(t_{ji2},c_{ji} \right) \right\}$, $y_{ji2}=\min\left(t_{ji2},c_{ji} \right)$, $\delta_{ji2}=\mathbbm{1}\left\{t_{ji2}\le c_{ji} \right\}$, and $x_{ji}$ is a vector of covariates for individual $i$. The independent semi-competing risks data are assumed to arise from an illness-death model system with transitions that are modeled through the following three hazard functions: \begin{eqnarray} h_1(t_{ji1}|\gamma_{ji}, x_{ji1}, V_{j1}) & = \gamma_{ji} h_{01}(t_{ji1})\exp\left(x_{ji1}^\top\beta_1 + V_{j1}\right),~t_{ji1}>0,\label{BayesID_HRegCW1}\\ h_2(t_{ji2}|\gamma_{ji}, x_{ji2}, V_{j2}) & = \gamma_{ji} h_{02}(t_{ji2})\exp\left(x_{ji2}^\top\beta_2 + V_{j2} \right),~t_{ji2}>0,\label{BayesID_HRegCW2}\\ h_3(t_{ji2}|t_{ji1}, \gamma_{ji}, x_{ji3}, V_{j3}) & = \gamma_{ji} h_{03}(t_{ji2})\exp\left(x_{ji3}^\top\beta_3 + V_{j3}\right),~t_{ji2}>0,\label{BayesID_HRegCWMarkov} \end{eqnarray} where $\gamma_{ji}$ is a subject-specific frailty, $V_j=(V_{j1}, V_{j2}, V_{j3})$ is a vector of cluster-specific random effects, and$x_{ji1}$, $x_{ji2}$ and $x_{ji3}$ which are subsets of $x_i$ are vectors of covariates. The baseline hazard functions are defined parametrically by Weibull hazards of the form $h_{0g}(t)=\alpha_g \kappa_g t^{\alpha_g-1}$, for $g\in\{1,2,3 \}$. The baseline hazard function $h_{03}$ is assumed to be Markov with respect to $t_{ji1}$; we will refer to the set of conditional hazard functions in (\ref{BayesID_HRegCW1})-(\ref{BayesID_HRegCWMarkov}) as the Markov model. Alternatively, we consider modeling $h_3$ as follows: \begin{eqnarray} h_3(t_{ji2}|t_{ji1}, \gamma_{ji}, x_{ji3}, V_{j3}) & = \gamma_{ji} h_{03}(t_{ji2}-t_{ji1})\exp\left(x_{ji3}^\top\beta_3 + V_{j3}\right),~00,\label{1}\\ h_2(t_{ji2}|\gamma_{ji}, x_{ji2}, V_{j2}) & = \gamma_{ji} h_{02}(t_{ji2})\exp\left(x_{ji2}^\top\beta_2 + V_{j2} \right),~t_{ji2}>0,\label{2}\\ h_3(t_{ji2}|t_{ji1}, \gamma_{ji}, x_{ji3}, V_{j3}) & = \gamma_{ji} h_{03}(t_{ji2})\exp\left(x_{ji3}^\top\beta_3 + V_{j3}\right),~t_{ji2}>0,\label{Markov} \end{eqnarray} where $\gamma_{ji}$ is a subject-specific frailty, $V_j=(V_{j1}, V_{j2}, V_{j3})$ is a vector of cluster-specific random effects, and$x_{ji1}$, $x_{ji2}$ and $x_{ji3}$ which are subsets of $x_i$ are vectors of covariates. The baseline hazard $h_0$ is defined non-parametrically by a mixture of piecewise exponential functions as follows $$\lambda_0(t) = \log{h_0(t)} = \displaystyle\sum_{k=1}^{K+1} \lambda_k \mathbbm{1}\left\{t\in (s_{k-1},s_k]\right\},$$ where $\lambda_k$ is constant and the time interval between 0 and the largest observed failure time, denoted $s_k$, is partitioned into $K+1$ disjoint intervals: $0