## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = (4/6)*7 ) ## ----setup-------------------------------------------------------------------- library(QAEnsemble) ## ----eval=TRUE---------------------------------------------------------------- library(svMisc) library(coda) ## ----eval=TRUE---------------------------------------------------------------- set.seed(10) ## ----eval=TRUE---------------------------------------------------------------- a_shape = 8 sigma_scale = 1100 time_use = 12*c(1:110) plot(time_use, exp(-1*((time_use/sigma_scale)^a_shape)), xlab="Months (t)", ylab="Survival probability (S)", type = "l", col = "red") ## ----eval=TRUE---------------------------------------------------------------- #Input for the 'create_life_table_info' function: #interval_time_input: the start and end times for each time interval (Note: it #is assumed in the 'create_life_table_info' function that the first start time #is at month 0) #num_ran_samples_input: the number of random samples from the Weibull #distribution #data_weibull_input: the Weibull distribution mortality time samples #Output for the 'create_life_table_info' is a list containing two data frames. #The first data frame contains the life table information: time intervals, the #number of people at risk in the interval, number of deaths that occur in the #interval, risk of death in the interval, probability of surviving in the #interval, and the survival probabilities. The second data frame contains the #interval-censored information in a format easy to use in the likelihood #function: interval-censored mortality time observation number, interval left #bound, and interval right bound. create_life_table_info <- function(interval_time_input, num_ran_samples_input, data_weibull_input) { num_at_risk_during_interval_output = matrix(NA, nrow = 1, ncol = length(interval_time_input)) num_deaths_during_interval_output = matrix(NA, nrow = 1, ncol = length(interval_time_input)) risk_death_during_interval_output = matrix(NA, nrow = 1, ncol = length(interval_time_input)) prob_surv_during_interval_output = matrix(NA, nrow = 1, ncol = length(interval_time_input)) s_values_output = matrix(NA, nrow = 1, ncol = length(interval_time_input)) obs_num_output = matrix(1:num_ran_samples_input, nrow = 1, ncol = num_ran_samples_input) left_endpoint_output = matrix(NA, nrow = 1, ncol = num_ran_samples_input) right_endpoint_output = matrix(NA, nrow = 1, ncol = num_ran_samples_input) for(t in 1:length(interval_time_input)) { if(t == 1) { num_at_risk_during_interval_output[t] = num_ran_samples_input num_deaths_during_interval_output[t] = length(data_weibull_input[0 <= data_weibull_input & data_weibull_input <= interval_time_input[1]]) risk_death_during_interval_output[t] = num_deaths_during_interval_output[t]/num_at_risk_during_interval_output[t] prob_surv_during_interval_output[t] = 1 - risk_death_during_interval_output[t] s_values_output[t] = 1*prob_surv_during_interval_output[t] } if(t != 1) { if(num_at_risk_during_interval_output[t-1] - num_deaths_during_interval_output[t-1] > 0) { num_at_risk_during_interval_output[t] = num_at_risk_during_interval_output[t-1] - num_deaths_during_interval_output[t-1] num_deaths_during_interval_output[t] = length(data_weibull_input[interval_time_input[t-1] < data_weibull_input & data_weibull_input <= interval_time_input[t]]) risk_death_during_interval_output[t] = num_deaths_during_interval_output[t]/num_at_risk_during_interval_output[t] prob_surv_during_interval_output[t] = 1 - risk_death_during_interval_output[t] s_values_output[t] = s_values_output[t-1]*prob_surv_during_interval_output[t] } if(num_at_risk_during_interval_output[t-1] - num_deaths_during_interval_output[t-1] <= 0) { num_at_risk_during_interval_output[t] = 0 num_deaths_during_interval_output[t] = 0 risk_death_during_interval_output[t] = 1 prob_surv_during_interval_output[t] = 1 - risk_death_during_interval_output[t] s_values_output[t] = s_values_output[t-1]*prob_surv_during_interval_output[t] } } } string_time_interval_names = matrix(NA, nrow = 1, ncol = length(interval_time_input)) i = 1 for(t in 1:length(interval_time_input)) { if(t == 1) { string_time_interval_names[1] = paste(0, "-", 12, " mo", " (", 0, "-", 1, " yr)", sep = "") if(num_deaths_during_interval_output[t] > 0) { for(j in 1:num_deaths_during_interval_output[t]) { left_endpoint_output[i] = 0 right_endpoint_output[i] = 12 i = i + 1 } } } if(t != 1) { string_time_interval_names[t] = paste(interval_time_input[t-1], "-", interval_time_input[t], " mo", " (", interval_time_input[t-1]/12, "-", interval_time_input[t]/12, " yr)", sep = "") if(num_deaths_during_interval_output[t] > 0) { for(j in 1:num_deaths_during_interval_output[t]) { left_endpoint_output[i] = interval_time_input[t-1] right_endpoint_output[i] = interval_time_input[t] i = i + 1 } } } } life_table_df = data.frame(t(string_time_interval_names), t(num_at_risk_during_interval_output), t(num_deaths_during_interval_output), t(risk_death_during_interval_output), t(prob_surv_during_interval_output), t(s_values_output)) names(life_table_df)[1] = "Time interval" names(life_table_df)[2] = "Number of people at risk in the interval" names(life_table_df)[3] = "Number of deaths that occur in the interval" names(life_table_df)[4] = "Risk of death in the interval" names(life_table_df)[5] = "Probability of surviving in the interval" names(life_table_df)[6] = "Survival probability" interval_censored_df = data.frame(t(obs_num_output), t(left_endpoint_output), t(right_endpoint_output)) names(interval_censored_df)[1] = "Interval-censored mortality time observation number" names(interval_censored_df)[2] = "Interval left bound" names(interval_censored_df)[3] = "Interval right bound" return(list("life_table_df" = life_table_df, "interval_censored_df" = interval_censored_df)) } ## ----eval=TRUE---------------------------------------------------------------- num_ran_samples = 50 data_weibull = rweibull(num_ran_samples, shape = a_shape, scale = sigma_scale) info_use = create_life_table_info(time_use, num_ran_samples, data_weibull) knitr::kable(info_use$life_table_df, caption = "Life table data", format = "pipe", padding = 0) ## ----eval=TRUE---------------------------------------------------------------- knitr::kable(info_use$interval_censored_df, caption = "Interval-censored data", format = "pipe", padding = 0) ## ----eval=TRUE---------------------------------------------------------------- #Plot the survival probability from the life table data plot(time_use, info_use$life_table_df[[6]], xlab="Months (t)", ylab="Survival probability (S)", type = "s") ## ----eval=TRUE---------------------------------------------------------------- logp <- function(param) { a_shape_use = param[1] sigma_scale_use = param[2] logp_val = dunif(a_shape_use, min = 1e-2, max = 1e2, log = TRUE) + dunif(sigma_scale_use, min = 1, max = 1e4, log = TRUE) return(logp_val) } logl <- function(param) { a_shape_use = param[1] sigma_scale_use = param[2] logl_val = 0 interval_l_bound = info_use$interval_censored_df[[2]] interval_r_bound = info_use$interval_censored_df[[3]] for(i in 1:num_ran_samples) { logl_val = logl_val + log(exp(-1*((interval_l_bound[i]/sigma_scale_use)^a_shape_use)) - exp(-1*((interval_r_bound[i]/sigma_scale_use)^a_shape_use))) } if(is.na(logl_val) || is.infinite(logl_val)) { logl_val = -Inf } return(logl_val) } logfuns = list(logp = logp, logl = logl) ## ----eval=TRUE---------------------------------------------------------------- num_par = 2 num_chains = 2*num_par theta0 = matrix(0, nrow = num_par, ncol = num_chains) temp_val = 0 j = 0 while(j < num_chains) { initial = c(runif(1, 1e-2, 1e2), runif(1, 1, 1e4)) temp_val = logl(initial) + logp(initial) while(is.na(temp_val) || is.infinite(temp_val)) { initial = c(runif(1, 1e-2, 1e2), runif(1, 1, 1e4)) temp_val = logl(initial) + logp(initial) } j = j + 1 message(paste('j:', j)) theta0[1,j] = initial[1] theta0[2,j] = initial[2] } ## ----eval=TRUE---------------------------------------------------------------- num_chain_iterations = 1e5 thin_val_par = 10 floor(num_chain_iterations/thin_val_par)*num_chains ## ----eval=TRUE---------------------------------------------------------------- Weibull_Surv_Quad_result = ensemblealg(theta0, logfuns, T_iter = num_chain_iterations, Thin_val = thin_val_par, ShowProgress = TRUE, ReturnCODA = TRUE) ## ----eval=TRUE---------------------------------------------------------------- my_samples = Weibull_Surv_Quad_result$theta_sample my_log_prior = Weibull_Surv_Quad_result$log_prior_sample my_log_like = Weibull_Surv_Quad_result$log_like_sample my_mcmc_list_coda = Weibull_Surv_Quad_result$mcmc_list_coda ## ----eval=TRUE---------------------------------------------------------------- varnames(my_mcmc_list_coda) = c("a_shape", "sigma_scale") plot(my_mcmc_list_coda) ## ----eval=TRUE---------------------------------------------------------------- my_samples_burn_in = my_samples[,,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))] my_log_prior_burn_in = my_log_prior[,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))] my_log_like_burn_in = my_log_like[,-c(1:floor((num_chain_iterations/thin_val_par)*0.25))] ## ----eval=TRUE---------------------------------------------------------------- diagnostic_result = psrfdiagnostic(my_samples_burn_in, 0.05) diagnostic_result$p_s_r_f_vec ## ----eval=TRUE---------------------------------------------------------------- log_un_post_vec = as.vector(my_log_prior_burn_in + my_log_like_burn_in) k1 = as.vector(my_samples_burn_in[1,,]) k2 = as.vector(my_samples_burn_in[2,,]) ## ----eval=TRUE---------------------------------------------------------------- #a_shape posterior median and 95% credible interval median(k1) hpdparameter(k1, 0.05) #(The 95% CI is narrower than the prior distribution and it contains the true #value of a_shape, 8) #sigma_scale posterior median and 95% credible interval median(k2) hpdparameter(k2, 0.05) #(The 95% CI is narrower than the prior distribution and it contains the true #value of sigma_scale, 1100) #a_shape, sigma_scale, and stand_dev maximum posterior k1[which.max(log_un_post_vec)] k2[which.max(log_un_post_vec)] ## ----eval=TRUE---------------------------------------------------------------- plot(k1, exp(log_un_post_vec), xlab="a_shape", ylab="unnormalized posterior density") ## ----eval=TRUE---------------------------------------------------------------- plot(k2, exp(log_un_post_vec), xlab="sigma_scale", ylab="unnormalized posterior density") ## ----eval=TRUE---------------------------------------------------------------- num_samples = 10000 w_predict = matrix(NA, nrow = num_samples, ncol = num_ran_samples) s_predict = matrix(NA, nrow = num_samples, ncol = length(time_use)) discrepancy_s = matrix(NA, nrow = num_samples, ncol = 1) discrepancy_s_pred = matrix(NA, nrow = num_samples, ncol = 1) ind_pred_exceeds_s = matrix(NA, nrow = num_samples, ncol = 1) for (i in (length(log_un_post_vec) - num_samples + 1):length(log_un_post_vec)) { l = i - (length(log_un_post_vec) - num_samples) w_predict[l,] = rweibull(num_ran_samples, shape = k1[i], scale = k2[i]) data_weibull_temp = w_predict[l,] info_use_predict = create_life_table_info(time_use, num_ran_samples, data_weibull_temp) s_predict[l,] = t(info_use_predict$life_table_df[[6]]) s_data = t(info_use$life_table_df[[6]]) my_s_mean = exp(-1*((time_use/k2[i])^k1[i])) discrepancy_s[l,1] = sum(((s_data - my_s_mean)^2)) discrepancy_s_pred[l,1] = sum(((s_predict[l,] - my_s_mean)^2)) if(discrepancy_s_pred[l,1] > discrepancy_s[l,1]) { ind_pred_exceeds_s[l,1] = 1 } else { ind_pred_exceeds_s[l,1] = 0 } svMisc::progress(l,num_samples,progress.bar = TRUE,init = (l == 1)) } ## ----eval=TRUE---------------------------------------------------------------- Bayes_p_value = sum(ind_pred_exceeds_s)/length(ind_pred_exceeds_s) Bayes_p_value ## ----eval=TRUE---------------------------------------------------------------- s_predict_lower = matrix(NA, nrow = 1, ncol = length(time_use)) s_predict_upper = matrix(NA, nrow = 1, ncol = length(time_use)) for(j in 1:length(time_use)) { s_predict_lower[j] = quantile(s_predict[,j], probs = 0.025) s_predict_upper[j] = quantile(s_predict[,j], probs = 0.975) } ## ----eval=TRUE---------------------------------------------------------------- plot(time_use, info_use$life_table_df[[6]], main="Weibull survival curve (a = 8, sigma = 1100) Example", cex.main = 1, xlab="Months (t)", ylab="Survival probability (S)", type = "s", ylim = c(0, 1)) lines(time_use, exp(-1*((time_use/sigma_scale)^a_shape)), type = "l", lty = 1, col = "red") lines(time_use, colMeans(s_predict), type = "l", lty = 1, col = "blue") lines(time_use, s_predict_lower, type = "l", lty = 2, col = "blue") lines(time_use, s_predict_upper, type = "l", lty = 2, col = "blue") legend("bottomleft", legend = c("Data", "True model", "Posterior predictive mean", "95% prediction interval"), lty=c(1,1,1,2), col = c("black", "red", "blue", "blue"), pch=c(NA,NA,NA,NA))