Ensemble Quadratic MCMC algorithm detailed example for fitting a Weibull survival function

library(QAEnsemble)

Load the ‘svMisc’ package to use the progress bar in the Ensemble MCMC algorithm and load the ‘coda’ package to return a ‘mcmc.list’ object from the Ensemble MCMC algorithm that can be used with various MCMC diagnostic functions in the ‘coda’ package.

library(svMisc)
library(coda)

Set the seed for this example.

# Fix the RNG seed so the sampled mortality times and the MCMC runs below are reproducible.
set.seed(10)

Assume the true parameters in the Weibull survival function \(S(t) = e^{- \big( \frac{t}{\sigma} \big)^a}\) are shape \(a = 8\) and scale \(\sigma = 1100\). This Weibull lifetime survival function is in terms of months and is extended for 1320 months (110 years).

# True Weibull parameters: shape a = 8 and scale sigma = 1100 (months).
a_shape = 8
sigma_scale = 1100

# Month grid: every 12 months (1 year) out to 1320 months (110 years).
time_use = 12*(1:110)

# Plot the true survival curve S(t) = exp(-(t/sigma)^a) in red.
plot(time_use, exp(-(time_use/sigma_scale)^a_shape), xlab="Months (t)", ylab="Survival probability (S)", type = "l", col = "red")

Then we take 50 random samples from this Weibull distribution, \(T \sim \textrm{WEI}(a = 8, \sigma = 1100)\), and these random samples provide mortality times. From these mortality time random samples, \(t_i\) for \(i = 1, ..., 50\), a life table is created for every 12 month interval (every year interval) up to 1320 months (110 years) and the estimated survival probability is calculated for every 12 month interval. It is assumed that only the life table information is known for every 12 month interval, and the actual mortality times from the Weibull distribution random samples, \(t_i\), are unknown. So, the life table information provides interval-censored mortality data.


#Input for the 'create_life_table_info' function:

#interval_time_input: the start and end times for each time interval (Note: it 
#is assumed in the 'create_life_table_info' function that the first start time 
#is at month 0)

#num_ran_samples_input: the number of random samples from the Weibull 
#distribution

#data_weibull_input: the Weibull distribution mortality time samples

#Output for the 'create_life_table_info' is a list containing two data frames. 
#The first data frame contains the life table information: time intervals, the 
#number of people at risk in the interval, number of deaths that occur in the 
#interval, risk of death in the interval, probability of surviving in the 
#interval, and the survival probabilities. The second data frame contains the 
#interval-censored information in a format easy to use in the likelihood 
#function: interval-censored mortality time observation number, interval left 
#bound, and interval right bound.

#' Build a life table and an interval-censored mortality dataset from raw
#' mortality times.
#'
#' The observation window is partitioned into intervals; the first interval is
#' assumed to start at month 0 and to be closed at 0, and every interval runs
#' (start, end] with the end times given by 'interval_time_input'.
#'
#' @param interval_time_input Vector of interval end times in months.
#' @param num_ran_samples_input The number of mortality time samples.
#' @param data_weibull_input Vector of mortality times in months (assumed to
#'   contain no NA values).
#' @return A list of two data frames. "life_table_df" holds, per interval: the
#'   interval label, number at risk, number of deaths, risk of death,
#'   probability of surviving the interval, and the cumulative survival
#'   probability. "interval_censored_df" holds, per death: the observation
#'   number and the left/right bounds of the interval in which it occurred.
create_life_table_info <- function(interval_time_input, num_ran_samples_input, data_weibull_input)
{
  num_intervals = length(interval_time_input)

  # Plain vectors replace the original 1-row matrices; the resulting data
  # frame columns are identical.
  num_at_risk_during_interval_output = rep(NA_real_, num_intervals)
  num_deaths_during_interval_output = rep(NA_real_, num_intervals)
  risk_death_during_interval_output = rep(NA_real_, num_intervals)
  prob_surv_during_interval_output = rep(NA_real_, num_intervals)
  s_values_output = rep(NA_real_, num_intervals)
  string_time_interval_names = rep(NA_character_, num_intervals)

  obs_num_output = seq_len(num_ran_samples_input)
  left_endpoint_output = rep(NA_real_, num_ran_samples_input)
  right_endpoint_output = rep(NA_real_, num_ran_samples_input)

  # Next free row in the interval-censored output.
  i = 1

  for(t in seq_len(num_intervals))
  {
    # Interval bounds (first interval starts at month 0); the original code
    # hardcoded 12 months for the first interval, which only worked for a
    # 12-month grid — this uses interval_time_input[1] instead.
    interval_start = if (t == 1) 0 else interval_time_input[t - 1]
    interval_end = interval_time_input[t]

    # Everyone not yet dead at the start of the interval is at risk in it
    # (clamped at zero so a malformed input cannot produce a negative count).
    if (t == 1)
    {
      num_at_risk_during_interval_output[t] = num_ran_samples_input
    }
    else
    {
      num_at_risk_during_interval_output[t] = max(num_at_risk_during_interval_output[t - 1] - num_deaths_during_interval_output[t - 1], 0)
    }

    if (num_at_risk_during_interval_output[t] > 0)
    {
      # The first interval is closed at 0; later intervals are (start, end].
      if (t == 1)
      {
        in_interval = (0 <= data_weibull_input & data_weibull_input <= interval_end)
      }
      else
      {
        in_interval = (interval_start < data_weibull_input & data_weibull_input <= interval_end)
      }

      num_deaths_during_interval_output[t] = sum(in_interval)
      risk_death_during_interval_output[t] = num_deaths_during_interval_output[t]/num_at_risk_during_interval_output[t]
    }
    else
    {
      # No one left at risk: record no deaths and a conventional risk of 1 so
      # the cumulative survival probability stays at 0 from here on.
      num_deaths_during_interval_output[t] = 0
      risk_death_during_interval_output[t] = 1
    }

    prob_surv_during_interval_output[t] = 1 - risk_death_during_interval_output[t]

    # Cumulative survival probability: product of per-interval survival.
    if (t == 1)
    {
      s_values_output[t] = prob_surv_during_interval_output[t]
    }
    else
    {
      s_values_output[t] = s_values_output[t - 1]*prob_surv_during_interval_output[t]
    }

    string_time_interval_names[t] = paste(interval_start, "-", interval_end, " mo", " (", interval_start/12, "-", interval_end/12, " yr)", sep = "")

    # Record one interval-censored observation per death in this interval.
    if (num_deaths_during_interval_output[t] > 0)
    {
      for(j in 1:num_deaths_during_interval_output[t])
      {
        left_endpoint_output[i] = interval_start
        right_endpoint_output[i] = interval_end

        i = i + 1
      }
    }
  }

  life_table_df = data.frame(string_time_interval_names, num_at_risk_during_interval_output, num_deaths_during_interval_output, risk_death_during_interval_output, prob_surv_during_interval_output, s_values_output)

  names(life_table_df) = c("Time interval", "Number of people at risk in the interval", "Number of deaths that occur in the interval", "Risk of death in the interval", "Probability of surviving in the interval", "Survival probability")

  interval_censored_df = data.frame(obs_num_output, left_endpoint_output, right_endpoint_output)

  names(interval_censored_df) = c("Interval-censored mortality time observation number", "Interval left bound", "Interval right bound")

  return(list("life_table_df" = life_table_df, "interval_censored_df" = interval_censored_df))
}

# Draw 50 mortality times from the true Weibull distribution.
num_ran_samples = 50

data_weibull = rweibull(num_ran_samples, shape = a_shape, scale = sigma_scale)

# Build the life table and the interval-censored dataset from the samples.
info_use = create_life_table_info(time_use, num_ran_samples, data_weibull)

# Render the life table as a pipe-format table.
knitr::kable(info_use$life_table_df, caption = "Life table data", format = "pipe", padding = 0)
Life table data
Time interval Number of people at risk in the interval Number of deaths that occur in the interval Risk of death in the interval Probability of surviving in the interval Survival probability
0-12 mo (0-1 yr) 50 0 0.0000000 1.0000000 1.00
12-24 mo (1-2 yr) 50 0 0.0000000 1.0000000 1.00
24-36 mo (2-3 yr) 50 0 0.0000000 1.0000000 1.00
36-48 mo (3-4 yr) 50 0 0.0000000 1.0000000 1.00
48-60 mo (4-5 yr) 50 0 0.0000000 1.0000000 1.00
60-72 mo (5-6 yr) 50 0 0.0000000 1.0000000 1.00
72-84 mo (6-7 yr) 50 0 0.0000000 1.0000000 1.00
84-96 mo (7-8 yr) 50 0 0.0000000 1.0000000 1.00
96-108 mo (8-9 yr) 50 0 0.0000000 1.0000000 1.00
108-120 mo (9-10 yr) 50 0 0.0000000 1.0000000 1.00
120-132 mo (10-11 yr) 50 0 0.0000000 1.0000000 1.00
132-144 mo (11-12 yr) 50 0 0.0000000 1.0000000 1.00
144-156 mo (12-13 yr) 50 0 0.0000000 1.0000000 1.00
156-168 mo (13-14 yr) 50 0 0.0000000 1.0000000 1.00
168-180 mo (14-15 yr) 50 0 0.0000000 1.0000000 1.00
180-192 mo (15-16 yr) 50 0 0.0000000 1.0000000 1.00
192-204 mo (16-17 yr) 50 0 0.0000000 1.0000000 1.00
204-216 mo (17-18 yr) 50 0 0.0000000 1.0000000 1.00
216-228 mo (18-19 yr) 50 0 0.0000000 1.0000000 1.00
228-240 mo (19-20 yr) 50 0 0.0000000 1.0000000 1.00
240-252 mo (20-21 yr) 50 0 0.0000000 1.0000000 1.00
252-264 mo (21-22 yr) 50 0 0.0000000 1.0000000 1.00
264-276 mo (22-23 yr) 50 0 0.0000000 1.0000000 1.00
276-288 mo (23-24 yr) 50 0 0.0000000 1.0000000 1.00
288-300 mo (24-25 yr) 50 0 0.0000000 1.0000000 1.00
300-312 mo (25-26 yr) 50 0 0.0000000 1.0000000 1.00
312-324 mo (26-27 yr) 50 0 0.0000000 1.0000000 1.00
324-336 mo (27-28 yr) 50 0 0.0000000 1.0000000 1.00
336-348 mo (28-29 yr) 50 0 0.0000000 1.0000000 1.00
348-360 mo (29-30 yr) 50 0 0.0000000 1.0000000 1.00
360-372 mo (30-31 yr) 50 0 0.0000000 1.0000000 1.00
372-384 mo (31-32 yr) 50 0 0.0000000 1.0000000 1.00
384-396 mo (32-33 yr) 50 0 0.0000000 1.0000000 1.00
396-408 mo (33-34 yr) 50 0 0.0000000 1.0000000 1.00
408-420 mo (34-35 yr) 50 0 0.0000000 1.0000000 1.00
420-432 mo (35-36 yr) 50 0 0.0000000 1.0000000 1.00
432-444 mo (36-37 yr) 50 0 0.0000000 1.0000000 1.00
444-456 mo (37-38 yr) 50 0 0.0000000 1.0000000 1.00
456-468 mo (38-39 yr) 50 0 0.0000000 1.0000000 1.00
468-480 mo (39-40 yr) 50 0 0.0000000 1.0000000 1.00
480-492 mo (40-41 yr) 50 0 0.0000000 1.0000000 1.00
492-504 mo (41-42 yr) 50 0 0.0000000 1.0000000 1.00
504-516 mo (42-43 yr) 50 0 0.0000000 1.0000000 1.00
516-528 mo (43-44 yr) 50 0 0.0000000 1.0000000 1.00
528-540 mo (44-45 yr) 50 0 0.0000000 1.0000000 1.00
540-552 mo (45-46 yr) 50 0 0.0000000 1.0000000 1.00
552-564 mo (46-47 yr) 50 0 0.0000000 1.0000000 1.00
564-576 mo (47-48 yr) 50 0 0.0000000 1.0000000 1.00
576-588 mo (48-49 yr) 50 0 0.0000000 1.0000000 1.00
588-600 mo (49-50 yr) 50 0 0.0000000 1.0000000 1.00
600-612 mo (50-51 yr) 50 0 0.0000000 1.0000000 1.00
612-624 mo (51-52 yr) 50 0 0.0000000 1.0000000 1.00
624-636 mo (52-53 yr) 50 0 0.0000000 1.0000000 1.00
636-648 mo (53-54 yr) 50 0 0.0000000 1.0000000 1.00
648-660 mo (54-55 yr) 50 0 0.0000000 1.0000000 1.00
660-672 mo (55-56 yr) 50 0 0.0000000 1.0000000 1.00
672-684 mo (56-57 yr) 50 0 0.0000000 1.0000000 1.00
684-696 mo (57-58 yr) 50 0 0.0000000 1.0000000 1.00
696-708 mo (58-59 yr) 50 0 0.0000000 1.0000000 1.00
708-720 mo (59-60 yr) 50 0 0.0000000 1.0000000 1.00
720-732 mo (60-61 yr) 50 0 0.0000000 1.0000000 1.00
732-744 mo (61-62 yr) 50 0 0.0000000 1.0000000 1.00
744-756 mo (62-63 yr) 50 1 0.0200000 0.9800000 0.98
756-768 mo (63-64 yr) 49 0 0.0000000 1.0000000 0.98
768-780 mo (64-65 yr) 49 0 0.0000000 1.0000000 0.98
780-792 mo (65-66 yr) 49 0 0.0000000 1.0000000 0.98
792-804 mo (66-67 yr) 49 0 0.0000000 1.0000000 0.98
804-816 mo (67-68 yr) 49 0 0.0000000 1.0000000 0.98
816-828 mo (68-69 yr) 49 0 0.0000000 1.0000000 0.98
828-840 mo (69-70 yr) 49 1 0.0204082 0.9795918 0.96
840-852 mo (70-71 yr) 48 0 0.0000000 1.0000000 0.96
852-864 mo (71-72 yr) 48 0 0.0000000 1.0000000 0.96
864-876 mo (72-73 yr) 48 1 0.0208333 0.9791667 0.94
876-888 mo (73-74 yr) 47 2 0.0425532 0.9574468 0.90
888-900 mo (74-75 yr) 45 1 0.0222222 0.9777778 0.88
900-912 mo (75-76 yr) 44 1 0.0227273 0.9772727 0.86
912-924 mo (76-77 yr) 43 0 0.0000000 1.0000000 0.86
924-936 mo (77-78 yr) 43 2 0.0465116 0.9534884 0.82
936-948 mo (78-79 yr) 41 1 0.0243902 0.9756098 0.80
948-960 mo (79-80 yr) 40 1 0.0250000 0.9750000 0.78
960-972 mo (80-81 yr) 39 2 0.0512821 0.9487179 0.74
972-984 mo (81-82 yr) 37 1 0.0270270 0.9729730 0.72
984-996 mo (82-83 yr) 36 1 0.0277778 0.9722222 0.70
996-1008 mo (83-84 yr) 35 2 0.0571429 0.9428571 0.66
1008-1020 mo (84-85 yr) 33 1 0.0303030 0.9696970 0.64
1020-1032 mo (85-86 yr) 32 1 0.0312500 0.9687500 0.62
1032-1044 mo (86-87 yr) 31 1 0.0322581 0.9677419 0.60
1044-1056 mo (87-88 yr) 30 3 0.1000000 0.9000000 0.54
1056-1068 mo (88-89 yr) 27 0 0.0000000 1.0000000 0.54
1068-1080 mo (89-90 yr) 27 4 0.1481481 0.8518519 0.46
1080-1092 mo (90-91 yr) 23 2 0.0869565 0.9130435 0.42
1092-1104 mo (91-92 yr) 21 1 0.0476190 0.9523810 0.40
1104-1116 mo (92-93 yr) 20 2 0.1000000 0.9000000 0.36
1116-1128 mo (93-94 yr) 18 1 0.0555556 0.9444444 0.34
1128-1140 mo (94-95 yr) 17 3 0.1764706 0.8235294 0.28
1140-1152 mo (95-96 yr) 14 3 0.2142857 0.7857143 0.22
1152-1164 mo (96-97 yr) 11 2 0.1818182 0.8181818 0.18
1164-1176 mo (97-98 yr) 9 0 0.0000000 1.0000000 0.18
1176-1188 mo (98-99 yr) 9 2 0.2222222 0.7777778 0.14
1188-1200 mo (99-100 yr) 7 0 0.0000000 1.0000000 0.14
1200-1212 mo (100-101 yr) 7 0 0.0000000 1.0000000 0.14
1212-1224 mo (101-102 yr) 7 2 0.2857143 0.7142857 0.10
1224-1236 mo (102-103 yr) 5 2 0.4000000 0.6000000 0.06
1236-1248 mo (103-104 yr) 3 0 0.0000000 1.0000000 0.06
1248-1260 mo (104-105 yr) 3 1 0.3333333 0.6666667 0.04
1260-1272 mo (105-106 yr) 2 0 0.0000000 1.0000000 0.04
1272-1284 mo (106-107 yr) 2 0 0.0000000 1.0000000 0.04
1284-1296 mo (107-108 yr) 2 0 0.0000000 1.0000000 0.04
1296-1308 mo (108-109 yr) 2 0 0.0000000 1.0000000 0.04
1308-1320 mo (109-110 yr) 2 2 1.0000000 0.0000000 0.00
# Render the interval-censored mortality data as a pipe-format table.
knitr::kable(info_use$interval_censored_df, caption = "Interval-censored data", format = "pipe", padding = 0)
Interval-censored data
Interval-censored mortality time observation number Interval left bound Interval right bound
1 744 756
2 828 840
3 864 876
4 876 888
5 876 888
6 888 900
7 900 912
8 924 936
9 924 936
10 936 948
11 948 960
12 960 972
13 960 972
14 972 984
15 984 996
16 996 1008
17 996 1008
18 1008 1020
19 1020 1032
20 1032 1044
21 1044 1056
22 1044 1056
23 1044 1056
24 1068 1080
25 1068 1080
26 1068 1080
27 1068 1080
28 1080 1092
29 1080 1092
30 1092 1104
31 1104 1116
32 1104 1116
33 1116 1128
34 1128 1140
35 1128 1140
36 1128 1140
37 1140 1152
38 1140 1152
39 1140 1152
40 1152 1164
41 1152 1164
42 1176 1188
43 1176 1188
44 1212 1224
45 1212 1224
46 1224 1236
47 1224 1236
48 1248 1260
49 1308 1320
50 1308 1320
#Plot the survival probability from the life table data
#(column 6 of the life table holds the cumulative survival probability)
plot(time_use, info_use$life_table_df[[6]], xlab="Months (t)", ylab="Survival probability (S)", type = "s")

Now, we want to fit a Weibull distribution with shape parameter \(a\) and scale parameter \(\sigma\) to this interval-censored data using the Ensemble Quadratic MCMC algorithm and the prior knowledge that the shape parameter \(a \sim \textrm{U}(0.01, 100)\) and scale parameter \(\sigma \sim \textrm{U}(1, 10000)\).

The likelihood function is the following:

\(L(\theta) = \prod_{i = 1}^{50} P(L_i \leq T_i \leq R_i) = \prod_{i = 1}^{50} S(L_i) - S(R_i) = \prod_{i = 1}^{50} e^{- \big( \frac{L_i}{\sigma} \big)^a} - e^{- \big( \frac{R_i}{\sigma} \big)^a}\),

where \(\theta = \begin{bmatrix} a \\ \sigma \end{bmatrix}\) and \([L_i, R_i]\) is the time interval in the life table where the \(T_i\) death occurred.

For more information about formulating the likelihood for interval censored data, please see the following journal articles:

Guure CB, Ibrahim NA, Adam MB (2013) Bayesian inference of the Weibull model based on interval-censored survival data. Comput Math Methods Med. 2013:849520. https://doi.org/10.1155/2013/849520

Pan C, Cai B, Wang L (2020) A Bayesian approach for analyzing partly interval-censored data under the proportional hazards model. Stat Methods Med Res. 29(11):3192-3204. https://doi.org/10.1177/0962280220921552

The log prior function and the log likelihood function for \(a\) and \(\sigma\) are coded below.


#' Log prior density for the Weibull parameters.
#'
#' Independent uniform priors: shape a ~ U(0.01, 100), scale sigma ~ U(1, 10000).
#'
#' @param param Numeric vector: param[1] is the shape a, param[2] the scale sigma.
#' @return The summed log prior density (-Inf outside the prior support).
logp <- function(param)
{
  shape_log_prior = dunif(param[1], min = 1e-2, max = 1e2, log = TRUE)
  scale_log_prior = dunif(param[2], min = 1, max = 1e4, log = TRUE)

  return(shape_log_prior + scale_log_prior)
}

#' Log likelihood for the interval-censored Weibull mortality data.
#'
#' Each death contributes log(S(L_i) - S(R_i)), where S(t) = exp(-(t/sigma)^a)
#' is the Weibull survival function and [L_i, R_i] is the life table interval
#' in which the death occurred. Reads the interval bounds from the global
#' 'info_use' object built earlier in this script.
#'
#' @param param Numeric vector: param[1] is the shape a, param[2] the scale sigma.
#' @return The log likelihood, or -Inf when the value is NA or infinite
#'   (e.g. parameter values that give a zero probability for some interval).
logl <- function(param)
{
  a_shape_use = param[1]
  sigma_scale_use = param[2]

  interval_l_bound = info_use$interval_censored_df[[2]]
  interval_r_bound = info_use$interval_censored_df[[3]]

  # Vectorized over observations; the observation count comes from the data
  # itself rather than the separate global 'num_ran_samples', so the two can
  # never silently disagree.
  surv_left = exp(-1*((interval_l_bound/sigma_scale_use)^a_shape_use))
  surv_right = exp(-1*((interval_r_bound/sigma_scale_use)^a_shape_use))

  logl_val = sum(log(surv_left - surv_right))

  if(is.na(logl_val) || is.infinite(logl_val))
  {
    logl_val = -Inf
  }

  return(logl_val)
}

# Bundle the log prior and log likelihood for the Ensemble MCMC algorithm.
logfuns = list(logp = logp, logl = logl)

It is recommended to use at least twice as many chains as the number of parameters to be estimated. Since there are two parameters to be estimated in this example, four MCMC chains will be used in the Ensemble Quadratic MCMC algorithm.

Next, initial guesses are proposed for each of the four MCMC chains and the following while loops ensure that the initial guesses for each of the four MCMC chains produces a log unnormalized posterior density value that is not NA or infinite.

# Two parameters (a, sigma); use twice as many chains as parameters.
num_par = 2
num_chains = 2*num_par

# Each column of theta0 holds the initial guess for one chain.
theta0 = matrix(0, nrow = num_par, ncol = num_chains)

temp_val = 0
j = 0

# Draw uniform initial guesses from the prior support for each chain,
# redrawing until the log unnormalized posterior is finite.
while(j < num_chains)
{
  repeat
  {
    initial = c(runif(1, 1e-2, 1e2), runif(1, 1, 1e4))
    temp_val = logl(initial) + logp(initial)

    if(!(is.na(temp_val) || is.infinite(temp_val)))
    {
      break
    }
  }

  j = j + 1

  message(paste('j:', j))

  theta0[1,j] = initial[1]
  theta0[2,j] = initial[2]

}
#> j: 1
#> j: 2
#> j: 3
#> j: 4

The Ensemble Quadratic MCMC algorithm will be used with 1e5 iterations for each chain in the Ensemble MCMC algorithm and each 10th iteration is returned.

The total number of samples returned by the algorithm is given by floor(num_chain_iterations/thin_val_par)*num_chains

# Run 1e5 iterations per chain, keeping every 10th iteration (thinning).
num_chain_iterations = 1e5
thin_val_par = 10

# Total number of samples returned across all chains.
floor(num_chain_iterations/thin_val_par)*num_chains
#> [1] 40000

Next, the Ensemble Quadratic MCMC algorithm is run.


# Run the Ensemble Quadratic MCMC algorithm (QAEnsemble package); returns the
# parameter, log likelihood, and log prior samples plus a coda 'mcmc.list'.
Weibull_Surv_Quad_result = ensemblealg(theta0, logfuns, T_iter = num_chain_iterations, Thin_val = thin_val_par, ShowProgress = TRUE, ReturnCODA = TRUE)
#>           0-----------------------------------------------------------------1e+05
#> Progress: |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||

After the Ensemble Quadratic MCMC algorithm is run, the matrix of parameter samples, matrix of log likelihood samples, matrix of log prior samples, and ‘coda’ package ‘mcmc.list’ object are returned from the algorithm.

# Extract the returned samples: the parameter sample array (indexed by
# parameter, chain, iteration — see the burn-in and pooling steps below), the
# log prior/log likelihood sample matrices, and the coda 'mcmc.list' object.
my_samples = Weibull_Surv_Quad_result$theta_sample
my_log_prior = Weibull_Surv_Quad_result$log_prior_sample
my_log_like = Weibull_Surv_Quad_result$log_like_sample
my_mcmc_list_coda = Weibull_Surv_Quad_result$mcmc_list_coda

Use the ‘plot’ function on the ‘mcmc.list’ object to view the convergence of the chains over the iterations.

# Label the chains, then view trace and density plots to inspect convergence.
varnames(my_mcmc_list_coda) = c("a_shape", "sigma_scale")

plot(my_mcmc_list_coda)

Now, we remove (also called burn-in) 25% of each MCMC chain to ensure that we use the samples that converged to the estimated unnormalized posterior distribution.

# Discard the first 25% of each (thinned) chain as burn-in.
burn_in_len = floor((num_chain_iterations/thin_val_par)*0.25)

my_samples_burn_in = my_samples[,,-(1:burn_in_len)]

my_log_prior_burn_in = my_log_prior[,-(1:burn_in_len)]

my_log_like_burn_in = my_log_like[,-(1:burn_in_len)]

Then we calculate the potential scale reduction factors, which provide a formal test of the convergence of the MCMC sampling to the estimated posterior distribution. The potential scale reduction factors are close to 1 for all the estimated parameters, which indicates that the MCMC sampling converged to the estimated posterior distribution for each parameter.

# Potential scale reduction factor diagnostic (QAEnsemble package); values
# near 1 indicate the chains converged to the same distribution.
diagnostic_result = psrfdiagnostic(my_samples_burn_in, 0.05)

diagnostic_result$p_s_r_f_vec
#>           [,1]     [,2]
#> [1,] 0.9989446 1.004329

We collapse the sample matrices into sample vectors to pool all of the MCMC chain samples together.

# Pool all chains: collapse the burned-in sample arrays into plain vectors.
# log_un_post_vec is the log unnormalized posterior (log prior + log likelihood).
log_un_post_vec = as.vector(my_log_prior_burn_in + my_log_like_burn_in)
k1 = as.vector(my_samples_burn_in[1,,])
k2 = as.vector(my_samples_burn_in[2,,])

Next, we calculate the posterior median, 95% credible intervals, and maximum posterior for the parameters.

# Posterior summaries from the pooled samples ('hpdparameter' computes the
# highest posterior density interval; the maximum posterior estimate is the
# sample with the largest log unnormalized posterior value).

#a_shape posterior median and 95% credible interval
median(k1)
#> [1] 9.546631
hpdparameter(k1, 0.05)
#> [1]  7.559644 11.614131
#(The 95% CI is narrower than the prior distribution and it contains the true 
#value of a_shape, 8)

#sigma_scale posterior median and 95% credible interval
median(k2)
#> [1] 1118.288
hpdparameter(k2, 0.05)
#> [1] 1083.961 1153.739
#(The 95% CI is narrower than the prior distribution and it contains the true 
#value of sigma_scale, 1100)

#a_shape, sigma_scale, and stand_dev maximum posterior
k1[which.max(log_un_post_vec)]
#> [1] 9.618979
k2[which.max(log_un_post_vec)]
#> [1] 1117.134

The following plots display the silhouette of the unnormalized posterior surface from the chosen parameter’s perspective.

# Posterior silhouette plots: each pooled sample's unnormalized posterior
# density plotted against one parameter at a time.
plot(k1, exp(log_un_post_vec), xlab="a_shape", ylab="unnormalized posterior density")

plot(k2, exp(log_un_post_vec), xlab="sigma_scale", ylab="unnormalized posterior density")

Now, ten thousand MCMC samples are used to generate the posterior predictive distribution. (Also, the Bayesian p-value is calculated in this for loop, which assesses the goodness of fit of the Weibull survival function to the interval-censored data.)

num_samples = 10000

# Posterior predictive draws: each row of w_predict holds one simulated set of
# mortality times; each row of s_predict holds the life table survival curve
# rebuilt from those simulated times.
w_predict = matrix(NA, nrow = num_samples, ncol = num_ran_samples)
s_predict = matrix(NA, nrow = num_samples, ncol = length(time_use))

# Sum-of-squares discrepancies of the data and of the predictions from the
# model survival curve, plus the exceedance indicator for the Bayesian p-value.
discrepancy_s = matrix(NA, nrow = num_samples, ncol = 1)
discrepancy_s_pred = matrix(NA, nrow = num_samples, ncol = 1)
ind_pred_exceeds_s = matrix(NA, nrow = num_samples, ncol = 1)

# Use the last num_samples pooled MCMC samples; l is the 1-based row index.
for (i in (length(log_un_post_vec) - num_samples + 1):length(log_un_post_vec))
{
  l = i - (length(log_un_post_vec) - num_samples)
  
  # Simulate mortality times from the Weibull at the i-th posterior sample.
  w_predict[l,] = rweibull(num_ran_samples, shape = k1[i], scale = k2[i])
  
  data_weibull_temp = w_predict[l,]
  
  # Rebuild the life table from the simulated times.
  info_use_predict = create_life_table_info(time_use, num_ran_samples, data_weibull_temp)
  
  s_predict[l,] = t(info_use_predict$life_table_df[[6]])
  
  s_data = t(info_use$life_table_df[[6]])
  
  # Model survival curve S(t) at the i-th posterior sample.
  my_s_mean = exp(-1*((time_use/k2[i])^k1[i]))
  
  discrepancy_s[l,1] = sum(((s_data - my_s_mean)^2))

    discrepancy_s_pred[l,1] = sum(((s_predict[l,] - my_s_mean)^2))

    # Indicator: predictive discrepancy exceeds the observed-data discrepancy.
    if(discrepancy_s_pred[l,1] > discrepancy_s[l,1])
    {
      ind_pred_exceeds_s[l,1] = 1
    }
    else
    {
      ind_pred_exceeds_s[l,1] = 0
    }

  svMisc::progress(l,num_samples,progress.bar = TRUE,init = (l == 1))
}
#>           0-----------------------------------------------------------------10000
#> Progress: |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||

The Bayesian p-value uses the sum of squares discrepancy function and the Bayesian p-value is 0.6038, which indicates that there is no evidence against the null hypothesis that the model predictions fit the data.

This Bayesian p-value measures how extreme is the realized value of sum of squares discrepancy among all possible values that could have been realized under this model with the same parameters that generates the current data.

Note: If this Bayesian p-value was close to 0 or 1, then there would be evidence against the null hypothesis that the model predictions fit the data. Extreme tail-area probabilities, less than 0.01 or more than 0.99, indicate a major failure of the model predictions to fit the data.

# Bayesian p-value: the proportion of posterior predictive draws whose
# discrepancy exceeds the observed-data discrepancy.
Bayes_p_value = mean(ind_pred_exceeds_s)

Bayes_p_value
#> [1] 0.6038

Next, the lower and upper 95% prediction interval is calculated.

# Pointwise 95% prediction interval for the survival curve: the 2.5% and 97.5%
# quantiles of the posterior predictive survival probability at each time
# point. 'apply' over the columns of s_predict replaces the original index
# loop (which used the 1:length() pattern); wrapping in matrix() keeps the
# original 1 x length(time_use) shape expected by the plotting code below.
s_predict_lower = matrix(apply(s_predict, 2, quantile, probs = 0.025), nrow = 1)
s_predict_upper = matrix(apply(s_predict, 2, quantile, probs = 0.975), nrow = 1)

Lastly, the data, posterior predictive mean, and 95% prediction interval are plotted, and this plot illustrates the good fit of the Weibull survival function to the generated interval-censored data.

# Overlay: life table data (black step), true survival curve (red solid),
# posterior predictive mean (blue solid), and pointwise 95% prediction
# interval (blue dashed).
plot(time_use, info_use$life_table_df[[6]], main="Weibull survival curve (a = 8, sigma = 1100) Example", cex.main = 1, xlab="Months (t)", ylab="Survival probability (S)", type = "s", ylim = c(0, 1))
lines(time_use, exp(-1*((time_use/sigma_scale)^a_shape)), type = "l", lty = 1, col = "red")
lines(time_use, colMeans(s_predict), type = "l", lty = 1, col = "blue")
lines(time_use, s_predict_lower, type = "l", lty = 2, col = "blue")
lines(time_use, s_predict_upper, type = "l", lty = 2, col = "blue")

legend("bottomleft", legend = c("Data", "True model", "Posterior predictive mean", "95% prediction interval"), lty=c(1,1,1,2), col = c("black", "red", "blue", "blue"), pch=c(NA,NA,NA,NA))