## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(ggplot2) library(magrittr) library(dplyr) library(tidyr) ## ----eval=FALSE--------------------------------------------------------------- # install.packages('devtools') # skip this line if `devtools` is already installed. ## ----eval=FALSE--------------------------------------------------------------- # devtools::install_github("joonhap/sbi") ## ----------------------------------------------------------------------------- library(sbim) ## ----------------------------------------------------------------------------- n <- 1000 # number of iid observations gamma_t <- 1 # true shape parameter theta_t <- 1 # true rate parameter set.seed(98475) ## Marginally, observations are distributed following the negative binomial distribution. y_o <- rnbinom(n=n, size=gamma_t, prob=theta_t/(theta_t+1)) # observed data (y_1,...,y_n). MESLE <- gamma_t*n/sum(y_o) # MESLE for theta (= MLE) sims_at <- seq(.8, 1.2, by=.001) # theta values at which simulations are made llest <- sapply(sims_at, function(tht) {dpois(y_o, rgamma(n, gamma_t, rate=tht), log=TRUE)}) ## ----------------------------------------------------------------------------- s <- simll(llest, params=sims_at) ## ----------------------------------------------------------------------------- nulls_theta <- c(seq(.8, 1.2, by=.05), MESLE) # null values to be tested ## ----------------------------------------------------------------------------- ht_result <- ht(s, null.value=as.list(nulls_theta), test="MESLE") ## ----------------------------------------------------------------------------- ht_result ## ----------------------------------------------------------------------------- cci <- ci(s, level=c(.9, .95), ci="MESLE") # constructed confidence intervals ## ----echo=FALSE, fig.width=6-------------------------------------------------- coef_est <- ht_result$regression_estimates est_quad <- function(x) coef_est[[1]] + c(coef_est[[2]])*x + c(coef_est[[3]])*x^2 ## exact log-likelihood ll_exact <- function(x) { dnbinom(sum(y_o), gamma_t*n, x/(x+1), log=TRUE) } ## exact log-likelihood, shifted in y-direction ll_sub <- function(x) { ll_exact(x) - ll_exact(MESLE) + est_quad(MESLE) } sll <- apply(unclass(s), 2, sum) # simulation log likelihood (for all observations) gg_gp_simll <- ggplot(data.frame(theta=sims_at, sll=sll), aes(x=theta, y=sll)) + geom_point(size=.5) gg_gp_simll + geom_function(fun=est_quad, linetype="longdash", color='blue') + geom_function(fun=ll_sub, linetype="longdash", color='red') + xlab('theta') + ylab('simulation log-likelihood') + geom_vline(data=data.frame( kind=factor(c("MESLE",rep("CI90",2),rep("CI95",2)), levels=c("MESLE","CI90","CI95")), value=c(MESLE, cci$confidence_interval[1,2:3], cci$confidence_interval[2,2:3])), aes(xintercept=value, linetype=kind)) + scale_linetype_manual(name="", labels=c("MESLE", "90% CI", "95% CI"), values=c(MESLE="dashed",CI90="dotdash",CI95="dotted")) + theme(legend.position="top", panel.grid.major=element_blank(), panel.grid.minor=element_blank()) ## ----------------------------------------------------------------------------- ht_result <- ht(s, null.value=as.list(nulls_theta), test="parameter", case="iid") ht_result ## ----message=FALSE------------------------------------------------------------ library(pomp) ## ----------------------------------------------------------------------------- # Stochastic volatility model SV_pomp <- function(t0, times, kappa, tau, sim_seed=NULL) { rinit <- pomp::Csnippet(" s = tau*rnorm(0,1); ") rproc <- pomp::Csnippet(" s = kappa*s + tau*sqrt(1-kappa*kappa)*rnorm(0,1); ") rmeas <- pomp::Csnippet(" r = exp(s)*rt(5); ") dmeas <- pomp::Csnippet(" double logdmeas = dt(r*exp(-s), 5, 1) - s; lik = (give_log) ? logdmeas : exp(logdmeas); ") if (!is.null(sim_seed)) { set.seed(sim_seed) } pomp::simulate(t0=t0, times=times, rinit=rinit, rprocess=pomp::discrete_time(rproc,delta.t=1), rmeasure=rmeas, dmeasure=dmeas, params=c(kappa=kappa, tau=tau), statenames = "s", obsnames = "r", paramnames = c("kappa", "tau") ) } ## ----------------------------------------------------------------------------- t0 <- 0 T <- 500 times <- 1:T kappa_t <- 0.8 tau_t <- 1 theta_t <- c(kappa=kappa_t, tau=tau_t) # true parameter value theta_Est <- c(kappa=logit(kappa_t), tau=log(tau_t)) # true parameter on the estimation scale # simulate the process and generate data SV_t <- SV_pomp(t0, times, kappa_t, tau_t, sim_seed=1554654) r_o <- obs(SV_t) # generated observation sequence dat_raw <- data.frame(time=time(SV_t), latent=c(states(SV_t)), obs=c(r_o)) dat <- pivot_longer(dat_raw, -time) parnames <- c("kappa", "tau") trans <- list(logit, log) # functions to use for transformations to the estimation scale btrans <- list(plogis, exp) # functions for transformations back to the original scale transnames <- c("logit(kappa)", "log(tau)") ## ----warning=FALSE, fig.width=3.4, fig.height=2.7----------------------------- sim_widths <- c(1, .4) inc <- c("kappa") # parameter being estimated parid <- which(parnames%in%inc) M <- 100 # number of simulation points Np <- 100 # number of particles used by the bootstrap particle filter # simulation points are uniformly spaced between the true value +-1 sims_incl_Est <- cbind(seq(theta_Est[inc] - sim_widths[parid], theta_Est[inc] + sim_widths[parid], length.out=M)) |> `colnames<-`(inc) sims_at_Nat <- outer(rep(1,M), theta_t) sims_at_Nat[,inc] <- sapply(1:length(inc), function(i) btrans[[parid[i]]](sims_incl_Est[,i])) set.seed(729875) llp_est <- simplify2array(lapply(1:nrow(sims_at_Nat), function(i) { sp <- sims_at_Nat[i,] ## run the particle filter for each simulation point pfSV <- pfilter(pomp(SV_t, params=sp), Np=Np, save.states="prediction") ## estimate the simulation log-likelihood for each r_t lme <- sapply(saved_states(pfSV)$weights, logmeanexp) lme })) ll_est <- apply(llp_est, 2, sum) ## ----------------------------------------------------------------------------- s <- simll(llp_est, params=sims_incl_Est) null_values_Est <- cbind(theta_Est[inc]+sim_widths[parid]/5*(-10:10)) colnames(null_values_Est) <- inc ht_result <- ht(s, null.value=null_values_Est, test="parameter", case="stationary") ## ----------------------------------------------------------------------------- ci_result <- ci(s, level=c(.9,.95), ci="parameter", case="stationary") regcoef_k <- ci_result$regression_estimates # estimated expected simulation log-likelihood eesl <- function(x) { regcoef_k$a + regcoef_k$b*x + regcoef_k$c*x^2 } vline_names <- c("true","CI90","CI95") dat <- data.frame(simll=ll_est) dat[inc] <- sims_incl_Est g1 <- ggplot(dat, aes(x=!!sym(inc), y=simll)) + geom_point() + geom_function(fun=eesl, linetype="longdash") + geom_vline(data=data.frame( kind=factor(c("true",rep("CI90",2),rep("CI95",2)), levels=vline_names), value=c(theta_Est[inc], ci_result$confidence_interval[1,2:3], ci_result$confidence_interval[2,2:3])), aes(xintercept=value, linetype=kind)) + xlab(transnames[parid]) + ylab('simulation log-likelihood') + scale_linetype_manual(name="", labels=c("true", "90%CI", "95%CI"), values=c(true="dashed", CI90="dotdash", CI95="dotted")) + theme(legend.position="bottom") g1 ## ----warning=FALSE, fig.width=3.4, fig.height=2.7----------------------------- sim_widths <- c(1, .4) inc <- c("tau") # parameter being estimated parid <- which(parnames%in%inc) M <- 100 Np <- 100 # simulation points are uniformly spaced between the true value +-.4 sims_incl_Est <- cbind(seq(theta_Est[inc] - sim_widths[parid], theta_Est[inc] + sim_widths[parid], length.out=M)) |> `colnames<-`(inc) sims_at_Nat <- outer(rep(1,M), theta_t) sims_at_Nat[,inc] <- sapply(1:length(inc), function(i) btrans[[parid[i]]](sims_incl_Est[,i])) set.seed(729875) llp_est <- simplify2array(lapply(1:nrow(sims_at_Nat), function(i) { sp <- sims_at_Nat[i,] pfSV <- pfilter(pomp(SV_t, params=sp), Np=Np, save.states="prediction") lme <- sapply(saved_states(pfSV)$weights, logmeanexp) lme })) ll_est <- apply(llp_est, 2, sum) s <- simll(llp_est, params=sims_incl_Est) null_values_Est <- cbind(theta_Est[inc]+sim_widths[parid]/5*(-10:10)) colnames(null_values_Est) <- inc ht_result <- ht(s, null.value=null_values_Est, test="parameter", case="stationary") dat <- data.frame(simll=ll_est) dat[inc] <- sims_incl_Est ci_result <- ci(s, level=c(.9,.95), ci="parameter", case="stationary") regcoef_t <- ci_result$regression_estimates eesl <- function(x) { regcoef_t$a + regcoef_t$b*x + regcoef_t$c*x^2 } # estimated expected simlation log-likelihood vline_names <- c("true","CI90","CI95") g2 <- ggplot(dat, aes(x=!!sym(inc), y=simll)) + geom_point() + geom_function(fun=eesl, linetype="longdash") + geom_vline(data=data.frame(kind=factor(c("true",rep("CI90",2),rep("CI95",2)), levels=vline_names), value=c(theta_Est[inc], ci_result$confidence_interval[1,2:3], ci_result$confidence_interval[2,2:3])), aes(xintercept=value, linetype=kind)) + scale_linetype_manual(name=NULL, labels=c("true", "90%CI", "95%CI"), values=c(true="dashed",CI90="dotdash",CI95="dotted")) + ylab('simulation log-likelihood') + xlab(transnames[parid]) + theme(legend.position="bottom") g2 ## ----warning=FALSE, fig.width=2.4, fig.height=2.7----------------------------- sim_widths <- c(.5, .2) inc <- c("kappa", "tau") # parameters being estimated parid <- which(parnames%in%inc) M <- 400 Np <- 100 sims_incl_Est <- expand.grid(seq(theta_Est[inc[1]]-sim_widths[parid[1]], theta_Est[inc[1]]+sim_widths[parid[1]], length.out=sqrt(M)), seq(theta_Est[inc[2]]-sim_widths[parid[2]], theta_Est[inc[2]]+sim_widths[parid[2]], length.out=sqrt(M))) |> as.matrix() |> `colnames<-`(inc) sims_at_Nat <- outer(rep(1,M), theta_t) sims_at_Nat[,inc] <- sapply(1:length(inc), function(i) btrans[[parid[i]]](sims_incl_Est[,i])) set.seed(729875) llp_est <- simplify2array(lapply(1:nrow(sims_at_Nat), function(i) { sp <- sims_at_Nat[i,] pfSV <- pfilter(pomp(SV_t, params=sp), Np=Np, save.states="prediction") lme <- sapply(saved_states(pfSV)$weights, logmeanexp) lme })) ll_est <- apply(llp_est, 2, sum) s <- simll(llp_est, params=sims_incl_Est) null_values_Est <- expand.grid(theta_Est[inc[1]]+sim_widths[parid[1]]/5*(-10:10), theta_Est[inc[2]]+sim_widths[parid[2]]/5*(-10:10)) |> as.matrix() |> `colnames<-`(inc) ht_result <- ht(s, null.value=null_values_Est, test="parameter", case="stationary") dat <- data.frame(null_values_Est) g3 <- ggplot(dat %>% mutate(pval=ht_result$Hypothesis_Tests[,"pvalue"]) %>% mutate(conflvl=cut(pval, breaks=c(0,0.05,0.1,0.2,1), right=FALSE)), aes(x=!!sym(inc[1]), y=!!sym(inc[2]))) + geom_point(aes(color=conflvl), size=0.6) + xlab(transnames[parid[1]]) + ylab(transnames[parid[2]]) + scale_color_manual(name="", labels=c("100%", "95%", "90%", "80%"), values=rgb(0,0,0,c(0.05,0.25,0.5,1))) + geom_point(aes(x=theta_Est[inc[1]], y=theta_Est[inc[2]]), shape=4, size=2) + theme(legend.position="bottom") g3 ## ----------------------------------------------------------------------------- null_moments <- list(a=-984, b=c(25.8,-23.7), c=matrix(c(-8.25,8,8,-93),2,2), sigsq=5) ht(s, null.value=null_moments, test="moments") ## ----------------------------------------------------------------------------- set.seed(1098204) s <- simll(rnorm(50, 0, 1)) ## ----------------------------------------------------------------------------- null_values <- rbind(c(0,1), c(0,0.8), c(0.2, 1)) ht(s, null.value=null_values, test="moments", type="point")