README

Acknowledgement

We would like to express our gratitude to an anonymous reviewer whose valuable suggestions on applying a brute force Monte Carlo method in the implementation significantly improved the computational efficiency in certain settings.

Download

library(RDSsamplesize)
library(microbenchmark)
library(ggplot2)
library(dplyr)
library(latex2exp)

Example

A1 <-calSize(s=10,c=3,maxWave=9,rr=0.4,bruteMC=FALSE,tol=0.025)
A2 <-calSize(s=10,c=3,maxWave=9,rr=0.4,bruteMC=TRUE)
microbenchmark(A1 =calSize(s=10,c=3,maxWave=9,rr=0.4,bruteMC=FALSE,tol=0.025),
               A2 =calSize(s=10,c=3,maxWave=9,rr=0.4,bruteMC=TRUE),
               times=10)
#> Unit: milliseconds
#>  expr       min        lq     mean   median       uq      max neval cld
#>    A1 6824.2620 7110.8935 7594.315 7626.523 8068.794 8206.456    10   b
#>    A2  804.0992  923.8944 1010.687 1037.403 1080.293 1183.563    10  a
B <- calSize(s=10,c=3,maxWave=3,c(0.3,0.2,0.1),bruteMC=FALSE,tol=0.025)

Probability of sampling at least \(n\) individuals (including seeds) by each wave

round(nprobw(A1,n=210),3) # recruit 200 more individuals besides 10 seeds
#>               By wave 1 wave 2 wave 3 wave 4 wave 5 wave 6 wave 7 wave 8 wave 9
#> Pr(size>=210)         0      0      0      0  0.006  0.053  0.234  0.483  0.686
round(nprobw(A2,n=210),3) # recruit 200 more individuals besides 10 seeds
#>               By wave 1 wave 2 wave 3 wave 4 wave 5 wave 6 wave 7 wave 8 wave 9
#> Pr(size>=210)         0      0      0      0  0.001  0.036  0.204  0.452  0.661
round(nprobw(B,n=15),3) # recruit 5 more individuals besides 10 seeds
#>              By wave 1 wave 2 wave 3
#> Pr(size>=15)      0.97  0.993  0.994
round(nprobw(B,n=50),3) # recruit 40 more individuals besides 10 seeds
#>              By wave 1 wave 2 wave 3
#> Pr(size>=50)         0      0      0

Visualization

plot_survival_prob<-function(P_tau_list){
  P_tau<-data.frame(Wave=1:length(P_tau_list),PMF=P_tau_list)
  P_tau<-P_tau%>%mutate(CDF=cumsum(PMF),CCDF=1-CDF)
  print(ggplot(P_tau,aes(x=Wave,y=CCDF))+
    geom_point()+
    geom_line(linetype=2)+
    scale_y_continuous(breaks=seq(0,1,by=.1),limits = c(min(0.9,min(P_tau$CCDF)),1))+
    xlab('Wave, w')+
    ylab('')+
    scale_x_continuous(breaks =1:length(P_tau_list),minor_breaks = NULL)+
    theme_minimal()+
    theme(panel.grid.minor.y = element_blank(),
          legend.title = element_blank(),
          plot.title = element_text(hjust = 0.5))+
    ggtitle(TeX('$Pr(\\tau > w)$')))
}

### design A (with theoretical results) 
plot_survival_prob(P_tau_list=A1[[1]])

### design A (with empirical results) 
plot_survival_prob(P_tau_list=A2[[1]])

### design B
plot_survival_prob(P_tau_list=B[[1]])

plot_sample_size<-function(x){
  info<-data.frame(Wave=as.character(),Zk=as.integer(),PMF=as.double(),CCDF=as.double())
  for (i in 2:length(x)){
    info<-rbind(info,cbind(Wave=i-1,x[[i]]))
  }
  colnames(info)<-c('Wave','Size','PMF','CCDF')
  info$Wave<-paste("Wave",info$Wave)
  print(ggplot(info,aes(x=Size,y=CCDF))+
    facet_wrap(~Wave,scales = "free")+
    geom_point(size=0.5)+
    xlab('n')+
    ylab('')+
    theme_minimal()+
    theme(panel.grid.minor.y = element_blank(),
          legend.title = element_blank(),
          plot.title = element_text(hjust = 0.5))+
    ggtitle(TeX('Pr( Accmulated sample size (including seeds) $\\geq n | k$-th wave)')))
}

### Design A (with theoretical results) 
plot_sample_size(A1)

### Design A (with empirical results)
plot_sample_size(A2)

### Design B 
plot_sample_size(B)