---
title: "Confidence interval vignette."
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Confidence interval vignette.}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r options, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```{r setup}
library(banditsCI)
set.seed(123)
```

## Set parameters.

We generate a data set with 5,000 observations assigned over 5 equally sized batches, with 10 covariates and 4 treatment arms.

```{r parameters}
batch_sizes <- rep(1e3, 5)
A <- sum(batch_sizes)
p <- 10
K <- 4
```

We simulate the data from a tree model.

```{r simtreedata, eval = TRUE}
data <- simple_tree_data(A = A,
                         K = K,
                         p = p,
                         split = 0.25,
                         noise_std = 0.25)
```

Alternatively, we could use the `generate_bandit_data()` function to generate data based on a `y` vector and an `xs` matrix, which is potentially useful if we had real data from a pilot (this code is not evaluated here).

```{r simlindata, eval = FALSE}
# # Interacted linear model
# xs <- matrix(rnorm(A*p), ncol = p) # covariates of shape [A, p]
# # generate a linear model
# coef <- c(rnorm(2), rnorm(ncol(xs)-2, sd = 0.05))
# y_latent <- xs %*% coef
# y <- as.numeric(cut(rank(y_latent),
#                     # one group is twice as large as the other groups
#                     breaks = c(0, A*(2:(K+1)/(K+1)))))
# data <- generate_bandit_data(xs = xs, y = y, noise_std = 0.5)
```

Components of `data[[1]]`:

- `ys`: outcomes vector of shape [A];
- `xs`: covariates of shape [A, p]. The value in `xs[i, j]` is the j-th covariate of the i-th observation;
- `muxs`: true mean outcomes of shape [A, K]. The value in `muxs[i, j]` is the expected outcome (reward) if the i-th observation were assigned to the j-th treatment arm; taking `which.max` over a row therefore recovers the true best arm for that context.
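
A quick structural check of these components can be helpful; the chunk below is a sketch using only base R (`length()` and `dim()`), with a chunk name of our own choosing:

```{r inspect_data}
# Confirm the shapes described above.
length(data[[1]]$ys)  # A
dim(data[[1]]$xs)     # A x p
dim(data[[1]]$muxs)   # A x K
```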
## For the contextual case.

We run a contextual bandit experiment using our `run_experiment()` function. The algorithm used here is a version of linear Thompson sampling.

```{r contextual_experiment, fig.width=7, fig.height=5, out.width = "100%"}
# access dataset components
xs <- data[[1]]$xs
ys <- data[[1]]$ys

results <- run_experiment(ys = ys,
                          floor_start = 0.025,
                          floor_decay = 0,
                          batch_sizes = batch_sizes,
                          xs = xs)

# plot the cumulative assignment graph for every arm and every batch;
# x-axis is the number of observations, y-axis is the cumulative assignment
plot_cumulative_assignment(results, batch_sizes)
```

The overall assignment plot over-assigns to the first arm, but note that our groups are not balanced: the group for whom arm 1 is best is somewhat larger.

```{r best_arm}
table(apply(data[[1]]$muxs, 1, which.max))
```

```{r illustrate_data, fig.width=7, fig.height=5, out.width = "100%"}
muxs <- apply(data[[1]]$muxs, 1, which.max)
cols <- c("#00000080", paste0(palette()[2:4], "4D")) # for transparency
plot(xs[, 1], xs[, 2], col = cols[muxs], pch = 20,
     xlab = "X1", ylab = "X2", type = "p", cex = 0.5)
graphics::legend("topleft", legend = 1:K, col = 1:K, pch = 19,
                 title = "Best arm")
```

We now create separate assignment plots based on the true best arm; within each group, the algorithm should over-assign treatment to the true best arm if it is learning the contexts correctly. Note that the effective batch sizes differ across the groups.

```{r contextual_assignment_by_arm, fig.width=7, fig.height=5, out.width = "45%"}
for (k in 1:K) {
  # observations whose true best arm is k (muxs is one-hot in this simulation)
  idx <- (data[[1]]$muxs[, k] == 1)
  # effective batch sizes within this group
  batch_sizes_w <- sapply(split(idx, cut(1:sum(batch_sizes),
                                         c(0, cumsum(batch_sizes)))),
                          sum)
  dat <- matrix(0, nrow = sum(idx), ncol = K)
  dat[cbind(1:sum(idx), results$ws[idx])] <- 1
  dat <- apply(dat, 2, cumsum)
  graphics::matplot(dat, type = "l", col = 1:K, lwd = 3,
                    xlab = "Observations", ylab = "Cumulative assignment",
                    main = paste0("Assignment for arm ", k))
  graphics::abline(v = cumsum(batch_sizes_w), col = "#00ccff")
  graphics::legend("topleft", legend = 1:K, col = 1:K, lty = 1:K, lwd = 3)
}
```

### Estimating response.

We then generate augmented inverse probability weighted (AIPW) scores, using a conditional means model and known assignment probabilities. Here, the conditional means are estimated from a ridge model, which estimates conditional means for each observation based only on batchwise history, using the `ridge_muhat_lfo_pai()` function. Other conditional means models can also be used.
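
For reference, the resulting doubly robust score has the standard AIPW form: writing $e_t(w)$ for the (known) assignment probability and $\hat{\mu}_t$ for the batchwise conditional means model,

$$
\hat{\Gamma}_t(w) \;=\; \hat{\mu}_t(x_t, w) \;+\; \frac{\mathbf{1}\{W_t = w\}}{e_t(w)} \left( Y_t - \hat{\mu}_t(x_t, w) \right).
$$

The first term is the model prediction; the second is an inverse-probability-weighted residual correction, which keeps the score unbiased for the arm's value even when $\hat{\mu}_t$ is misspecified, provided the assignment probabilities are correct.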
```{r contextual_means_estimation}
# Get estimates for policies

# conditional means model
mu_hat <- ridge_muhat_lfo_pai(xs = xs,
                              ws = results$ws,
                              yobs = results$yobs,
                              K = K,
                              batch_sizes = batch_sizes)

# Our conditional means model is three-dimensional, as we estimate
# counterfactuals for each context at each time point. Here, we only need
# the estimate for the time point at which each observation was realized,
# i.e. the "diagonal" slice.
mu_hat2d <- mu_hat[1, , ] # initialize an [A, K] matrix of the right shape
for (i in 1:nrow(mu_hat2d)) {
  mu_hat2d[i, ] <- mu_hat[i, i, ]
}

# inverse probability score 1[W_t=w]/e_t(w) of pulling arms, shape [A, K]
balwts <- calculate_balwts(results$ws, results$probs)

# Putting it together, generate doubly robust scores
aipw_scores <- aw_scores(
  ws = results$ws,
  yobs = results$yobs,
  mu_hat = mu_hat2d,
  K = ncol(results$ys),
  balwts = balwts)

## Define counterfactual policies

## Control policy matrix policy0: a matrix with A rows and K columns, where
## the first column is all 1s and the remaining columns are all 0s.
policy0 <- matrix(0, nrow = A, ncol = K)
policy0[, 1] <- 1

## Treatment policies list policy1: a list with K elements, where each
## element is a matrix with A rows and K columns. The x-th column of the
## matrix in the x-th policy is 1, identifying treatment arm x.
policy1 <- lapply(1:K, function(x) {
  pol_mat <- matrix(0, nrow = A, ncol = K)
  pol_mat[, x] <- 1
  pol_mat
})

## Estimate the value Q(w) of each single arm w in policy1 in turn.
out_full <- output_estimates(
  policy1 = policy1,
  gammahat = aipw_scores,
  probs_array = results$probs,
  floor_decay = 0)
```

We first look at estimates of mean response under each of the treatment arms. True mean values are represented by the dashed red line.

```{r contextual_means_plot, fig.width=7, fig.height=5, out.width = "45%"}
op <- par(no.readonly = TRUE)
par(mar = c(5, 16, 4, 2) + 0.1)
# set common x-axis limits across plots
xmin <- min(unlist(lapply(out_full, `[`, TRUE, "estimate")), na.rm = TRUE) -
  2 * max(unlist(lapply(out_full, `[`, TRUE, "std.error")), na.rm = TRUE)
xmax <- max(unlist(lapply(out_full, `[`, TRUE, "estimate")), na.rm = TRUE) +
  2 * max(unlist(lapply(out_full, `[`, TRUE, "std.error")), na.rm = TRUE)
for (i in 1:length(out_full)) {
  xest <- out_full[[i]][, "estimate"]
  x0 <- xest - 1.96 * out_full[[i]][, "std.error"]
  x1 <- xest + 1.96 * out_full[[i]][, "std.error"]
  plot(x = xest, y = 1:length(xest), yaxt = "n",
       xlab = "Estimates", ylab = "", xlim = c(xmin, xmax),
       main = paste0("Arm ", i, " mean response"))
  segments(y0 = 1:length(xest), y1 = 1:length(xest), x0 = x0, x1 = x1)
  axis(2, at = 1:length(xest), labels = names(xest), las = 2)
  abline(v = data$mus[i], col = "#FF3300", lty = "dashed")
}
suppressWarnings(par(op))
```

Then, we estimate average treatment effects of each arm compared to the control arm, arm 1, using two different methods.

```{r contextual_ATEs_estimation}
# Get estimates for treatment effects of policies in contrast to control,
# \delta(w_1, w_2) = E[Y_t(w_1) - Y_t(w_2)].
# In Hadad et al. (2021) there are two approaches.

## The first approach uses the difference in AIPW scores as the unbiased
## scoring rule for \delta(w_1, w_2). The following call implements it by
## subtracting policy0, the control arm, from all the arms in policy1
## except the control arm itself.
out_full_te1 <- output_estimates(
  policy0 = policy0,
  policy1 = policy1[-1], # remove the control arm from policy1
  contrasts = "combined",
  gammahat = aipw_scores,
  probs_array = results$probs,
  floor_decay = 0,
  non_contextual_twopoint = FALSE)

## The second approach uses asymptotically normal inference about
## \delta(w_1, w_2): \hat{\delta}(w_1, w_2) = \hat{Q}(w_1) - \hat{Q}(w_2).
out_full_te2 <- output_estimates(
  policy0 = policy0,
  policy1 = policy1[-1], # remove the control arm from policy1
  contrasts = "separate",
  gammahat = aipw_scores,
  probs_array = results$probs,
  floor_decay = 0)
```
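
Schematically, writing $h_t$ for the adaptive weights of Hadad et al. (2021) and $\hat{Q}^{h}(w) = \sum_t h_t(w)\,\hat{\Gamma}_t(w) \,/\, \sum_t h_t(w)$ for the weighted value estimate, the two contrasts are:

$$
\hat{\delta}_{\text{combined}}(w_1, w_2) = \frac{\sum_t h_t \left( \hat{\Gamma}_t(w_1) - \hat{\Gamma}_t(w_2) \right)}{\sum_t h_t},
\qquad
\hat{\delta}_{\text{separate}}(w_1, w_2) = \hat{Q}^{h}(w_1) - \hat{Q}^{h}(w_2).
$$

In the combined case one set of weights is applied to the differenced scores; in the separate case each arm receives its own weights and the two value estimates are differenced afterwards. This is a schematic summary of the estimators, not the package's exact internals.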
1. Under the first approach, we calculate AIPW scores under both treatment(s) and control, take the difference in AIPW scores, and then conduct adaptive weighting. Note: we do not use this combined estimation method with the non-contextual two-point estimation approach, which produces unstable estimates and is not supported in the original source. Non-contextual two-point estimation is available when calculating separate adaptive weights for contrasts; however, we recommend other estimation procedures in contextual adaptive settings.

```{r contextual_ATEs_plot1, fig.width=7, fig.height=5, out.width = "45%"}
par(mar = c(5, 16, 4, 2) + 0.1)
# set common x-axis limits across plots
xmin <- min(unlist(lapply(out_full_te1, `[`, TRUE, "estimate")), na.rm = TRUE) -
  2 * max(unlist(lapply(out_full_te1, `[`, TRUE, "std.error")), na.rm = TRUE)
xmax <- max(unlist(lapply(out_full_te1, `[`, TRUE, "estimate")), na.rm = TRUE) +
  2 * max(unlist(lapply(out_full_te1, `[`, TRUE, "std.error")), na.rm = TRUE)
for (i in 1:length(out_full_te1)) {
  xest <- out_full_te1[[i]][, "estimate"]
  x0 <- xest - 1.96 * out_full_te1[[i]][, "std.error"]
  x1 <- xest + 1.96 * out_full_te1[[i]][, "std.error"]
  plot(x = xest, y = 1:length(xest), yaxt = "n",
       xlab = "Estimates", ylab = "", xlim = c(xmin, xmax),
       main = paste0("Arm ", i + 1, " ATE wrt arm 1"))
  segments(y0 = 1:length(xest), y1 = 1:length(xest), x0 = x0, x1 = x1)
  axis(2, at = 1:length(xest), labels = names(xest), las = 2)
  abline(v = data$mus[i + 1] - data$mus[1], col = "#FF3300", lty = "dashed")
}
suppressWarnings(par(op))
```

2. In the second approach, we implement adaptive weighting on treatment and control scores separately, and then take the difference.

```{r contextual_ATEs_plot2, fig.width=7, fig.height=5, out.width = "45%"}
par(mar = c(5, 16, 4, 2) + 0.1)
# set common x-axis limits across plots
xmin <- min(unlist(lapply(out_full_te2, `[`, TRUE, "estimate")), na.rm = TRUE) -
  2 * max(unlist(lapply(out_full_te2, `[`, TRUE, "std.error")), na.rm = TRUE)
xmax <- max(unlist(lapply(out_full_te2, `[`, TRUE, "estimate")), na.rm = TRUE) +
  2 * max(unlist(lapply(out_full_te2, `[`, TRUE, "std.error")), na.rm = TRUE)
for (i in 1:length(out_full_te2)) {
  xest <- out_full_te2[[i]][, "estimate"]
  x0 <- xest - 1.96 * out_full_te2[[i]][, "std.error"]
  x1 <- xest + 1.96 * out_full_te2[[i]][, "std.error"]
  plot(x = xest, y = 1:length(xest), yaxt = "n",
       xlab = "Estimates", ylab = "", xlim = c(xmin, xmax),
       main = paste0("Arm ", i + 1, " ATE wrt arm 1"))
  segments(y0 = 1:length(xest), y1 = 1:length(xest), x0 = x0, x1 = x1)
  axis(2, at = 1:length(xest), labels = names(xest), las = 2)
  abline(v = data$mus[i + 1] - data$mus[1], col = "#FF3300", lty = "dashed")
}
suppressWarnings(par(op))
```

## For the non-contextual case.

We also run a non-contextual experiment, using the same original data. The algorithm used is Thompson sampling, without contexts.

```{r non-contextual_experiment, fig.width=7, fig.height=5, out.width = "100%"}
# For a non-contextual experiment, we simply omit the contexts argument (xs)
results <- run_experiment(ys = ys,
                          floor_start = 0.025,
                          floor_decay = 0,
                          batch_sizes = batch_sizes)

# plot the cumulative assignment graph;
# x-axis is the number of observations, y-axis is the cumulative assignment
plot_cumulative_assignment(results, batch_sizes)
```
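
As a quick numeric complement to the plot, the final assignment shares can be tabulated directly from the realized arm pulls in `results$ws`; the chunk below is a one-line check using base R:

```{r noncontextual_assignment_shares}
# Share of observations assigned to each arm over the whole experiment.
prop.table(table(results$ws))
```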
### Estimating response.

```{r noncontextual_means_estimation}
# Get estimates for policies

# inverse probability score 1[W_t=w]/e_t(w) of pulling arms, shape [A, K]
balwts <- calculate_balwts(results$ws, results$probs)

# Generate doubly robust scores. We don't use the contexts for a means
# model here, but we could, even though they are not used in assignment.
aipw_scores <- aw_scores(
  ws = results$ws,
  yobs = results$yobs,
  K = ncol(results$ys),
  balwts = balwts)

## Define counterfactual policies

## Control policy matrix policy0: a matrix with A rows and K columns, where
## the first column is all 1s and the remaining columns are all 0s.
policy0 <- matrix(0, nrow = A, ncol = K)
policy0[, 1] <- 1

## Treatment policies list policy1: a list with K elements, where each
## element is a matrix with A rows and K columns. The x-th column of the
## matrix in the x-th policy is 1, identifying treatment arm x.
policy1 <- lapply(1:K, function(x) {
  pol_mat <- matrix(0, nrow = A, ncol = K)
  pol_mat[, x] <- 1
  pol_mat
})

## Estimate the value Q(w) of each single arm w in policy1 in turn.
out_full <- output_estimates(
  policy1 = policy1,
  gammahat = aipw_scores,
  probs_array = results$probs,
  floor_decay = 0)
```

We first look at estimates of mean response under each of the treatment arms. True mean values are represented by the dashed red line.

```{r noncontextual_means_plot, fig.width=7, fig.height=5, out.width = "45%"}
par(mar = c(5, 16, 4, 2) + 0.1)
# set common x-axis limits across plots
xmin <- min(unlist(lapply(out_full, `[`, TRUE, "estimate")), na.rm = TRUE) -
  2 * max(unlist(lapply(out_full, `[`, TRUE, "std.error")), na.rm = TRUE)
xmax <- max(unlist(lapply(out_full, `[`, TRUE, "estimate")), na.rm = TRUE) +
  2 * max(unlist(lapply(out_full, `[`, TRUE, "std.error")), na.rm = TRUE)
for (i in 1:length(out_full)) {
  xest <- out_full[[i]][, "estimate"]
  x0 <- xest - 1.96 * out_full[[i]][, "std.error"]
  x1 <- xest + 1.96 * out_full[[i]][, "std.error"]
  plot(x = xest, y = 1:length(xest), yaxt = "n",
       xlab = "Estimates", ylab = "", xlim = c(xmin, xmax),
       main = paste0("Arm ", i, " mean response"))
  segments(y0 = 1:length(xest), y1 = 1:length(xest), x0 = x0, x1 = x1)
  axis(2, at = 1:length(xest), labels = names(xest), las = 2)
  abline(v = data$mus[i], col = "#FF3300", lty = "dashed")
}
suppressWarnings(par(op))
```

We again estimate average treatment effects of each arm compared to the control arm, arm 1, using the two different methods.

```{r noncontextual_ATEs_estimation}
# Get estimates for treatment effects of policies in contrast to control,
# \delta(w_1, w_2) = E[Y_t(w_1) - Y_t(w_2)].
# In Hadad et al. (2021) there are two approaches.

## The first approach uses the difference in AIPW scores as the unbiased
## scoring rule for \delta(w_1, w_2), subtracting policy0, the control arm,
## from all the arms in policy1 except the control arm itself.
out_full_te1 <- output_estimates(
  policy0 = policy0,
  policy1 = policy1[-1], # remove the control arm from policy1
  contrasts = "combined",
  gammahat = aipw_scores,
  probs_array = results$probs,
  floor_decay = 0)

## The second approach uses asymptotically normal inference about
## \delta(w_1, w_2): \hat{\delta}(w_1, w_2) = \hat{Q}(w_1) - \hat{Q}(w_2).
out_full_te2 <- output_estimates(
  policy0 = policy0,
  policy1 = policy1[-1], # remove the control arm from policy1
  contrasts = "separate",
  gammahat = aipw_scores,
  probs_array = results$probs,
  floor_decay = 0)
```
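
To make the "combined" scoring rule concrete: it differences the AIPW score columns observation by observation before any weighting. A minimal sketch for the arm 2 vs. arm 1 contrast follows (`output_estimates()` performs this step, plus the adaptive weighting, internally):

```{r combined_scores_sketch}
# Per-observation unbiased scores for delta(arm 2, arm 1):
# difference of the corresponding AIPW score columns.
gamma_delta <- aipw_scores[, 2] - aipw_scores[, 1]
summary(gamma_delta)
```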
1. First take the difference in AIPW scores (as sketched above), and then conduct adaptive weighting.

```{r noncontextual_ATEs_plot1, fig.width=7, fig.height=5, out.width = "45%"}
par(mar = c(5, 16, 4, 2) + 0.1)
# set common x-axis limits across plots
xmin <- min(unlist(lapply(out_full_te1, `[`, TRUE, "estimate")), na.rm = TRUE) -
  2 * max(unlist(lapply(out_full_te1, `[`, TRUE, "std.error")), na.rm = TRUE)
xmax <- max(unlist(lapply(out_full_te1, `[`, TRUE, "estimate")), na.rm = TRUE) +
  2 * max(unlist(lapply(out_full_te1, `[`, TRUE, "std.error")), na.rm = TRUE)
for (i in 1:length(out_full_te1)) {
  xest <- out_full_te1[[i]][, "estimate"]
  x0 <- xest - 1.96 * out_full_te1[[i]][, "std.error"]
  x1 <- xest + 1.96 * out_full_te1[[i]][, "std.error"]
  plot(x = xest, y = 1:length(xest), yaxt = "n",
       xlab = "Estimates", ylab = "", xlim = c(xmin, xmax),
       main = paste0("Arm ", i + 1, " ATE wrt arm 1"))
  segments(y0 = 1:length(xest), y1 = 1:length(xest), x0 = x0, x1 = x1)
  axis(2, at = 1:length(xest), labels = names(xest), las = 2)
  abline(v = data$mus[i + 1] - data$mus[1], col = "#FF3300", lty = "dashed")
}
suppressWarnings(par(op))
```

2. Or implement adaptive weighting on treatment and control scores separately, and then take the difference.

```{r noncontextual_ATEs_plot2, fig.width=7, fig.height=5, out.width = "45%"}
par(mar = c(5, 16, 4, 2) + 0.1)
# set common x-axis limits across plots
xmin <- min(unlist(lapply(out_full_te2, `[`, TRUE, "estimate")), na.rm = TRUE) -
  2 * max(unlist(lapply(out_full_te2, `[`, TRUE, "std.error")), na.rm = TRUE)
xmax <- max(unlist(lapply(out_full_te2, `[`, TRUE, "estimate")), na.rm = TRUE) +
  2 * max(unlist(lapply(out_full_te2, `[`, TRUE, "std.error")), na.rm = TRUE)
for (i in 1:length(out_full_te2)) {
  xest <- out_full_te2[[i]][, "estimate"]
  x0 <- xest - 1.96 * out_full_te2[[i]][, "std.error"]
  x1 <- xest + 1.96 * out_full_te2[[i]][, "std.error"]
  plot(x = xest, y = 1:length(xest), yaxt = "n",
       xlab = "Estimates", ylab = "", xlim = c(xmin, xmax),
       main = paste0("Arm ", i + 1, " ATE wrt arm 1"))
  segments(y0 = 1:length(xest), y1 = 1:length(xest), x0 = x0, x1 = x1)
  axis(2, at = 1:length(xest), labels = names(xest), las = 2)
  abline(v = data$mus[i + 1] - data$mus[1], col = "#FF3300", lty = "dashed")
}
suppressWarnings(par(op))
```
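
For reporting, the same output objects can be collapsed into a table of point estimates and 95% intervals. Below is a minimal sketch using the combined-approach results; the `estimate` and `std.error` column names are the ones used throughout this vignette, while the `arm` and `method` labels are ours:

```{r ci_table_sketch}
# Flatten the list of estimate matrices into one data frame of 95% CIs.
ci_tab <- do.call(rbind, lapply(seq_along(out_full_te1), function(i) {
  est <- out_full_te1[[i]][, "estimate"]
  se  <- out_full_te1[[i]][, "std.error"]
  data.frame(arm      = i + 1, # policy1[-1] starts at arm 2
             method   = rownames(out_full_te1[[i]]),
             estimate = est,
             lower    = est - 1.96 * se,
             upper    = est + 1.96 * se)
}))
ci_tab
```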