## ----setup, include = FALSE---------------------------------------------------
# Global knitr chunk options: merge source and output into one block and
# prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(cramR)
library(data.table)
library(DT)

## ----contextual-example, fig.alt="Cumulative regret curve over time for the selected policy"----
# Number of time steps
horizon <- 500L

# Number of simulations
simulations <- 100L

# Number of arms
k <- 4

# Number of context features
d <- 3

# Reward beta parameters of the linear model (the outcome generation models,
# one for each arm, are linear with arm-specific parameters betas)
list_betas <- cramR::get_betas(simulations, d, k)

# Define the contextual linear bandit, where sigma is the scale of the noise
# in the outcome linear model
bandit <- cramR::ContextualLinearBandit$new(
  k = k,
  d = d,
  list_betas = list_betas,
  sigma = 0.3
)

# Define the policy object (choose between Contextual Epsilon Greedy,
# UCB Disjoint and Thompson Sampling)
policy <- cramR::BatchContextualEpsilonGreedyPolicy$new(
  epsilon = 0.1,
  batch_size = 5
)
# policy <- cramR::BatchLinUCBDisjointPolicyEpsilon$new(alpha=1.0, epsilon=0.1, batch_size=1)
# policy <- cramR::BatchContextualLinTSPolicy$new(v = 0.1, batch_size=1)

# Run the simulation sequentially (do_parallel = FALSE).
# NOTE(review): alpha = 0.05 is presumably the significance level used for the
# reported intervals -- confirm against the cram_bandit_sim documentation.
sim <- cramR::cram_bandit_sim(
  horizon,
  simulations,
  bandit,
  policy,
  alpha = 0.05,
  do_parallel = FALSE
)

## ----contextual-estimates, fig.alt = "First rows of simulation output estimates"----
# Show the first rows of the estimates element returned by the simulation.
head(sim$estimates)

## ----contextual-table, fig.alt = "Table of results"---------------------------
# Display the interactive_table element returned by the simulation.
sim$interactive_table

## ----cleanup-autograph, include=FALSE-----------------------------------------
# Best-effort removal of "__autograph_generated_file*.py" files left in the
# session temp directory; failures are deliberately silenced so that cleanup
# can never break the vignette build.
autograph_files <- list.files(
  tempdir(),
  pattern = "^__autograph_generated_file.*\\.py$",
  full.names = TRUE
)
if (length(autograph_files) > 0) {
  try(unlink(autograph_files, recursive = TRUE, force = TRUE), silent = TRUE)
}