## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) # For reproducibility set.seed(6006) # Set the number of levels n_levels <- 3000 # Randomly draw the number of observations per level n_obs_lambda <- rgamma(n_levels, 3.1) * 100 n_obs_per_level <- rpois(n_levels, n_obs_lambda) + 1 # Simulate the mean probability per level prob_per_level <- rbeta(n_levels, 3.1, 4.4) # For each level, simulate data d <- vector(mode = "list", length = n_levels) for (l in seq_along(n_obs_per_level)){ d[[l]] <- data.frame( y = rbinom(n_obs_per_level[l], 1, prob_per_level[l]), level = l) } # Combine into a data.frame df <- do.call("rbind", d) df$level <- as.factor(df$level) ## ----echo=FALSE, fig.width=7, fig.height=4, fig.align = 'center'-------------- n_obs_total <- sum(n_obs_per_level) hist(n_obs_per_level, breaks = 80, main = paste(format(n_levels, big.mark = ","), "levels, total N =", format(n_obs_total, big.mark = ",")), xlab = "Number of observations by level") ## ----------------------------------------------------------------------------- library(glm4) run_time <- system.time( fit <- glm4(y ~ level, data = df, family = binomial(), sparse = TRUE) ) run_time ## ----------------------------------------------------------------------------- # Only print a subset of the output, restoring existing max.print after old_options <- options(max.print = 20) print(fit) options(old_options) ## ----------------------------------------------------------------------------- # Model-implied probability for level 2 plogis(sum(coef(fit)[1:2])) # Should equal the observed proportion with(df, mean(y[level == 2])) ## ----------------------------------------------------------------------------- class(fit$glm4_fit) ## ----------------------------------------------------------------------------- summary(fit) ## ----------------------------------------------------------------------------- logLik(fit) ## ----------------------------------------------------------------------------- confint(fit)[1:20,] ## ----------------------------------------------------------------------------- vcov(fit)[1:5, 1:5] ## ----------------------------------------------------------------------------- set.seed(6006) # Add covariate and fit df$x <- rnorm(nrow(df)) fit_x <- glm4(y ~ x + level, data = df, family = binomial(), sparse = TRUE) # glm4 anova method anova(fit_x) ## ----------------------------------------------------------------------------- anova(fit_x, test = "Chisq") ## ----------------------------------------------------------------------------- anova(fit, fit_x, test = "Chisq") ## ----eval = FALSE------------------------------------------------------------- # set.seed(6006) # # n_levels <- 3000 # # n_obs_lambda <- rgamma(n_levels, 3.1) * 100 # n_obs_per_level <- rpois(n_levels, n_obs_lambda) + 1 # prob_per_level <- rbeta(n_levels, 3.1, 4.4) # # d <- vector(mode = "list", length = n_levels) # for (l in seq_along(n_obs_per_level)) { # d[[l]] <- data.frame( # y = rbinom(n_obs_per_level[l], 1, prob_per_level[l]), # level = l) # } # # df <- do.call("rbind", d) # df$level <- as.factor(df$level)