## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") set.seed(1234) ## ----load--------------------------------------------------------------------- library(blockwise) data(adult) # Drop `native.country`: it has ~41 levels with a long tail of rare # countries, which can leave a per-block logistic-regression model # without an example of a level that later appears at predict time. # Tree-based learners (`learner_rpart`, `learner_gbm`) tolerate this; # `learner_glm_binomial` does not. Either drop the column or coarsen # its levels before fitting. adult <- adult[, setdiff(names(adult), "native.country")] str(adult, list.len = 20) table(adult$salary) ## ----mask--------------------------------------------------------------------- bike_style_groups <- list( c("age", "workclass", "education"), c("marital.status", "occupation", "relationship") ) adult_miss <- simulate_blockwise_missing( adult, blocks = bike_style_groups, prop_missing = 0.30, noise = 0.02 ) round(colMeans(is.na(adult_miss)) * 100, 1) ## ----split-------------------------------------------------------------------- set.seed(1234) idx <- sample(nrow(adult_miss), floor(0.75 * nrow(adult_miss))) train <- adult_miss[idx, ] test <- adult_miss[-idx, ] X_train <- train[, setdiff(names(train), "salary")] y_train <- train$salary X_test <- test[, setdiff(names(test), "salary")] y_test <- test$salary ## ----fit---------------------------------------------------------------------- set.seed(1234) fit <- brm(X_train, y_train, learner = learner_glm_binomial()) fit ## ----eval--------------------------------------------------------------------- prob <- predict(fit, X_test) pred_class <- as.integer(prob >= 0.5) acc <- mean(pred_class == y_test) cat("Accuracy:", round(acc, 3), "\n") # Confusion matrix table(truth = y_test, predicted = pred_class)