## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) ## ----setup, message=FALSE, warning=FALSE-------------------------------------- library(SuperSurv) library(survival) data("metabric", package = "SuperSurv") set.seed(42) # Create 100 columns of random noise (simulating irrelevant genes/biomarkers) n_patients <- nrow(metabric) noise_data <- matrix(rnorm(n_patients * 20), nrow = n_patients) colnames(noise_data) <- paste0("noise_", 1:20) # Combine the real data with the noise high_dim_data <- cbind(metabric, noise_data) # Standard Train/Test Split train_idx <- sample(1:nrow(high_dim_data), 0.7 * nrow(high_dim_data)) train <- high_dim_data[train_idx, ] test <- high_dim_data[-train_idx, ] X_tr <- train[, grep("^x|^noise", names(high_dim_data))] X_te <- test[, grep("^x|^noise", names(high_dim_data))] new.times <- seq(50, 200, by = 25) ## ----define-library----------------------------------------------------------- # We pass a list of vectors. The first element is the prediction model, # and the second element is the screening algorithm. my_screen_library <- list( c("surv.coxph", "screen.all"), # Baseline: Cox model with ALL variables c("surv.coxph", "screen.marg"), # Screen by marginal association, then fit Cox c("surv.weibull", "screen.elasticnet"),# Screen via Elastic Net, then fit Weibull c("surv.rpart", "screen.var") # Drop zero-variance noise, then fit a Tree ) # For the censoring library, we use an unscreened approach cens_library <- c("surv.coxph") ## ----fit-screen, results='hide', message=FALSE, warning=FALSE----------------- fit_highdim <- SuperSurv( time = train$duration, event = train$event, X = X_tr, newdata = X_te, new.times = new.times, event.library = my_screen_library, # Pass our screened library cens.library = cens_library, control = list(saveFitLibrary = TRUE), verbose = T, nFolds = 3 ) ## ----evaluate-screen---------------------------------------------------------- # Check the integrated performance screened_performance <- eval_summary( object = fit_highdim, newdata = X_te, time = test$duration, event = test$event, eval_times = new.times ) # Plot the benchmark # plot_benchmark(fit_highdim, newdata = X_te, time = test$duration, event = test$event, eval_times = new.times) ## ----extract-features--------------------------------------------------------- # The event.whichScreen object is a logical matrix. # Rows correspond to the predictors, and columns correspond to the algorithms in our event library. # 1. Get the logical vector for the 2nd model (Row 2) is_selected <- fit_highdim$event.whichScreen[2, ] # 2. Map it to our original column names selected_features <- colnames(X_tr)[is_selected] cat("Total features evaluated:", ncol(X_tr), "\n") cat("Features retained by Elastic Net:", length(selected_features), "\n\n") cat("Selected Features:\n") print(selected_features)