## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) old_options <- options(digits = 4) ## ----setup-------------------------------------------------------------------- library(pye) ## ----simulation--------------------------------------------------------------- # Load the package library(pye) # 1. Simulate data for the example set.seed(123) # Always good to set a seed for reproducibility in examples cols <- 200 cols_cov <- 20 max_rho <- 0.2 rows_train <- 200 sim_data <- create_sample_with_covariates(rows_train = rows_train, cols = cols, cols_cov = cols_cov, max_rho = max_rho, seed = 1) df <- sim_data$train_df_scaled X <- sim_data$X y <- sim_data$y C <- sim_data$C regressors_betas <- sim_data$nregressors # True betas for evaluation regressors_gammas <- sim_data$ncovariates # True gammas for evaluation # 2. Set cross-validation parameters penalty <- "SCAD" # Penalty for betas in pye estimation penalty_g <- "L12" # Penalty for gammas in covYI estimation trend <- "monotone" # Trend for the KS estimation alpha <- 0.5 c_function_of_covariates <- TRUE # Use covariates for 'c' estimation used_cores <- 1 # For this example, no parallelization max_iter <- 10 # Keep iterations low for a quick example run n_folds <- 3 # 3. Calibrate lambda_max and lambda_min for betas (pye estimation) lambda_seq <- create_lambda(n = 4, lmax = 1.5, lmin = 0.1) lambda_seq <- as.numeric(formatC(lambda_seq, format = "e", digits = 9)) # 4. Calibrate tau_max and tau_min for gammas (covYI estimation), if tau_seq <- create_lambda(n = 4, lmax = 0.15, lmin = 0.05) tau_seq <- as.numeric(formatC(tau_seq, format = "e", digits = 9)) # 5. Run the cross-validation pye_cv_result <- pye_KS_compute_cv( n_folds = n_folds, df = df, X = X, y = y, C = C, lambda = lambda_seq, tau = tau_seq, trace = 1, # Show final results alpha = alpha, penalty = penalty, regressors_betas = regressors_betas, regressors_gammas = regressors_gammas, seed = 1, used_cores = used_cores, trend = trend, max_iter = max_iter, c_function_of_covariates = c_function_of_covariates, measure_to_select_lambda = "ccr", penalty_g = penalty_g, trend_g = trend, max_iter_g = max_iter ) # 6. Print results and access optimal lambda/tau cat("\nOptimal Lambda (based on CCR):", pye_cv_result$lambda_hat_ccr, "\n") if (c_function_of_covariates == TRUE) { cat("Optimal Tau (based on CCR):", pye_cv_result$tau_hat_ccr, "\n") } # You can access other results like: pye_cv_result$auc # AUC values pye_cv_result$n_betas # Number of non-zero betas for each lambda pye_cv_result$n_gammas # Number of non-zero gammas for each tau # 7. Compute the performance over 50 simulations with the optimal lambda and tau sim_result <- pye_KS_simulation_study( n = 5, df = df, X = X, y = y, C = C, lambda = pye_cv_result$lambda_hat_ccr, tau = pye_cv_result$tau_hat_ccr, trace = 1, penalty = penalty, # Options: "L12", "L1", "EN", "SCAD", "MCP" penalty_g = penalty_g, # Options: "L12", "L1", "EN", "SCAD", "MCP" used_cores = 1, c_function_of_covariates = c_function_of_covariates, max_iter = max_iter, # Reduced for a quick example max_iter_g = 20 # Reduced for a quick example ) # 8. View summary of simulation results colMeans(sim_result$corrclass) colMeans(sim_result$corrclass_covYI) ## ----include = FALSE---------------------------------------------------------- # Restore the user's original options at the end of the vignette options(old_options)