## NOTE(review): the `## ----label----` headers below look like knitr::purl()
## output from a vignette; if so, the .Rmd source is the place to make lasting
## changes -- confirm before editing this file by hand.

## ----setup, include = FALSE---------------------------------------------------
## Chunks that call DICEr() or load torch are skipped on CRAN
## (they require the torch system library and ~160 MB of weights).
## Set NOT_CRAN=true locally, or use devtools::check() which sets it automatically.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  # Chunks are evaluated only when the NOT_CRAN environment variable is
  # exactly the string "true".
  eval = identical(Sys.getenv("NOT_CRAN"), "true")
)

## ----install, eval = FALSE----------------------------------------------------
# ## From a local source tarball:
# install.packages(
#   "/path/to/DICErClust_0.1.1.tar.gz",
#   repos = NULL, type = "source"
# )

## ----data-format--------------------------------------------------------------
# ## Build a minimal synthetic dataset ----------------------------------------
# set.seed(42)
# n_train <- 120L
# n_test <- 40L
# p <- 6L
# q <- 3L
#
# ## Write an unnamed three-element list (data_x, data_v, data_y) for `n`
# ## observations to `path`. Reads `p` and `q` from the enclosing environment.
# make_rds <- function(n, path) {
#   saveRDS(
#     list(
#       matrix(runif(n * p), n, p),                       # data_x: continuous
#       matrix(as.numeric(rbinom(n * q, 1, 0.5)), n, q),  # data_v: binary float
#       rbinom(n, 1, 0.3)                                 # data_y: outcome
#     ),
#     path
#   )
# }
#
# data_dir <- file.path(tempdir(), "dice_intro")
# dir.create(data_dir, showWarnings = FALSE)
# make_rds(n_train, file.path(data_dir, "train.rds"))
# make_rds(n_test, file.path(data_dir, "test.rds"))
#
# ## Verify format
# d <- readRDS(file.path(data_dir, "train.rds"))
# cat("data_x:", nrow(d[[1]]), "×", ncol(d[[1]]),
#     " storage:", storage.mode(d[[1]]), "\n")
# cat("data_v:", nrow(d[[2]]), "×", ncol(d[[2]]),
#     " storage:", storage.mode(d[[2]]), "\n")
# cat("data_y: length", length(d[[3]]),
#     " table:", paste(table(d[[3]]), collapse = "/"), "\n")

## ----train, eval = FALSE------------------------------------------------------
# library(DICErClust)
#
# args <- list(
#   seed = 42L,
#   input_path = data_dir,
#   filename_train = "train.rds",
#   filename_test = "test.rds",
#   n_input_fea = p,          # columns in data_x
#   n_hidden_fea = 3L,        # LSTM latent dimension
#   lstm_layer = 1L,
#   lstm_dropout = 0.0,
#   K_clusters = 2L,          # number of clusters
#   n_dummy_demov_fea = q,    # columns in data_v
#   cuda = FALSE,             # set TRUE to use GPU
#   lr = 1e-4,
#   init_AE_epoch = 5L,       # Stage 1 warm-up epochs
#   iter = 20L,               # Stage 2 iterations
#   epoch_in_iter = 2L,
#   lambda_AE = 1.0,
#   lambda_classifier = 1.0,
#   lambda_outcome = 1.0,
#   lambda_p_value = 1.0
# )
#
# ## Run from tempdir() (the results are read back from there below);
# ## `finally` restores the previous working directory even if DICEr() fails.
# old_wd <- setwd(tempdir())
# tryCatch(
#   DICEr(args),  # writes output to hn_3_K_2/part2_AE_nhidden_3/
#   finally = setwd(old_wd)
# )

## ----load-results, eval = FALSE-----------------------------------------------
# part2_dir <- file.path(tempdir(), "hn_3_K_2", "part2_AE_nhidden_3")
#
# res_train <- readRDS(file.path(part2_dir, "data_train_iter.rds"))
# res_test <- readRDS(file.path(part2_dir, "data_test_iter.rds"))
#
# ## Cluster assignments
# ##   Training set: use res_train$C     (k-means labels, re-ordered by outcome rate)
# ##   Test set:     use res_test$pred_C (nearest-centroid assignments)
# table(res_test$pred_C)

## ----vignette-link, eval = FALSE----------------------------------------------
# vignette("heart-failure-example", package = "DICErClust")