## ----setup, include = FALSE---------------------------------------------------
# Vignette-wide chunk defaults. With eval = FALSE none of the later chunks are
# executed, which is why every chunk below appears commented out in this
# extracted script.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----setup-data---------------------------------------------------------------
# library(ukbflow)
#
# # Build on the derive pipeline from vignette("derive")
# df <- ops_toy(n = 500)
# df <- derive_missing(df)
# df <- derive_covariate(df, as_factor = c("p31", "p20116_i0"))
# df <- derive_selfreport(df, name = "dm", regex = "type 2 diabetes")
# df <- derive_icd10(df, name = "dm", icd10 = "E11", source = c("hes", "death"))
# df <- derive_case(df, name = "dm")

## ----derive-timing------------------------------------------------------------
# # Uses {name}_status and {name}_date by default
# df <- derive_timing(df, name = "dm", baseline_col = "p53_i0")

## ----derive-timing-explicit---------------------------------------------------
# df <- derive_timing(df,
#   name = "dm",
#   status_col = "dm_status",
#   date_col = "dm_date",
#   baseline_col = "p53_i0"
# )

## ----derive-age---------------------------------------------------------------
# # Auto-detects {name}_date and {name}_status; produces age_at_{name} column.
# df <- derive_age(df,
#   name = "dm",
#   baseline_col = "p53_i0",
#   age_col = "p21022"
# )

## ----derive-age-explicit------------------------------------------------------
# df <- derive_age(df,
#   name = "dm",
#   baseline_col = "p53_i0",
#   age_col = "p21022",
#   date_cols = c(dm = "dm_date"),
#   status_cols = c(dm = "dm_status")
# )

## ----derive-followup----------------------------------------------------------
# df <- derive_followup(df,
#   name = "dm",
#   event_col = "dm_date",
#   baseline_col = "p53_i0",
#   censor_date = as.Date("2022-10-31"), # set to your study's cut-off date
#   death_col = "p40000_i0",
#   lost_col = FALSE # not available in ops_toy
# )

## ----derive-followup-nodeath--------------------------------------------------
# df <- derive_followup(df,
#   name = "dm",
#   event_col = "dm_date",
#   baseline_col = "p53_i0",
#   censor_date = as.Date("2022-10-31"),
#   death_col = FALSE,
#   lost_col = FALSE
# )

## ----cox-example--------------------------------------------------------------
# library(survival)
#
# # Incident analysis: exclude prevalent cases and those with undetermined timing
# # NOTE(review): the filter only names dm_timing == 1; dropping "undetermined"
# # rows presumably relies on them being NA and on df being a data.table
# # (NA in `i` is treated as FALSE) -- confirm against derive_timing().
# df_incident <- df[dm_timing != 1L]
#
# fit <- coxph(
#   Surv(dm_followup_years, dm_status) ~
#     p20116_i0 + p21022 + p31 + p1558_i0,
#   data = df_incident
# )
# summary(fit)